Need help decrypting my assembly program - c++

I have this encryption program written in C++ with inline ASM. It contains the encryption routines, but I need to know
what the decryption routine for it should look like.
This is the code :
//-ENCRYPTION ROUTINES
// Encrypts `length` characters: reads each character from OChars, passes it
// through the `encrypt` assembly subroutine defined at the bottom of this
// function, and stores the resulting byte in EChars at the same index.
// OChars and EChars are declared outside this block.
//   length - number of characters to process
//   EKey   - key byte; its address is handed to the subroutine in eax
void encrypt_chars (int length, char EKey)
{ char temp_char;
for (int i = 0; i < length; i++)
{ temp_char = OChars [i];
__asm {
// Save the registers this block overwrites.
push eax
push ecx
// ecx = character to encrypt (zero-extended), eax = address of the key.
movzx ecx,temp_char
lea eax,EKey
call encrypt
// The subroutine leaves the encrypted byte in al.
mov temp_char,al
pop ecx
pop eax
}
EChars [i] = temp_char;
}
// Return here so execution never falls through into the subroutines below.
return;
// --- Start of Assembly code
__asm {
// encrypt5: not called by the loop above (which calls `encrypt`).
// In: eax = address of key byte, ecx = character.
// Rewrites the key in place as ((key & 0x7C) >> 2) + 1, then returns
// rol((newkey ^ char), 1) in al. edx is overwritten and not restored.
encrypt5: push eax
mov al,byte ptr [eax]
push ecx
// Keep bits 2..6 only; the low two bits are now zero, so the two
// rotates below act as a plain shift right by 2.
and eax,0x7C
ror eax,1
ror eax,1
inc eax
mov edx,eax
pop ecx
pop eax
// Store the updated key back through the saved address.
mov byte ptr [eax],dl
xor edx,ecx
mov eax,edx
rol al,1
ret
// encrypt: the routine actually used. In: ecx = character.
// Returns ecx + 1 in eax; the key address passed in eax is overwritten unused.
encrypt:
mov eax,ecx
inc eax
ret
}
//--- End of Assembly code
}

The best clue ever for decryption (and as general as the question):
undo everything
I guess every instruction in that code has a corresponding inverse (unless it destroys data, but hey).
So if the code ends with:
inc eax
ret
You start with
[load the return in eax]
dec eax
and so on.

Related

Why in Release mode part of the variables can be watched within a debugger?

Consider the following code..
// Minimal repro: copy the characters of a std::basic_string<char> one by one
// into a heap-allocated char buffer.
// NOTE(review): std::basic_string is declared in <string>; nothing from
// <vector> is used in this snippet.
#include <vector>
std::basic_string<char> sBasicString = "basic_string";
// Raw 1000-byte buffer; never freed in this snippet.
char* buffer = new char[1000];
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
{
// The temporary `c` gives the debugger a separate statement to step on.
char c;
c = sBasicString[i];
buffer[i] = c;
}
// NOTE(review): only size() characters are copied; no terminating '\0' is
// written to buffer.
(Please ignore the memory leak - it is not relevant)
I'm compiling it in VS2012 64bit in both Release and Debug (default configuration).
When i'm running the debugger in Debug mode, i can watch the sBasicString and buffer variables as expected (query their value, etc...)
But when i'm running the debugger in Release mode, i still can watch the sBasicString but not the buffer.
Why?
As Release mode has optimization set to "Full Optimization" (default value) and "Generate Debug Info" set to "YES" - i would expect either both variables can be watched or none.
EDIT
trying to add a proper usage of the buffer variable (avoid compiler optimization) - i still get the same behavior.
EDIT 2 Adding 64bit disassemble output of Release mode compilation
int main()
{
000000013F091000 mov rax,rsp
000000013F091003 push rbx
000000013F091004 sub rsp,50h
000000013F091008 mov qword ptr [rax-38h],0FFFFFFFFFFFFFFFEh
std::basic_string<char> sBasicString = "basic_string";
000000013F091010 xor ebx,ebx
000000013F091012 mov qword ptr [rax-20h],rbx
000000013F091016 mov qword ptr [rax-18h],rbx
000000013F09101A mov qword ptr [rax-18h],0Fh
000000013F091022 mov qword ptr [rax-20h],rbx
000000013F091026 mov byte ptr [rax-30h],bl
000000013F091029 lea r8d,[rbx+0Ch]
000000013F09102D lea rdx,[__xi_z+40h (013F093238h)]
000000013F091034 lea rcx,[rax-30h]
000000013F091038 call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::assign (013F0916A0h)
000000013F09103D nop
char* buffer = new char[1000];
000000013F09103E mov ecx,3E8h
000000013F091043 call operator new[] (013F091AD8h)
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
000000013F091048 mov edx,ebx
000000013F09104A cmp qword ptr [rsp+38h],rbx
000000013F09104F jbe main+73h (013F091073h)
{
char c;
c = sBasicString[i];
000000013F091051 lea rcx,[sBasicString]
000000013F091056 cmp qword ptr [rsp+40h],10h
000000013F09105C cmovae rcx,qword ptr [sBasicString]
buffer[i] = c;
000000013F091062 movzx ecx,byte ptr [rcx+rdx]
buffer[i] = c;
000000013F091066 mov byte ptr [rdx+rax],cl
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
000000013F091069 inc rdx
000000013F09106C cmp rdx,qword ptr [rsp+38h]
000000013F091071 jb main+51h (013F091051h)
}
std::cout << buffer << std::endl;
000000013F091073 mov rdx,rax
000000013F091076 mov rcx,qword ptr [__imp_std::cout (013F093068h)]
000000013F09107D call std::operator<<<std::char_traits<char> > (013F0910C0h)
000000013F091082 mov rcx,rax
000000013F091085 mov rdx,qword ptr [__imp_std::endl (013F093060h)]
000000013F09108C call qword ptr [__imp_std::basic_ostream<char,std::char_traits<char> >::operator<< (013F093098h)]
000000013F091092 nop
return 0;
000000013F091093 cmp qword ptr [rsp+40h],10h
000000013F091099 jb main+0A5h (013F0910A5h)
000000013F09109B mov rcx,qword ptr [sBasicString]
000000013F0910A0 call operator delete (013F091AEAh)
000000013F0910A5 mov qword ptr [rsp+40h],0Fh
000000013F0910AE mov qword ptr [rsp+38h],rbx
000000013F0910B3 mov byte ptr [sBasicString],0
return 0;
000000013F0910B8 xor eax,eax
}
000000013F0910BA add rsp,50h
000000013F0910BE pop rbx
000000013F0910BF ret
EDIT 3 Adding 32bit disassemble output
int main()
{
013B1000 push 0FFFFFFFFh
013B1002 push 13B2558h
013B1007 mov eax,dword ptr fs:[00000000h]
013B100D push eax
013B100E mov dword ptr fs:[0],esp
013B1015 sub esp,18h
013B1018 push esi
std::basic_string<char> sBasicString = "basic_string";
013B1019 push 0Ch
013B101B mov dword ptr [esp+18h],0
013B1023 mov dword ptr [esp+1Ch],0
013B102B push 13B3158h
013B1030 lea ecx,[esp+0Ch]
013B1034 mov dword ptr [esp+20h],0Fh
013B103C mov dword ptr [esp+1Ch],0
013B1044 mov byte ptr [esp+0Ch],0
013B1049 call std::basic_string<char,std::char_traits<char>,std::allocator<char> >::assign (013B17B0h)
013B104E mov dword ptr [esp+24h],0
char* buffer = new char[1000];
013B1056 push 3E8h
013B105B call operator new[] (013B1BD6h)
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
013B1060 xor edx,edx
013B1062 add esp,4
013B1065 mov esi,eax
013B1067 cmp dword ptr [esp+14h],edx
013B106B jbe main+8Dh (013B108Dh)
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
013B106D lea ecx,[ecx]
{
char c;
c = sBasicString[i];
013B1070 cmp dword ptr [esp+18h],10h
013B1075 lea ecx,[esp+4]
013B1079 cmovae ecx,dword ptr [esp+4]
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
013B107E inc edx
buffer[i] = c;
013B107F mov al,byte ptr [ecx+edx-1]
013B1083 mov byte ptr [edx+esi-1],al
013B1087 cmp edx,dword ptr [esp+14h]
013B108B jb main+70h (013B1070h)
}
std::cout << buffer << std::endl;
013B108D push dword ptr ds:[13B3030h]
013B1093 push esi
013B1094 push dword ptr ds:[13B3034h]
013B109A call std::operator<<<std::char_traits<char> > (013B10F0h)
013B109F add esp,8
013B10A2 mov ecx,eax
013B10A4 call dword ptr ds:[13B3028h]
return 0;
013B10AA mov dword ptr [esp+24h],0FFFFFFFFh
013B10B2 cmp dword ptr [esp+18h],10h
013B10B7 pop esi
013B10B8 jb main+0C5h (013B10C5h)
013B10BA push dword ptr [esp]
013B10BD call operator delete (013B1BECh)
013B10C2 add esp,4
}
013B10C5 mov ecx,dword ptr [esp+18h]
return 0;
013B10C9 mov dword ptr [esp+14h],0Fh
013B10D1 mov dword ptr [esp+10h],0
013B10D9 mov byte ptr [esp],0
013B10DD xor eax,eax
}
013B10DF mov dword ptr fs:[0],ecx
013B10E6 add esp,24h
013B10E9 ret
Looking at x86 Assembly posted in this question, I can apply my rudimentary Assembly knowledge to understand, where the buffer variable is hidden:
char* buffer = new char[1000];
013B105B call operator new[] (013B1BD6h)
013B1065 mov esi,eax
My candidate is esi register: operator new returned the result in eax and it is moved to esi. Let's follow this register:
for (size_t i = 0 ; i < sBasicString.size() ; ++i)
013B107E inc edx
buffer[i] = c;
013B107F mov al,byte ptr [ecx+edx-1]
013B1083 mov byte ptr [edx+esi-1],al
The last line stores the char value in al into the buffer. edx is obviously the loop counter, see inc edx. So, esi points to the buffer allocated by operator new. And finally:
013B1093 push esi
013B1094 push dword ptr ds:[13B3034h]
013B109A call std::operator<<<std::char_traits<char> > (013B10F0h)
Here esi is printed. So, the answer to your question: buffer variable is kept in the esi CPU register. You can add the line delete[] buffer; to the program and see, how operator delete is applied to esi in Assembly.
Since the whole loop doesn't contain function calls that can change CPU registers, optimized code produced by compiler just keeps the buffer in the register. Debugger doesn't know this and cannot display it.
x64 Assembly works by the same way, but it is more complicated and requires more time to understand. I hope you have an idea now, what happens.
The compiler is smart enough to see you are not doing anything with the buffer, so it simply optimized it away in the Release mode.
std::string, on the other hand comes from a library, and it is harder to detect that assigning to or reading from it does not have side effects. That's why the compiler didn't remove it.

Improper operand type error using __asm

What does improper operand type mean?
I'm trying to convert some c++ code into assembler
temp_char = OChar[i] //temp_char is a character and OChar is array and i is the index
ive tried
mov eax, i
mov temp_char, [eax+OChar]
and also
mov eax, i
movsx temp_char, [eax+OChar]
can anyone explain how i can avoid the improper operand type?
this is the full code
// Hand-written version of `for (i = 0; i < length; i++)` that is meant to load
// OChars[i] into temp_char, transform it, and store it to EChars[i].
// OChars, EChars and length are declared outside this block.
char temp_char;
int i;
__asm{
mov i,0
jmp checkend
// Loop increment: i = i + 1 (eax holds the new i afterwards).
startfor: mov eax,i
add eax,1
mov i,eax
// NOTE(review): if `length` is a memory variable this compares memory with
// memory, which x86 does not allow - confirm how length is declared.
checkend: cmp i,length
jge endloop
// NOTE(review): this is the line reported as "improper operand type":
// movsx needs a register destination, and temp_char is a memory operand.
// On the first pass eax has not been loaded with i (the jmp above skips
// the mov eax,i).
movsx temp_char, [eax+OChars]
//encryption of string//
push eax
and eax,0xAA
not al
mov edx,eax
pop eax
and eax,0x55
xor ecx,edx
xor ecx,eax
rol cl,2
sub al,0x20
// NOTE(review): no matching push for this pop is visible in this block.
pop ebp
//end of encryption//
// NOTE(review): same operand problem as the load above (memory destination,
// memory source).
movsx [eax+EChars], temp_char
jmp startfor
// NOTE(review): ret inside an inline __asm block returns from the enclosing
// function without the compiler-generated epilogue - confirm this is intended.
endloop: ret
}
You can't move stuff directly from memory to memory on x86 - you have to go through a register - along the lines of:
mov eax, i
// temp_char is a single char, so load exactly one byte through an 8-bit
// register; a 16-bit bx / word ptr would not match temp_char's size.
mov bl, byte ptr [eax+OChar]
mov temp_char, bl

Which D compilers will perform tail-call optimization on this function?

/// Returns `str` with its elements in reverse order.
/// The work is done by the nested helper `reverse_impl`, which carries the
/// partial result in an accumulator so the recursive call is the last
/// operation in the function.
string reverse(string str) pure nothrow
{
// temp: the reversed prefix built so far; str: the part still to process.
string reverse_impl(string temp, string str) pure nothrow
{
if (str.length == 0)
{
// Nothing left to consume: the accumulator is the result.
return temp;
}
else
{
// Prepend the first element to the accumulator and recurse on the rest.
return reverse_impl(str[0] ~ temp, str[1..$]);
}
}
return reverse_impl("", str);
}
As far as I know, this code should be subject to tail-call optimization, but I can't tell if DMD is doing it or not. Which of the D compilers support tail-call optimization, and will they perform it on this function?
From looking at the disassembly, DMD performs TCO on your code:
_D4test7reverseFNaNbAyaZAya12reverse_implMFNaNbAyaAyaZAya comdat
assume CS:_D4test7reverseFNaNbAyaZAya12reverse_implMFNaNbAyaAyaZAya
L0: sub ESP,0Ch
push EBX
push ESI
cmp dword ptr 018h[ESP],0
jne L1C
LC: mov EDX,024h[ESP]
mov EAX,020h[ESP]
pop ESI
pop EBX
add ESP,0Ch
ret 010h
L1C: push dword ptr 024h[ESP]
mov EAX,1
mov EDX,offset FLAT:_D12TypeInfo_Aya6__initZ
push dword ptr 024h[ESP]
mov ECX,024h[ESP]
push ECX
push EAX
push EDX
call near ptr __d_arraycatT
mov EBX,02Ch[ESP]
mov ESI,030h[ESP]
mov 034h[ESP],EAX
dec EBX
lea ECX,1[ESI]
mov 01Ch[ESP],EBX
mov 020h[ESP],ECX
mov 02Ch[ESP],EBX
mov 030h[ESP],ECX
mov 038h[ESP],EDX
add ESP,014h
cmp dword ptr 8[ESP],0
jne L1C
jmp short LC
_D4test7reverseFNaNbAyaZAya12reverse_implMFNaNbAyaAyaZAya ends
end
A very good resource for quickly looking at the code generated by gdc is http://d.godbolt.org/. We currently don't have a dmd equivalent.

Unhandled exception at 0x93b3237d in project00.exe: 0xC0000005: Access violation

In my program, when I leave the ASM section and return to the C++ code, I get "Unhandled exception at 0x93b3237d in project00.exe: 0xC0000005: Access violation". It is reported in Crtexe.c at the line mainret = main(argc, argv, envp); and, in the disassembly, at the point where 0Ch is added to ESP. I think the problem might be that the return address of the main function gets corrupted before returning, which causes it to fail. The program finds the intersection and union of two sets. I am using VS10 and am out of ideas.
include "iostream.h"//modify line to show up in code block
using namespace std;
typedef int DWORD; //4-byte double word
typedef char BYTE; //1 byte
typedef short WORD; //2-byte word
// Computes, in inline assembly, the intersection of the character sets in
// str0 and str1 (into IntersectArray) and their union (into result), then
// prints both arrays.
int main(){
int i =0;
BYTE str0[50] = "1qaz2wsx3edc4rfv5tgb6yhn7ujm8ik,9ol.0p;/-[?]F!Q";
BYTE str1[50] ="QAZ#WSX#EDC$RFV%TGB^YHN&UJM*IK)P:?_{?}|1`";
DWORD length0 ;
DWORD length1 ;
BYTE IntersectArray[50];
BYTE result [100] ;
__asm{
// Driver: measure both strings, then build the intersection and the union.
p:
pusha
lea eax, str0
call COUNT
mov length0,ecx
lea eax, str1
call COUNT
mov length1,ecx
call INTERSECTION
call JoinSet
xor eax,eax
popa
// NOTE(review): this ret executes inside main's inline asm, so main is left
// here without the compiler-generated epilogue and the C++ code after the
// block is never reached. This is the likely source of the reported access
// violation - confirm.
ret
// COUNT: in eax = address of a 0-terminated string; out ecx = number of
// bytes before the terminator. Overwrites eax and dl.
COUNT:
mov ecx,0;
Q: mov dl, [eax]
cmp dl,0h
JE cEND
inc eax
inc ecx
jmp Q
cEnd: ret
// INTERSECTION: for each byte of str0 (edx = cursor, ebx = countdown from
// length0) scan str1; bytes that are found are appended to IntersectArray
// (esi = write cursor).
INTERSECTION:
lea edx, str0
mov ebx, length0
lea esi, IntersectArray
first: mov al, [edx]
mov ecx, length1
lea edi, str1
repne SCASB
// NOTE(review): "found" is decided from ecx, not from ZF; a match on the
// last scanned byte also leaves ecx == 0 and would be treated as not
// found - confirm.
cmp ecx,0
JNZ INTER
Back: inc edx
cmp ebx,0
JZ EXITSTUFF
dec ebx
jmp first
INTER: mov [esi] , al
inc esi
jmp Back
EXITSTUFF:
// Terminate the intersection output.
mov [esi], 0
ret
// JoinSet: copy str0 into result, then append each byte of str1 that the
// scan of str0 does not find.
JoinSet :
lea edi, result
lea esi, str0
mov ecx, length0
REP MOVSB
lea edx, str1
mov ebx, length1
// NOTE(review): the append position is result + length1, although length0
// bytes were just copied - confirm which length is intended.
lea esi, result + [ebx]
f: mov al, [edx]
mov ecx, length0
lea edi, str0
repne SCASB
cmp ecx,0
JNZ B
mov [esi] , al
inc esi
B: inc edx
cmp ebx,0
JZ EXITSTU
dec ebx
jmp f
EXITSTU:
// Terminate the union output.
mov [esi], 0
ret
}
// NOTE(review): `rest` below is a bare token; presumably a leftover marker
// or a label missing its colon - confirm.
rest
// Print every element of both arrays, regardless of the terminators written
// by the assembly.
for(int i =0;i < 50;i++){
cout <<IntersectArray[i];}
cout << endl;
for (int i =0; i<100;i++)
cout <<result[i];
cout << endl;
system("pause");
return 0;
}
Probably a dumb comment since I've never done any x86 assembly. But I thought asm was inline? So what is your first 'ret' actually returning from?

Call not returning properly [X86_ASM]

This is C++ using x86 inline assembly [Intel syntax]
Function:
// Intended to call each function pointer passed as a variadic argument and
// collect the values they leave in eax into a newly allocated DWORD array.
//   lArgs - count used to size the result array and bound the loop
// Returns the array allocated with new[]; the caller owns it.
DWORD *Call ( size_t lArgs, ... ){
DWORD *_ret = new DWORD[lArgs];
__asm {
xor edx, edx
xor esi, esi
xor edi, edi
// edx is the loop counter and starts at 1.
inc edx
start:
cmp edx, lArgs
je end
push eax
push edx
push esi
// esi = 4 * edx : byte offset used both for the call target and for _ret.
mov esi, 0x04
imul esi, edx
mov ecx, esi
add ecx, _ret
push ecx
// NOTE(review): with edx starting at 1 this reads ebp+4, ebp+8, ...; under
// the usual MSVC x86 frame layout ebp+4 is the return address and ebp+8 the
// first argument, so the first call would jump to the caller - confirm.
call dword ptr[ebp+esi] //Doesn't return to the next instruction, returns to the caller of the parent function.
pop ecx
mov [ecx], eax
// NOTE(review): pushes were eax, edx, esi (then ecx); these pops are not in
// reverse order, so eax and esi receive each other's saved values.
pop eax
pop edx
pop esi
inc edx
jmp start
end:
mov eax, _ret
// NOTE(review): ret here bypasses the compiler-generated epilogue of Call.
ret
}
}
The purpose of this function is to call multiple functions/addresses without calling them individually.
Why I'm having you debug it?
I have to start school for the day, and I need to have it done by evening.
Thanks alot, iDomo
Thank you for a complete compile-able example, it makes solving problems much easier.
According to your Call function signature, when the stack frame is set up, the lArgs is at ebp+8 , and the pointers start at ebp+C. And you have a few other issues. Here's a corrected version with some push/pop optimizations and cleanup, tested on MSVC 2010 (16.00.40219.01) :
// Calls each of the lArgs function pointers passed as variadic arguments and
// stores the value each one leaves in eax into a newly allocated DWORD array.
//   lArgs - number of function pointers that follow
// Returns the array allocated with new[]; the caller must delete[] it.
// There is no C++ return statement: the result is left in eax by the asm
// block and the compiler-generated epilogue returns it.
DWORD *Call ( size_t lArgs, ... ) {
DWORD *_ret = new DWORD[lArgs];
__asm {
xor edx, edx
xor esi, esi
xor edi, edi
inc edx
// Save esi once for the whole loop; restored at `end`.
push esi
start:
cmp edx, lArgs
; since you started counting at 1 instead of 0
; you need to stop *after* reaching lArgs
ja end
// Keep the counter safe across the call.
push edx
; you're trying to call [ebp+0xC+edx*4-4]
; a simpler way of expressing that - 4*edx + 8
; (4*edx is the same as edx << 2)
mov esi, edx
shl esi, 2
add esi, 0x8
call dword ptr[ebp+esi]
; and here you want to write the return value
; (which, btw, your printfs don't produce, so you'll get garbage)
; into _ret[edx*4-4] , which equals ret[esi - 0xC]
// esi still holds 4*edx + 8, so _ret + esi - 0xC is the byte address
// _ret + 4*(edx - 1).
add esi, _ret
sub esi, 0xC
mov [esi], eax
pop edx
inc edx
jmp start
end:
pop esi
mov eax, _ret
; ret ; let the compiler clean up, because it created a stack frame and allocated space for the _ret pointer
}
}
And don't forget to delete[] the memory returned from this function after you're done.
I notice that, before calling, you push EAX, EDX, ESI, ECX (in order), but don't pop in the reverse order after returning. If the first CALL returns properly, but subsequent ones don't, that could be the issue.