MSVC++ inline assembly unhandled exception 0x80000004: Single step - c++

I am writing a code using inline asm with VC++ 2019 32bit. I have written a function to switch coroutine.This is the source code :
I tested it and it works well. The argument is a uintptr_t array that contains the register value. This function will exchagne register value except ebx.
The problem is the "Unhandled exception at 0x5514704E (pevm.dll) in tool.exe: 0x80000004: Single step.".
Register value : EAX = 00000246 EBX = 0019F5A0 ECX = E2F13240 EDX = 0019F5A0 ESI = 0019F3A8 EDI = 0019F3C8 EIP = 5514704E ESP = 0019F2BC EBP = 0019F2C0 EFL = 00000202
I can not understand why "pop eax" throw exception ?
Maybe my code destroy some "internal data structure" and the program happened to stop here, like double free. Any suggestions to how to debug ?
inline __declspec(naked) void switchCoroutine(uintptr_t* vreg)
{
//discard ebx
__asm
{
push ebp
mov ebp, esp
//save
push eax
//argument
mov ebx, [ebp + 8]
//exchange eflags
pushfd
pop eax
push[ebx]
popfd
mov[ebx], eax
pop eax
//exchange eax ,ecx,edx,esi,edi
XCHG eax, [ebx + type int]
xchg ecx, [ebx + 3 * type int]
xchg edx, [ebx + 4 * type int]
xchg esi, [ebx + 5 * type int]
xchg edi, [ebx + 6 * type int]
//exchange ebp,esp
mov esp, ebp
pop ebp
xchg ebp, [ebx + 7 * type int]
xchg esp, [ebx + 8 * type int]
//go eip
ret
}
}
55147031 C2 04 00 ret 4
--- No source file -------------------------------------------------------------
55147034 CC int 3
55147035 CC int 3
55147036 CC int 3
55147037 CC int 3
55147038 CC int 3
55147039 CC int 3
5514703A CC int 3
5514703B CC int 3
5514703C CC int 3
5514703D CC int 3
5514703E CC int 3
5514703F CC int 3
--- D:\code\c++\PEVM\core\vm\vdata.h -------------------------------------------
643: //discard ebx
644: __asm
645: {
646: push ebp
55147040 55 push ebp
647: mov ebp, esp
55147041 8B EC mov ebp,esp
648: //save
649: push eax
55147043 50 push eax
650: //argument
651: mov ebx, [ebp + 8]
55147044 8B 5D 08 mov ebx,dword ptr [vreg]
652:
653: //exchange eflags
654: pushfd
55147047 9C pushfd
655: pop eax
55147048 58 pop eax
656: push[ebx]
55147049 FF 33 push dword ptr [ebx]
657: popfd
5514704B 9D popfd
658: mov[ebx], eax
5514704C 89 03 mov dword ptr [ebx],eax
659:
660: pop eax
5514704E 58 pop eax //HERE **Unhandled exception at 0x5514704E (pevm.dll) in tool.exe: 0x80000004: Single step.**
661: //exchange eax ,ecx,edx,esi,edi
662: XCHG eax, [ebx + type int]
5514704F 87 43 04 xchg eax,dword ptr [ebx+4]
663: xchg ecx, [ebx + 3 * type int]
55147052 87 4B 0C xchg ecx,dword ptr [ebx+0Ch]
664: xchg edx, [ebx + 4 * type int]
55147055 87 53 10 xchg edx,dword ptr [ebx+10h]
665: xchg esi, [ebx + 5 * type int]
55147058 87 73 14 xchg esi,dword ptr [ebx+14h]
666: xchg edi, [ebx + 6 * type int]
5514705B 87 7B 18 xchg edi,dword ptr [ebx+18h]
667:
668: //exchange ebp,esp
669: mov esp, ebp
5514705E 8B E5 mov esp,ebp
670: pop ebp
55147060 5D pop ebp
671: xchg ebp, [ebx + 7 * type int]
55147061 87 6B 1C xchg ebp,dword ptr [ebx+1Ch]
672: xchg esp, [ebx + 8 * type int]
55147064 87 63 20 xchg esp,dword ptr [ebx+20h]
673:
674: //go eip
675: ret
55147067 C3 ret
--- No source file -------------------------------------------------------------
55147068 CC int 3
55147069 CC int 3
5514706A CC int 3
5514706B CC int 3
5514706C CC int 3
5514706D CC int 3
5514706E CC int 3
5514706F CC int 3

At 0x5514704B you set EFLAGS. When it has TF flag set, a debug exception (#DB) will be generated by the CPU after next executed instruction. Next after popfd is mov[ebx], eax, thus the exception is generated after it's execution. Since #DB is a trap, eip points to address after the executed instruction, pop eax in your case.
Check if push[ebx] at 0x55147048 has TF bit set.

Related

Assembly of constructor for virtual inheritance

Here's a simple inheritance usinig a virtual base class (code available on Compiler Explorer).
class B {
public:
int i = 1;
};
class D : virtual public B {
public:
int j = 2;
};
void Assign(B *b) {
b->i = 2;
}
int main() {
B *b = new D();
Assign(b);
return 0;
}
The assembly listing of the main() function looks like this:
09 main: # #main
10 push rbp
11 mov rbp, rsp
12 sub rsp, 32
13 mov eax, 16
14 mov edi, eax
15 mov dword ptr [rbp - 4], 0
16 call operator new(unsigned long)
17 xor esi, esi
18 mov ecx, 16
19 mov edx, ecx
20 mov rdi, rax
21 mov qword ptr [rbp - 24], rax # 8-byte Spill
22 call memset
23 mov rdi, qword ptr [rbp - 24] # 8-byte Reload
24 call D::D() [complete object constructor]
25 xor ecx, ecx
26 mov eax, ecx
27 mov rdx, qword ptr [rbp - 24] # 8-byte Reload
28 cmp rdx, 0
29 mov qword ptr [rbp - 32], rax # 8-byte Spill
30 je .LBB1_2
31 mov rax, qword ptr [rbp - 24] # 8-byte Reload
32 mov rcx, qword ptr [rax]
33 mov rcx, qword ptr [rcx - 24]
34 add rax, rcx
35 mov qword ptr [rbp - 32], rax # 8-byte Spill
36 .LBB1_2:
37 mov rax, qword ptr [rbp - 32] # 8-byte Reload
38 mov qword ptr [rbp - 16], rax
39 mov rdi, qword ptr [rbp - 16]
40 call Assign(B*)
41 xor eax, eax
42 add rsp, 32
43 pop rbp
44 ret
What is the effect of line 27-38 of the assembly?
What is the value of rax in line 29?
Why is there a branch statement?
The effect of lines 27-38 is to convert a D * to a B *. Because B is a virtual base class, it can have a variable offset from the start of D. Those 12 lines calculate where the B object is, in an unoptimized way.
The value of eax on line 29 is 0 (see lines 25-26).
The branch statement on line 30 is the result of a NULL pointer check. If the pointer to D is NULL, the conversion to a B * will also be NULL and the extra code to determine the correct offset is not wanted in that case.

ASM not working after I packed my EXE

I attached my asm into my source code of dll and hooked it to my exe, and it works like a charm but when I packed my exe using exe packer. the dll with asm not working on exe packed. any idea how to solve this problem?
#include "StdAfx.h"
void __declspec(naked) MyStuff() {
__asm {
PUSH EBP
MOV EBP, ESP
MOV EAX, DWORD PTR SS : [EBP + 0x8]
MOV EAX, DWORD PTR DS : [EAX]
XOR EAX, ENCPACKET
MOV DWORD PTR SS : [EBP + 0x8], EAX
MOV AX, WORD PTR SS : [EBP + 0xA]
POP EBP
RETN 0x4
}
}
void SetStuff(){
SetJmp((LPVOID)0x00424B1C, MyStuff);
}
The result was:
005DB4E0 /> \55 PUSH EBP
005DB4E1 |. 8BEC MOV EBP,ESP
005DB4E3 |. 56 PUSH ESI
005DB4E4 |. FF75 0C PUSH DWORD PTR SS:[EBP+C]
005DB4E7 |. B9 403E0801 MOV ECX,01083E40
005DB4EC |. E8 38CCE3FF CALL 00418129
005DB4F1 |. 8BF0 MOV ESI,EAX
005DB4F3 |. 85F6 TEST ESI,ESI
005DB4F5 |. 74 1E JE SHORT 005DB515
005DB4F7 |. FF75 08 PUSH DWORD PTR SS:[EBP+8]
005DB4FA |. 8BCE MOV ECX,ESI
005DB4FC |. E8 D799E4FF CALL 00424ED8
005DB501 |. 8B4D 10 MOV ECX,DWORD PTR SS:[EBP+10]
005DB504 |. FF75 08 PUSH DWORD PTR SS:[EBP+8]
005DB507 |. 8901 MOV DWORD PTR DS:[ECX],EAX
005DB509 |. 8BCE MOV ECX,ESI
005DB50B |. E8 EEA5E4FF CALL 00425AFE
005DB510 |. 8B4D 14 MOV ECX,DWORD PTR SS:[EBP+14]
005DB513 |. 8901 MOV DWORD PTR DS:[ECX],EAX
005DB515 |> 5E POP ESI
005DB516 |. 5D POP EBP
005DB517 \. C2 1000 RETN 10
and may I ask is it possible to run an asm into specific offset like this?
#include "StdAfx.h"
void __declspec(naked) MyStuff() {
__asm {
005DB4E0-> PUSH EBP
005DB4E1-> MOV EBP, ESP
005DB4E3-> MOV EAX, DWORD PTR SS : [EBP + 0x8]
005DB4E4-> MOV EAX, DWORD PTR DS : [EAX]
005DB4E7-> XOR EAX, ENCPACKET
005DB4EC-> MOV DWORD PTR SS : [EBP + 0x8], EAX
005DB4E1-> MOV AX, WORD PTR SS : [EBP + 0xA]
005DB4F3-> POP EBP
005DB4F4-> RETN 0x4
}
}

GCC generated assembly

Why printf function causes the change of prologue?
C code_1:
#include <cstdio>
int main(){
int a = 11;
printf("%d", a);
}
GCC -m32 generated one:
.LC0:
.string "%d"
main:
lea ecx, [esp+4] // What's purpose of this three
and esp, -16 // lines?
push DWORD PTR [ecx-4] //
push ebp
mov ebp, esp
push ecx
sub esp, 20 // why sub 20?
mov DWORD PTR [ebp-12], 11
sub esp, 8
push DWORD PTR [ebp-12]
push OFFSET FLAT:.LC0
call printf
add esp, 16
mov eax, 0
mov ecx, DWORD PTR [ebp-4]
leave
lea esp, [ecx-4]
ret
C code_2:
#include <cstdio>
int main(){
int a = 11;
}
GCC -m32:
main:
push ebp
mov ebp, esp
sub esp, 16
mov DWORD PTR [ebp-4], 11
mov eax, 0
leave
ret
What is the purpose of first three lines added in first code?
Please, explain first assembly code, if you can.
EDIT:
64-bit mode:
.LC0:
.string "%d"
main:
push rbp
mov rbp, rsp
sub rsp, 16
mov DWORD PTR [rbp-4], 11
mov eax, DWORD PTR [rbp-4]
mov esi, eax
mov edi, OFFSET FLAT:.LC0
mov eax, 0
call printf
mov eax, 0
leave
ret
The insight is that the compiler keep the stack aligned at function calls.
The alignment is 16 byte.
lea ecx, [esp+4] ;Save original ESP to ECX (ESP+4 actually)
and esp, -16 ;Align stack on 16 bytes (Lower esp)
push DWORD PTR [ecx-4] ;Push main return address (Stack at 16B + 4)
;My guess is to aid debugging tools that expect the RA
;to be at [ebp+04h]
push ebp
mov ebp, esp ;Prolog (Stack at 16B+8)
push ecx ;Save ECX (Original stack pointer) (Stack at 16B+12)
sub esp, 20 ;Reserve 20 bytes (Stack at 16B+0, ALIGNED AGAIN)
;4 for alignment + 1x16 for a variable (variable space is
;allocated in multiple of 16)
mov DWORD PTR [ebp-12], 11 ;a = 11
sub esp, 8 ;Stack at 16B+8 for later alignment
push DWORD PTR [ebp-12] ;a
push OFFSET FLAT:.LC0 ;"%d" (Stack at 16B)
call printf
add esp, 16 ;Remove args+pad from the stack (Stack at 16B)
mov eax, 0 ;Return 0
mov ecx, DWORD PTR [ebp-4] ;Restore ECX without the need to add to esp
leave ;Restore EBP
lea esp, [ecx-4] ;Restore original ESP
ret
I don't know why the compiler saves esp+4 in ecx instead of esp (esp+4 is the address of the first parameter of main).

Does empty "else" cost any time

Is there any difference in computational cost of
if(something){
return something;
}else{
return somethingElse;
}
and
if(something){
return something;
}
//else (put in comments for readibility purposes)
return somethingElse;
In theory we have command (else) but it doesn't seem it should make an actuall difference.
Edit:
After running code for different set sizes, I found that there actually is a differrence, code without else appears to be about 1.5% more efficient. But it most likely depends on compiler, as stated by many people below. Code I tested it on:
int withoutElse(bool a){
if(a)
return 0;
return 1;
}
int withElse(bool a){
if(a)
return 0;
else
return 1;
}
int main(){
using namespace std;
bool a=true;
clock_t begin,end;
begin= clock();
for(__int64 i=0;i<1000000000;i++){
a=!a;
withElse(a);
}
end = clock();
cout<<end-begin<<endl;
begin= clock();
for(__int64 i=0;i<1000000000;i++){
a=!a;
withoutElse(a);
}
end = clock();
cout<<end-begin<<endl;
return 0;
}
Checked on loops from 1 000 000 to 1 000 000 000, and results were consistently different
Edit 2:
Assembly code (once again, generated using Visual Studio 2010) also shows small difference (appareantly, I'm no good with assemblers :()
?withElse##YAH_N#Z PROC ; withElse, COMDAT
; Line 12
push ebp
mov ebp, esp
sub esp, 192 ; 000000c0H
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-192]
mov ecx, 48 ; 00000030H
mov eax, -858993460 ; ccccccccH
rep stosd
; Line 13
movzx eax, BYTE PTR _a$[ebp]
test eax, eax
je SHORT $LN2#withElse
; Line 14
xor eax, eax
jmp SHORT $LN3#withElse
; Line 15
jmp SHORT $LN3#withElse
$LN2#withElse:
; Line 16
mov eax, 1
$LN3#withElse:
; Line 17
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
?withElse##YAH_N#Z ENDP ; withElse
and
?withoutElse##YAH_N#Z PROC ; withoutElse, COMDAT
; Line 4
push ebp
mov ebp, esp
sub esp, 192 ; 000000c0H
push ebx
push esi
push edi
lea edi, DWORD PTR [ebp-192]
mov ecx, 48 ; 00000030H
mov eax, -858993460 ; ccccccccH
rep stosd
; Line 5
movzx eax, BYTE PTR _a$[ebp]
test eax, eax
je SHORT $LN1#withoutEls
; Line 6
xor eax, eax
jmp SHORT $LN2#withoutEls
$LN1#withoutEls:
; Line 7
mov eax, 1
$LN2#withoutEls:
; Line 9
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret 0
?withoutElse##YAH_N#Z ENDP ; withoutElse
It is generically different, but the compiler may decide to execute the same jump in both cases (it will practically do this always).
The best way to see what a compiler does is reading the assembler. Assuming that you are using gcc you can try with
gcc -g -c -fverbose-asm myfile.c; objdump -d -M intel -S myfile.o > myfile.s
which creates a mix of assembler/c code and makes the job easier at the beginning.
As for your example it is:
CASE1
if(something){
23: 83 7d fc 00 cmp DWORD PTR [ebp-0x4],0x0
27: 74 05 je 2e <main+0x19>
return something;
29: 8b 45 fc mov eax,DWORD PTR [ebp-0x4]
2c: eb 05 jmp 33 <main+0x1e>
}else{
return 0;
2e: b8 00 00 00 00 mov eax,0x0
}
CASE2
if(something){
23: 83 7d fc 00 cmp DWORD PTR [ebp-0x4],0x0
27: 74 05 je 2e <main+0x19>
return something;
29: 8b 45 fc mov eax,DWORD PTR [ebp-0x4]
2c: eb 05 jmp 33 <main+0x1e>
return 0;
2e: b8 00 00 00 00 mov eax,0x0
As you could imagine there are no differences!
It won't compile if you type `return
Think that once the code gets compiled, all ifs, elses and loops are changed to goto's
If (cond) { code A } code B
turns to
if cond is false jump to code b
code A
code B
and
If (cond) { code A } else { code B } code C
turns to
if cond is false jump to code B
code A
ALWAYS jump to code C
code B
code C
Most processors 'guess' whether they're going to jump or not before checking if they actually jump. Depending on the processor, it might affect the performance to fail a guess.
So the answer is YES! (unless there's an ALWAYS jump at the end of first comparison) It will take 2-3 cycles to do the ALWAYS jump which isn't in the first if.

_asm swap and compiler additions

I've just written a bubble_sort of an integer array (see previous question) and decided to ignore the standard swap and implement an assembly swap, which looks like this:
int swap(int* x, int* y)
{
if(x != y)
{
_asm
{
mov eax,[x];
mov ebx, [y];
mov [y],eax;
mov [x], ebx;
}
}
return 0;
}
I was actually sure that it will be inserted into the resulting code as is and will work.
Well, my code which uses this swap does work, but I've looked into what the complier turned it into, and my swap was changed into this:
if(x != y)
00E01A6F inc ebp
00E01A70 or byte ptr [ebx],bh
00E01A72 inc ebp
00E01A73 or al,74h
if(x != y)
00E01A75 or al,8Bh
{
_asm
{
mov eax,[x];
00E01A77 inc ebp
00E01A78 or byte ptr [ebx+45890C5Dh],cl
mov [y],eax;
00E01A7E or al,89h
mov [x], ebx;
00E01A80 pop ebp
00E01A81 or byte ptr [ebx],dh
}
}
return 0;
00E01A83 rcr byte ptr [edi+5Eh],5Bh
}
I've compiled it in MS VS 2012.
What do all those extra lines mean, and why are they there? Why can't my _asm fragment just be used?
Can you tell us how you've compiled that function and how you got the disassembly?
When I compile using
cl /FAsc -c test.c
I get the following in the assembly listing for the inline assembler part:
; 4 : {
; 5 : _asm
0000a 53 push ebx
; 6 : {
; 7 : mov eax,[x];
0000b 8b 44 24 08 mov eax, DWORD PTR _x$[esp]
; 8 : mov ebx, [y];
0000f 8b 5c 24 0c mov ebx, DWORD PTR _y$[esp]
; 9 : mov [y],eax;
00013 89 44 24 0c mov DWORD PTR _y$[esp], eax
; 10 : mov [x], ebx;
00017 89 5c 24 08 mov DWORD PTR _x$[esp], ebx
; 4 : {
; 5 : _asm
0001b 5b pop ebx
$LN4#swap:
; 11 : }
One thing to note is that you aren't swapping what you'd really like to swap - your swapping the pointers that are passed to the function, not the items the pointers refer to. So when the function returns, the swapped data is thrown away. The function is just one big nop.
You might want to try somethign like:
_asm
{
mov eax,[x];
mov ebx,[y];
mov ecx, [eax]
mov edx, [ebx]
mov [eax], edx
mov [ebx], ecx
}
But frankly, performing the swap in C would likely result in similar (or better) code.
It's missing the first and last bytes. If you look at what the code is now:
inc ebp ; 45
or byte ptr [ebx],bh ; 08 3B
inc ebp ; 45
or al,74h ; 0C 74
or al,8Bh ; 0C 8B
inc ebp ; 45
or byte ptr [ebx+45890C5Dh],cl ; 08 8B 5D 0C 89 45
or al,89h ; 0C 89
pop ebp ; 5B
or byte ptr [ebx],dh ; 08 33
rcr byte ptr [edi+5Eh],5Bh ; C0 5F 5E 5B
If you ignore the first two bytes, you get this:
cmp eax, [ebp + 12] ; 3B 45 0C
jz skip ; 74 0C
mov eax, [ebx + 8] ; 8B 45 08
mov ebx, [ebp + 12] ; 8B 5D 0C
mov [ebp + 12], eax ; 89 45 0C
mov [ebx + 8], ebx ; 89 5B 08
skip:
xor eax, eax ; 33 C0
pop edi ; 5F
pop esi ; 5E
pop ebp ; 5B
It's missing the ret at the end, and, crucially, some instruction that has eax and [ebp + 8] as arguments (a mov would make sense there). The missing first byte desynchronized the disassembly with the instruction stream.
It's also missing the prologue, of course.
You need to push at the beginning and pop in the end if you want to see the end of main() :)
_asm
{
push eax //back-up of registers
push ebx
mov eax,[x];
mov ebx, [y];
mov [y],eax;
mov [x], ebx;
pop ebx //resume the registers where they were
pop eax // so, compiler can continue working normally
}
Because compiler uses them for other things too!
You could also have used xchg
mov eax,[x]
xchg eax, [y]
mov [x],eax
You 64 bit? Then there is a single read, single swap, single write. You can search it.
Good day!