Related
My goal is to find what two-byte opcodes generate an illegal instruction exception.
For example, opcodes 0F 0B UD2 raises an invalid opcode exception. The UD2 instruction is provided for software testing to explicitly generate an invalid opcode.
Warning: snake-oil code ahead, as I'm not familiar with Windows internals.
The code below allocates a 4K page with read/write/execute permissions and using UD2 as a starting point it tries to determine all the possible two-byte opcodes.
First, it copies the two-byte opcodes to the last two bytes of the 4K page
then executes them and checks for the exception code.
I figured that executing the last two bytes of the page would do one of two things:
Raise an illegal instruction exception (EXCEPTION_ILLEGAL_INSTRUCTION) when the two bytes on their own form an illegal instruction.
Raise an access violation (EXCEPTION_ACCESS_VIOLATION) when the instruction is longer than two bytes and so extends beyond the 4K page.
Running the code below shows interesting instructions plus many unknowns too:
Illegal opcodes 0x0f 0x0b (error 0xc000001d)
ud2 - Generates an invalid opcode.
Illegal opcodes 0x0f 0x37 (error 0xc000001d)
getsec - Exit authenticated code execution mode.
Illegal opcodes 0x0f 0xaa (error 0xc000001d)
rsm - Resume operation of interrupted program.
Question
The hack'ish code runs fine in this opcode range
Executing opcodes 0x0f 0x0b ... Executing opcodes 0x0f 0xcb
until it encounters these two opcodes
0x0f 0xcc bswap esp
It seems anything that manipulates the stack pointer causes issues whereby it's stuck at this point (clicking Continue just repeats the message)
I've tried moving the opcode execution into its own thread since they have their own stack, but that didn't help!
Is there a way to preserve the stack pointers RSP and RBP or maybe there's a simple fix to resolve it?
(Built using M$ Visual C++ 2019)
#include <windows.h>
#include <stdio.h>
#include <string.h>
#include <string.h>
#include <intrin.h>
// The UD2 (0x0F, 0x0B) instruction is guaranteed to generate an invalid opcode exception.

// Exception code raised by the most recent ExecuteOpcodes() call, or 0 when
// the executed bytes returned without raising an exception.
DWORD InstructionResult;

/*
 * Calls the bytes stored at offset 0xFFE of the block pointed to by `mem`
 * as if they were a function taking no arguments, and records the outcome
 * in InstructionResult.
 *
 * mem: base address of the block; the caller must have written the opcodes
 *      at mem + 0xFFE and mem + 0xFFF beforehand.
 */
void ExecuteOpcodes(LPVOID mem)
{
    // Clear the previous outcome first. Without this, opcodes that return
    // normally (for example 0xC3, RET) would leave the exception code of an
    // earlier iteration in place and be reported as illegal by the caller.
    InstructionResult = 0;

    __try
    {
        // Execute opcodes...
        ((void(*)())((unsigned char*)mem + 0xFFE))();
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        // Every exception is swallowed; only its code is kept.
        InstructionResult = GetExceptionCode();
    }
}
int main()
{
LPVOID mem = VirtualAlloc(NULL, 2, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
DWORD oldProtect = VirtualProtect(mem, 2, PAGE_EXECUTE_READWRITE, &oldProtect);
// Start searching at the UD2 (0x0F, 0x0B) instruction which is guaranteed to generate an invalid opcode exception.
for (int i = 15; i <= 255; i++)
{
for (int j = 11; j <= 255; j++)
{
// Write two byte opcodes at the 4K page end.
*((unsigned char*)mem + 0xFFE) = i;
*((unsigned char*)mem + 0xFFF) = j;
printf("Executing opcodes 0x%02x 0x%02x\n",i,j);
HANDLE hThread = CreateThread(0, 0, (LPTHREAD_START_ROUTINE)ExecuteOpcodes, mem, 0, 0);
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
if (InstructionResult == EXCEPTION_ILLEGAL_INSTRUCTION)
{
printf("Illegal opcodes 0x%02x 0x%02x (error 0x%08x)\n", i, j, InstructionResult);
}
}
}
VirtualFree(mem, 0, MEM_RELEASE);
return 0;
}
UPDATE
Based upon the good answer about creating a child process, I've pasted the updated code here: pastebin.com/j3NkL44q
#define _CRT_SECURE_NO_WARNINGS
#include <windows.h>
#include <stdio.h>
// Single-byte opcodes that the scan treats as illegal and skips.
int IllegalSingleByteOpcodes[] = { 0x06 ,0x07, 0x0e, 0x16, 0x17, 0x1e, 0x1f, 0x27, 0x2f, 0x37, 0x3f, 0x60, 0x61, 0xce, 0xd6 };

// Returns true when `opcode` appears in IllegalSingleByteOpcodes, false otherwise.
bool is_illegal_opcode(int opcode) {
    const int *cursor = IllegalSingleByteOpcodes;
    const int *const table_end =
        IllegalSingleByteOpcodes + sizeof(IllegalSingleByteOpcodes) / sizeof(IllegalSingleByteOpcodes[0]);

    // Linear scan; the table is tiny, so nothing smarter is needed.
    while (cursor != table_end) {
        if (*cursor == opcode) {
            return true;
        }
        ++cursor;
    }
    return false;
}
// Create and wait for a process with the given opcodes
void create_and_wait_for_process(int opcode1, int opcode2) {
// Set up startup and process info
STARTUPINFO si;
PROCESS_INFORMATION pi;
ZeroMemory(&si, sizeof(si));
si.cb = sizeof(si);
ZeroMemory(&pi, sizeof(pi));
// Create command line string
char cmdline[256];
snprintf(cmdline, sizeof(cmdline), "IllegalOpcodes.exe %d %d", opcode1, opcode2);
// Create process
if (!CreateProcess(NULL, cmdline, NULL, NULL, FALSE, 0, NULL, NULL, &si, &pi)) {
printf("CreateProcess failed (%d).\n", GetLastError());
exit(1);
}
// Wait for process to finish
if (WaitForSingleObject(pi.hProcess, 1000) != WAIT_OBJECT_0) {
TerminateProcess(pi.hProcess, 0);
}
// Clean up handles
CloseHandle(pi.hProcess);
CloseHandle(pi.hThread);
}
// Drives the scan: for every first byte that is not in the illegal-opcode
// table, runs one child process per possible second byte.
int main() {
    int opcode1 = 0;
    while (opcode1 <= 255) {
        if (!is_illegal_opcode(opcode1)) {
            printf("\nChecking Opcode 0x%02x ...\n", opcode1);
            // Try all 256 second bytes for this first byte.
            int opcode2 = 0;
            do {
                create_and_wait_for_process(opcode1, opcode2);
                opcode2++;
            } while (opcode2 <= 255);
        } else {
            // First byte is in the illegal table: nothing to run.
            printf("\nSkipping Illegal Opcode 0x%02x ...\n", opcode1);
        }
        opcode1++;
    }
    return 0;
}
------------------- IllegalOpcodes.cpp -------------------
#include <windows.h>
#include <stdio.h>
// Byte offset of the last two bytes of a 4K page; the opcodes are placed there
#define CODE_PAGE_END_OFFSET 0xFFE

/*
 * Stores opcode1/opcode2 in the last two bytes of the page at code_page_mem,
 * calls them as a function, and prints "{0x..,0x..}," when the call raises
 * EXCEPTION_ILLEGAL_INSTRUCTION. Any other exception is silently ignored.
 */
void write_and_execute_opcodes(LPVOID code_page_mem, int opcode1, int opcode2)
{
    unsigned char *page_tail = (unsigned char*)code_page_mem + CODE_PAGE_END_OFFSET;
    page_tail[0] = opcode1;
    page_tail[1] = opcode2;

    __try
    {
        // Jump into the freshly written bytes.
        ((void(*)())page_tail)();
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        // Only illegal-instruction faults are of interest here.
        if (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
        {
            printf("{0x%02x,0x%02x},", opcode1, opcode2);
        }
    }
}
int main(int argc, char* const argv[])
{
int opcode1 = atoi(argv[1]);
int opcode2 = atoi(argv[2]);
LPVOID code_page_mem = VirtualAlloc(NULL, 2, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
DWORD old_protect = VirtualProtect(code_page_mem, 2, PAGE_EXECUTE_READWRITE, &old_protect);
write_and_execute_opcodes(code_page_mem, opcode1, opcode2);
VirtualFree(code_page_mem, 0, MEM_RELEASE);
return 0;
}
It now includes checks to avoid these single byte illegal opcodes
32-bit Legal
06 push es
07 pop es
0e push cs
16 push ss
17 pop ss
1e push ds
1f pop ds
27 daa
2f das
37 aaa
3f aas
60 pushad
61 popad
ce into
d6 ??? <--- http://ref.x86asm.net/coder64.html#xD6
64-bit Illegal
06 ???
07 ???
0e ???
16 ???
17 ???
1e ???
1f ???
27 ???
2f ???
37 ???
3f ???
60 ???
61 ???
ce ???
d6 ???
maybe there's a simple fix to resolve it?
The UNIX-standard way to resolve this is to do all the test execution in a child process.
When I last worked on Windows 15 years ago, creating a child process was very expensive (slow). But since you have fewer than 64K byte combinations to try, even a slow mechanism will get you all the answers in at most a few hours.
The target program is an x86 program. I tried to use the following code to call MessageBoxA; the program did not report an error, but MessageBoxA did not execute either.
// Attempt to call MessageBoxA in the target process by making it the start
// routine of a remote thread. One remote allocation holds, back to back:
// the title string, the content string, and four DWORD "parameters".
const char* title = "hello";
const char* content = "world";
size_t titleLen = strlen(title) + 1;
size_t contentLen = strlen(content) + 1;
size_t size = titleLen + contentLen + sizeof(DWORD) * 4;
BYTE* newmem = (BYTE*)VirtualAllocEx(gc.hProcess, 0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
printf("newmem: %x\n", newmem);
// Remote addresses are kept as DWORDs (the target is described as x86).
DWORD titleAddr = (DWORD)newmem;
DWORD contentAddr = titleAddr + titleLen;
DWORD paramsAddr = contentAddr + contentLen;
WriteProcessMemory(gc.hProcess, (LPVOID)titleAddr, (LPCVOID)title, titleLen, 0);
WriteProcessMemory(gc.hProcess, (LPVOID)contentAddr, (LPCVOID)content, contentLen, 0);
DWORD p1 = 0;
DWORD p4 = 0;
printf("paramsAddr %x\n", paramsAddr);
// Parameter block written at paramsAddr: { 0, titleAddr, contentAddr, 0 }.
WriteProcessMemory(gc.hProcess, (LPVOID)paramsAddr, (LPCVOID)&p1, sizeof(DWORD), 0);
WriteProcessMemory(gc.hProcess, (LPVOID)(paramsAddr + 4), (LPCVOID)&titleAddr, sizeof(DWORD), 0);
WriteProcessMemory(gc.hProcess, (LPVOID)(paramsAddr + 8), (LPCVOID)&contentAddr, sizeof(DWORD), 0);
WriteProcessMemory(gc.hProcess, (LPVOID)(paramsAddr + 12), (LPCVOID)&p4, sizeof(DWORD), 0);
// NOTE(review): a thread start routine receives exactly one argument (here
// the pointer paramsAddr itself); the four DWORDs stored at that address are
// not unpacked into MessageBoxA's four parameters, which would explain why
// no message box appears.
HANDLE hThread = CreateRemoteThread(gc.hProcess, 0, 0,
(LPTHREAD_START_ROUTINE)MessageBoxA, (LPVOID)paramsAddr,
0, 0);
printf("hThread %d\n", hThread);
WaitForSingleObject(hThread, INFINITE);
// Release the remote allocation once the thread has finished.
VirtualFreeEx(gc.hProcess, newmem, 0, MEM_RELEASE);
CloseHandle(hThread);
This is some information at runtime:
newmem: 5f0000
paramsAddr 5f000c
5f0000: 68 65 6C 6C 6F 00 77 6F 72 6C 64 00
00 00 00 00
00 00 5F 00
06 00 5F 00
00 00 00 00
Calling the ExitProcess function the same way does execute normally:
// Same remote-thread call, but with ExitProcess as the start routine and 0 as
// the single thread argument.
HANDLE hThread = CreateRemoteThread(gc.hProcess, 0, 0,
(LPTHREAD_START_ROUTINE)ExitProcess, 0,
0, 0);
Is the call to MessageBoxA failing because of passing wrong parameters? I need some help, thanks
This is a hard-coded version that can execute MessageBoxA
// Hard-coded x86 variant: instead of passing MessageBoxA as the thread start
// routine, a small machine-code stub is written into the target and run.
// Remote layout inside one 1024-byte allocation: title, content, then the stub.
const char* title = "hello";
const char* content = "world";
size_t titleLen = strlen(title) + 1;
size_t contentLen = strlen(content) + 1;
BYTE* newmem = (BYTE*)VirtualAllocEx(gc.hProcess, 0, 1024, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
printf("newmem: %x\n", newmem);
DWORD titleAddr = (DWORD)newmem;
DWORD contentAddr = titleAddr + titleLen;
DWORD funAddr = contentAddr + contentLen;
WriteProcessMemory(gc.hProcess, (LPVOID)titleAddr, (LPCVOID)title, titleLen, 0);
WriteProcessMemory(gc.hProcess, (LPVOID)contentAddr, (LPCVOID)content, contentLen, 0);
// Assembler listing of the stub below (columns: line, offset, bytes, source).
// The 0x12345678 immediates and the call displacement are placeholders.
/*
3 00000000 6A00 push 0
4 00000002 6878563412 push 0x12345678
5 00000007 6878563412 push 0x12345678
6 0000000C 6A00 push 0
7 0000000E E800000000 call MessageBoxA
8 00000013 C3 ret
*/
// Stub bytes with the three 32-bit operands zeroed; they are patched below.
BYTE funcode[] = {
0x6A, 0x00,
0x68, 0x00,0x00,0x00,0x00,
0x68, 0x00,0x00,0x00,0x00,
0x6A, 0x00,
0xE8, 0x00,0x00,0x00,0x00,
0xC3
};
// NOTE(review): hard-coded address; presumably the location of MessageBoxA
// observed in one particular run of the target - verify before reuse.
DWORD pMessageBoxA = 0x76E013D0;
// Relative displacement for the E8 call: target minus the address of the
// instruction that follows the call (the call sits at stub offset 0xE and is
// 5 bytes long). Note that this local reuses the name MessageBoxA.
DWORD MessageBoxA = pMessageBoxA - (funAddr + 0xE) - 5;
// Patch the operands: offsets 0x3 and 0x8 are the immediates of the two
// 5-byte pushes, offset 0xF is the displacement of the call.
memcpy_s(funcode + 0x3, 4, &titleAddr, 4);
memcpy_s(funcode + 0x8, 4, &contentAddr, 4);
memcpy_s(funcode + 0xF, 4, &MessageBoxA, 4);
WriteProcessMemory(gc.hProcess, (LPVOID)funAddr, funcode, sizeof(funcode), 0);
// Run the stub as the remote thread's start routine and wait for it to end.
HANDLE hThread = CreateRemoteThread(gc.hProcess, 0, 0,
(LPTHREAD_START_ROUTINE)funAddr, 0, 0, 0);
WaitForSingleObject(hThread, INFINITE);
VirtualFreeEx(gc.hProcess, newmem, 0, MEM_RELEASE);
CloseHandle(hThread);
This is common code that works on both x86 and x64. Your program must be compiled for the same architecture (x86 or x64) as the target program. I tested it against several programs and it worked normally.
// Generic x86/x64 variant: resolve MessageBoxA inside the target, write the
// two strings plus a small call stub into it, and run the stub as a remote
// thread. The stub flavour is chosen at compile time, so this program must be
// built for the same architecture as the target.
uintptr_t pMessageBoxA = GetProcAddressEx(gc.hProcess, "user32.dll", "MessageBoxA");
if (!pMessageBoxA)
{
    printf("target not find MessageBoxA\n");
    return 0;
}
// "%lp" is not a valid conversion; pointers are printed with %p.
printf("pMessageBoxA: %p\n", (void*)pMessageBoxA);

const char* title = "hello";
const char* content = "world";
size_t titleLen = strlen(title) + 1;
size_t contentLen = strlen(content) + 1;

BYTE* newmem = (BYTE*)VirtualAllocEx(gc.hProcess, 0, 1024, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
if (!newmem)
{
    // Without this check every later address would be computed from NULL.
    printf("VirtualAllocEx failed (%lu)\n", GetLastError());
    return 0;
}
// %x would truncate a 64-bit pointer; use %p.
printf("newmem: %p\n", (void*)newmem);

// Remote layout: title, content, 8 spare bytes, then the stub.
uintptr_t titleAddr = (uintptr_t)newmem;
uintptr_t contentAddr = titleAddr + titleLen;
uintptr_t funAddr = contentAddr + contentLen + 8;
printf("funAddr: %p\n", (void*)funAddr);
WriteProcessMemory(gc.hProcess, (LPVOID)titleAddr, (LPCVOID)title, titleLen, 0);
WriteProcessMemory(gc.hProcess, (LPVOID)contentAddr, (LPCVOID)content, contentLen, 0);

#ifdef _WIN64
/*
0000- 55 - push rbp
0001- 48 8B EC - mov rbp,rsp
0004- 48 83 EC 20 - sub rsp,20
0008- 48 B9 0000000000000000 - mov rcx,0000000000000000
0012- 48 BA 0000000000000000 - mov rdx,0000000000000000
001C- 49 B8 0000000000000000 - mov r8,0000000000000000
0026- 49 B9 0000000000000000 - mov r9,0000000000000000
0030- 48 B8 E02C643FFD7F0000 - mov rax,user32.MessageBoxA
003A- FF D0 - call rax
003C- 48 83 C4 20 - add rsp,20
0040- 48 8B E5 - mov rsp,rbp
0043- 5D - pop rbp
0044- C3 - ret
*/
// Stub bytes for the listing above with every 64-bit immediate zeroed.
BYTE funcode[] = {
    0x55,
    0x48, 0x8B, 0xEC,
    0x48, 0x83, 0xEC, 0x20,
    0x48, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x48, 0xBA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x49, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x49, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xFF, 0xD0,
    0x48, 0x83, 0xC4, 0x20,
    0x48, 0x8B, 0xE5,
    0x5D,
    0xC3,
};
// Each immediate starts two bytes after its instruction's offset in the
// listing (after the REX prefix and opcode byte). rcx and r9 stay zero.
memcpy_s(funcode + 0x14, sizeof(uintptr_t), &contentAddr, sizeof(uintptr_t)); // rdx
memcpy_s(funcode + 0x1E, sizeof(uintptr_t), &titleAddr, sizeof(uintptr_t)); // r8
memcpy_s(funcode + 0x32, sizeof(uintptr_t), &pMessageBoxA, sizeof(uintptr_t)); // rax
WriteProcessMemory(gc.hProcess, (LPVOID)funAddr, funcode, sizeof(funcode), 0);
#else
/*
3 00000000 6A00 push 0
4 00000002 6878563412 push 0x12345678
5 00000007 6878563412 push 0x12345678
6 0000000C 6A00 push 0
7 0000000E E800000000 call MessageBoxA
8 00000013 C3 ret
*/
BYTE funcode[] = {
    0x6A, 0x00,
    0x68, 0x00,0x00,0x00,0x00,
    0x68, 0x00,0x00,0x00,0x00,
    0x6A, 0x00,
    0xE8, 0x00,0x00,0x00,0x00,
    0xC3
};
// Displacement for the relative call at stub offset 0xE (5 bytes long):
// target minus the address of the following instruction. Named so that it no
// longer shadows the MessageBoxA API.
DWORD callDisplacement = pMessageBoxA - (funAddr + 0xE) - 5;
memcpy_s(funcode + 0x3, sizeof(DWORD), &titleAddr, sizeof(DWORD));
memcpy_s(funcode + 0x8, sizeof(DWORD), &contentAddr, sizeof(DWORD));
memcpy_s(funcode + 0xF, sizeof(DWORD), &callDisplacement, sizeof(DWORD));
WriteProcessMemory(gc.hProcess, (LPVOID)funAddr, funcode, sizeof(funcode), 0);
#endif // _WIN64

HANDLE hThread = CreateRemoteThread(gc.hProcess, 0, 0, (LPTHREAD_START_ROUTINE)funAddr, 0, 0, 0);
if (!hThread)
{
    // Do not wait on or close a NULL handle; just release the remote memory.
    printf("CreateRemoteThread failed (%lu)\n", GetLastError());
    VirtualFreeEx(gc.hProcess, newmem, 0, MEM_RELEASE);
    return 0;
}
// The stub must have finished before its memory is released.
WaitForSingleObject(hThread, INFINITE);
VirtualFreeEx(gc.hProcess, newmem, 0, MEM_RELEASE);
CloseHandle(hThread);
GetProcAddressEx:
/*
 * Resolves the address of a named export of a module loaded in another
 * process by walking that module's PE export directory with
 * ReadProcessMemory.
 *
 * hProcess:      handle to the target process (must allow reading its memory).
 * modName:       module name, looked up via GetModuleBase().
 * exportFunName: exact (case-sensitive) export name.
 *
 * Returns the export's address in the target process, or 0 when the module
 * or export is not found, a read fails, or the export is a forwarder.
 */
uintptr_t GetProcAddressEx(HANDLE hProcess, string modName, string exportFunName)
{
    MODULEINFO mi = GetModuleBase(modName, GetProcessId(hProcess));
    uintptr_t moduleBaseAddr = (uintptr_t)mi.lpBaseOfDll;
    if (!moduleBaseAddr)
    {
        printf("module not found.\n");
        return 0;
    }

    // Reads exactly `size` bytes from the target; false on any short/failed read.
    auto readRemote = [&](uintptr_t addr, void* out, SIZE_T size) -> bool
    {
        SIZE_T bytesRead = 0;
        return ReadProcessMemory(hProcess, (LPCVOID)addr, out, size, &bytesRead) && bytesRead == size;
    };
    auto RVA2VA = [&](uintptr_t rva) -> uintptr_t
    {
        return moduleBaseAddr + rva;
    };

    // is PE FILE ? ("MZ" signature)
    WORD e_magic = 0;
    if (!readRemote(moduleBaseAddr, &e_magic, sizeof(e_magic)) || e_magic != 0x5A4D)
    {
        printf("not PE file.\n");
        return 0;
    }

    // get ntHeader offset (e_lfanew lives at 0x3C in the DOS header)
    DWORD e_lfanew = 0;
    if (!readRemote(moduleBaseAddr + 0x3C, &e_lfanew, sizeof(e_lfanew)))
        return 0;
    uintptr_t ntHeaderAddr = moduleBaseAddr + e_lfanew;
    uintptr_t fileHeaderAddr = ntHeaderAddr + sizeof(DWORD); // skip the PE signature

    // SizeOfOptionalHeader: x86 is 0xE0, x64 is 0xF0
    WORD optHeaderSize = 0;
    if (!readRemote(fileHeaderAddr + 0x10, &optHeaderSize, sizeof(optHeaderSize)) ||
        optHeaderSize < sizeof(IMAGE_DATA_DIRECTORY) * 16)
        return 0;

    // The 16 data directories are the tail of the optional header.
    uintptr_t DataDirectoryAddr = fileHeaderAddr + sizeof(IMAGE_FILE_HEADER) + optHeaderSize - sizeof(IMAGE_DATA_DIRECTORY) * 16;

    // tables[0] is export table
    IMAGE_DATA_DIRECTORY exportEntry = { 0 };
    if (!readRemote(DataDirectoryAddr, &exportEntry, sizeof(exportEntry)) || !exportEntry.Size)
    {
        printf("not export table. \n");
        return 0;
    }
    uintptr_t exportDirDataAddr = RVA2VA(exportEntry.VirtualAddress);

    // Fields of the export directory, read at their fixed offsets.
    DWORD NumberOfFunctions = 0;
    DWORD NumberOfNames = 0;
    DWORD AddressOfFunctions = 0;
    DWORD AddressOfNames = 0;
    DWORD AddressOfNameOrdinals = 0;
    if (!readRemote(exportDirDataAddr + 0x14, &NumberOfFunctions, sizeof(DWORD)) ||
        !readRemote(exportDirDataAddr + 0x18, &NumberOfNames, sizeof(DWORD)) ||
        !readRemote(exportDirDataAddr + 0x1C, &AddressOfFunctions, sizeof(DWORD)) ||
        !readRemote(exportDirDataAddr + 0x20, &AddressOfNames, sizeof(DWORD)) ||
        !readRemote(exportDirDataAddr + 0x24, &AddressOfNameOrdinals, sizeof(DWORD)))
        return 0;
    uintptr_t functionsVA = RVA2VA(AddressOfFunctions);
    uintptr_t namesVA = RVA2VA(AddressOfNames);
    uintptr_t ordinalsVA = RVA2VA(AddressOfNameOrdinals);

    // Compares the NUL-terminated remote string at `addr` with `expected`,
    // reading at most expected.size() + 1 bytes. This replaces copying the
    // remote name into a fixed 1024-byte buffer with no bound.
    auto remoteNameEquals = [&](uintptr_t addr, const string& expected) -> bool
    {
        for (size_t i = 0; i <= expected.size(); i++)
        {
            char c = 0;
            if (!readRemote(addr + i, &c, sizeof(c)))
                return false;
            const char wanted = (i < expected.size()) ? expected[i] : '\0';
            if (c != wanted)
                return false;
        }
        return true;
    };

    // Search the name table. "Found" is tracked explicitly: checking the
    // length of the last name read cannot distinguish a miss from a hit.
    bool found = false;
    size_t funNameIndex = 0;
    for (; funNameIndex < NumberOfNames; funNameIndex++)
    {
        DWORD nameRVA = 0;
        if (!readRemote(namesVA + funNameIndex * sizeof(DWORD), &nameRVA, sizeof(nameRVA)))
            return 0;
        if (remoteNameEquals(RVA2VA(nameRVA), exportFunName))
        {
            found = true;
            break;
        }
    }
    if (!found)
    {
        return 0;
    }

    // get function address index (name index -> index into the function table)
    WORD AddressOfFunctionsIndex = 0;
    if (!readRemote(ordinalsVA + funNameIndex * sizeof(WORD), &AddressOfFunctionsIndex, sizeof(WORD)) ||
        AddressOfFunctionsIndex >= NumberOfFunctions)
        return 0;

    // get function address
    DWORD funAddrRVA = 0;
    if (!readRemote(functionsVA + AddressOfFunctionsIndex * sizeof(DWORD), &funAddrRVA, sizeof(DWORD)) || !funAddrRVA)
        return 0;

    // An RVA that points back into the export directory is a forwarder
    // string ("OTHERDLL.Name"), not code, and must not be returned as a
    // callable address.
    if (funAddrRVA >= exportEntry.VirtualAddress &&
        funAddrRVA < exportEntry.VirtualAddress + exportEntry.Size)
    {
        printf("export is forwarded to another module.\n");
        return 0;
    }
    return RVA2VA(funAddrRVA);
}
GetModuleBase:
/*
 * Looks up a module loaded in process `pid` by name (case-insensitive) using
 * a Toolhelp module snapshot.
 *
 * Returns a MODULEINFO whose lpBaseOfDll / SizeOfImage describe the module,
 * or a zero-filled MODULEINFO when the snapshot fails or no module matches.
 */
MODULEINFO GetModuleBase(string moduleName, DWORD pid)
{
    MODULEINFO mi{ 0 };
    HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | TH32CS_SNAPMODULE32, pid);
    if (hSnap == INVALID_HANDLE_VALUE)
    {
        // Nothing to close: INVALID_HANDLE_VALUE must not be handed to CloseHandle.
        return mi;
    }

    // Convert the wanted name once instead of on every loop iteration.
    const auto wantedName = toWstring(moduleName);

    MODULEENTRY32 me;
    me.dwSize = sizeof(me); // required before calling Module32First
    if (Module32First(hSnap, &me))
    {
        do {
            if (!_wcsicmp(me.szModule, wantedName.c_str()))
            {
                mi.lpBaseOfDll = me.modBaseAddr;
                mi.SizeOfImage = me.modBaseSize;
                break;
            }
        } while (Module32Next(hSnap, &me));
    }
    CloseHandle(hSnap);
    return mi;
}
Compared with hard-coded code, this is more complicated.
If you want to get the parameters passed by CreateRemoteThread
BYTE* lpParam = (BYTE*)VirtualAllocEx(gc.hProcess, 0, 8, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
DWORD p1 = 1;
DWORD p2 = 2;
WriteProcessMemory(gc.hProcess, lpParam, &p1, sizeof(DWORD), 0);
WriteProcessMemory(gc.hProcess, lpParam + 4, &p2, sizeof(DWORD), 0);
printf("lpParam: %lp\n", lpParam);
HANDLE hThread = CreateRemoteThread(gc.hProcess, 0, 0, (LPTHREAD_START_ROUTINE)funAddr, lpParam, 0, 0);
If the target program is x86 then you can get lpParam in [esp+4]
If the target program is x64 then you can get lpParam in rcx
I am new to CNG. I am playing with the basic program from msdn site.
I have modified the input plain string and testing the output using other websites that provides the aes cbc encrypted output.
Unfortunately only first half matches and the next half is not matching.
It would be great if someone can point me where the mistake lies,
original code from msdn is here.
Here is the output generated from my code (below). Please note that there is no difference in my code apart from modifying the input plain string.
Here is the output from the online website (http://aes.online-domain-tools.com/ and anothersite)
The first half ends at "B0 C4 29 18".. after that the 2nd half doesn't match.
Here is the code snippet
#include <windows.h>
#include <stdio.h>
#include <bcrypt.h>
#pragma comment(lib, "bcrypt.lib")
// Fallback definitions, used only when the included headers do not already
// provide these names.

// Generic failure status; used below as the initial value of `status`.
#ifndef STATUS_UNSUCCESSFUL
#define STATUS_UNSUCCESSFUL ((NTSTATUS)0xC0000001L)
#endif // !STATUS_UNSUCCESSFUL
// Evaluates to true when Status, converted to NTSTATUS, is >= 0.
#ifndef NT_SUCCESS
#define NT_SUCCESS(Status) ((NTSTATUS)(Status) >= 0)
#endif
/*
 * Prints `len` bytes starting at `buf` as two-digit lowercase hex values,
 * each preceded by a space, followed by a newline.
 */
void
print_inhex(char *buf, int len) {
    for (int i = 0; i < len; i++) {
        // Convert to unsigned char first: where plain char is signed, a byte
        // >= 0x80 would otherwise be sign-extended and printed as "ffffffXX".
        printf(" %02x", (unsigned char)buf[i]);
    }
    printf("\n");
}
// Sample data for the AES example. All three arrays hold the 16 byte values
// 0x00..0x0f.

// 16-byte sample plaintext block.
const BYTE rgbPlaintext[] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};
// Initialization vector; CNG_aes_cbc copies it into a heap buffer before use.
static const BYTE rgbIV[] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};
// Raw 128-bit AES key passed to BCryptGenerateSymmetricKey.
static const BYTE rgbAES128Key[] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
};
void
CNG_aes_cbc()
{
BCRYPT_ALG_HANDLE hAesAlg = NULL;
BCRYPT_KEY_HANDLE hKey = NULL;
NTSTATUS status = STATUS_UNSUCCESSFUL;
DWORD cbCipherText = 0,
cbPlainText = 0,
cbData = 0,
cbKeyObject = 0,
cbBlockLen = 0,
cbBlob = 0;
PBYTE pbCipherText = NULL,
pbPlainText = NULL,
pbKeyObject = NULL,
pbIV = NULL,
pbBlob = NULL;
// Open an algorithm handle.
if (!NT_SUCCESS(status = BCryptOpenAlgorithmProvider(&hAesAlg, BCRYPT_AES_ALGORITHM, NULL, 0))) {
wprintf(L"**** Error 0x%x returned by BCryptOpenAlgorithmProvider\n", status);
goto Cleanup;
}
// Calculate the size of the buffer to hold the KeyObject.
if (!NT_SUCCESS(status = BCryptGetProperty(hAesAlg, BCRYPT_OBJECT_LENGTH, (PBYTE)&cbKeyObject, sizeof(DWORD), &cbData, 0))) {
wprintf(L"**** Error 0x%x returned by BCryptGetProperty\n", status);
goto Cleanup;
}
// Allocate the key object on the heap.
pbKeyObject = (PBYTE)HeapAlloc(GetProcessHeap(), 0, cbKeyObject);
if (NULL == pbKeyObject) {
wprintf(L"**** memory allocation failed\n");
goto Cleanup;
}
// Calculate the block length for the IV.
if (!NT_SUCCESS(status = BCryptGetProperty(hAesAlg, BCRYPT_BLOCK_LENGTH, (PBYTE)&cbBlockLen, sizeof(DWORD), &cbData, 0))) {
wprintf(L"**** Error 0x%x returned by BCryptGetProperty\n", status);
goto Cleanup;
}
// Determine whether the cbBlockLen is not longer than the IV length.
if (cbBlockLen > sizeof(rgbIV)) {
wprintf(L"**** block length is longer than the provided IV length\n");
goto Cleanup;
}
// Allocate a buffer for the IV. The buffer is consumed during the
// encrypt/decrypt process.
pbIV = (PBYTE)HeapAlloc(GetProcessHeap(), 0, cbBlockLen);
if (NULL == pbIV) {
wprintf(L"**** memory allocation failed\n");
goto Cleanup;
}
memcpy(pbIV, rgbIV, cbBlockLen);
if (!NT_SUCCESS(status = BCryptSetProperty(hAesAlg, BCRYPT_CHAINING_MODE, (PBYTE)BCRYPT_CHAIN_MODE_CBC, sizeof(BCRYPT_CHAIN_MODE_CBC), 0))) {
wprintf(L"**** Error 0x%x returned by BCryptSetProperty\n", status);
goto Cleanup;
}
// Generate the key from supplied input key bytes.
if (!NT_SUCCESS(status = BCryptGenerateSymmetricKey(hAesAlg, &hKey, pbKeyObject, cbKeyObject, (PBYTE)rgbAES128Key, sizeof(rgbAES128Key), 0))) {
wprintf(L"**** Error 0x%x returned by BCryptGenerateSymmetricKey\n", status);
goto Cleanup;
}
// Save another copy of the key for later.
if (!NT_SUCCESS(status = BCryptExportKey(hKey, NULL, BCRYPT_KEY_DATA_BLOB, NULL, 0, &cbBlob, 0))) {
wprintf(L"**** Error 0x%x returned by BCryptExportKey\n", status);
goto Cleanup;
}
// Allocate the buffer to hold the BLOB.
PUCHAR pbBlob_1 = (PUCHAR)malloc(sizeof(PUCHAR) * cbBlob);
//pbBlob = (PBYTE)HeapAlloc(GetProcessHeap(), 0, cbBlob);
if (NULL == pbBlob_1) {
wprintf(L"**** memory allocation failed\n");
goto Cleanup;
}
if (!NT_SUCCESS(status = BCryptExportKey(hKey, NULL, BCRYPT_KEY_DATA_BLOB, pbBlob_1, cbBlob, &cbBlob, 0))) {
wprintf(L"**** Error 0x%x returned by BCryptExportKey\n", status);
goto Cleanup;
}
PUCHAR blob = pbBlob_1 + sizeof(BCRYPT_KEY_DATA_BLOB_HEADER);
int len = cbBlob - sizeof(BCRYPT_KEY_DATA_BLOB_HEADER);
printf("key:");
print_inhex(blob, len);
cbPlainText = sizeof(rgbPlaintext);
pbPlainText = (PBYTE)HeapAlloc(GetProcessHeap(), 0, cbPlainText);
if (NULL == pbPlainText) {
wprintf(L"**** memory allocation failed\n");
goto Cleanup;
}
/*memcpy(pbPlainText, rgbPlaintext, sizeof(rgbPlaintext));*/
char *test_msg = "This is my test msg";
cbPlainText = strlen(test_msg) + 1;
memcpy(pbPlainText, test_msg, cbPlainText);
printf("plain text:");
print_inhex(test_msg, strlen(test_msg));
// Get the output buffer size.
if (!NT_SUCCESS(status = BCryptEncrypt(hKey, pbPlainText, cbPlainText, NULL, pbIV, cbBlockLen, NULL, 0, &cbCipherText, BCRYPT_BLOCK_PADDING))) {
wprintf(L"**** Error 0x%x returned by BCryptEncrypt\n", status);
goto Cleanup;
}
pbCipherText = (PBYTE)HeapAlloc(GetProcessHeap(), 0, cbCipherText);
if (NULL == pbCipherText) {
wprintf(L"**** memory allocation failed\n");
goto Cleanup;
}
// Use the key to encrypt the plaintext buffer.
// For block sized messages, block padding will add an extra block.
if (!NT_SUCCESS(status = BCryptEncrypt(hKey, pbPlainText, cbPlainText, NULL, pbIV,
cbBlockLen, pbCipherText, cbCipherText, &cbData, BCRYPT_BLOCK_PADDING))){
wprintf(L"**** Error 0x%x returned by BCryptEncrypt\n", status);
goto Cleanup;
}
printf("cipher text:");
for (int i = 0; i < cbCipherText; i++)
printf(" %02x", pbCipherText[i]);
wprintf(L"\nSuccess!\n");
Cleanup:
if (hAesAlg)
BCryptCloseAlgorithmProvider(hAesAlg, 0);
if (hKey)
BCryptDestroyKey(hKey);
if (pbCipherText)
HeapFree(GetProcessHeap(), 0, pbCipherText);
if (pbKeyObject)
HeapFree(GetProcessHeap(), 0, pbKeyObject);
if (pbIV)
HeapFree(GetProcessHeap(), 0, pbIV);
}
You aren't consistent with your value for cbPlainText.
Asides:
You also have some very scary copy/realloc code where you write a string over a buffer that is not guaranteed to be as big as the string.
You also defined NT_SUCCESS in such a way that it returns whether or not something failed. 0 is success, !0 is failure.
You hex-printed up to strlen of tst_msg. But you set cbPlainText = strlen(tst_msg) + 1. If you set it to strlen(tst_msg) then you get #zaph's answer (46CC2228E81B2A05E8E8EBF2B0C42918EC496128D7C45BD0B19BB2D6452A3936).
You don't match the website because you used CNG with PKCS#7 padding, and the website uses zero-padding. You could identify the padding used in the website by taking your output ciphertext and putting it as the plaintext, then hitting decrypt. It then says your input was 54686973206973206d792074657374206d736700000000000000000000000000. Or, if you add 00 0C 0C 0C 0C 0C 0C 0C 0C 0C 0C 0C 0C on your input to the website you'll get your original answer. Or add 0D 0D 0D 0D 0D 0D 0D 0D 0D 0D 0D 0D 0D and you'll get #zaph's answer.
So, things to do:
Don't re-evaluate the length of things to print, make one variable (cbPlainText) and stick with it.
AES is a block cipher algorithm. All block ciphers require complete blocks, a deficient last block must be padded (and under removable padding schemes a complete final block requires yet another block). Learn what this means before continuing. https://en.wikipedia.org/wiki/Padding_(cryptography)#Symmetric_cryptography
When something seems wrong with encryption, look at the decrypted output.
Very often the decrypted output with "no padding" is revealing.
Learn C, and how memory works. Or switch to C# and have a less steep learning curve.
The length of the data is not an exact multiple of the block size (16 bytes for AES), so padding has to be added. That is where implementations differ: one may reject the data, another may pad with a default method such as 0x00 (Cryptomathic) or PKCS#7 (the generally used padding), and another may use whatever junk follows the provided data in memory.
Don't use BCryptEncrypt, use AES Class
SymmetricAlgorithm.Padding Property Note: The default is PaddingMode.PKCS7.
It is best to specify the padding on instantiation of AES.
See PaddingMode Enumeration: PKCS7
The PKCS #7 padding string consists of a sequence of bytes, each of which is equal to the total number of padding bytes added.
Manually adding PKCS#7 padding to:
Cryptomathic AES:
produces: 46CC2228E81B2A05E8E8EBF2B0C42918EC496128D7C45BD0B19BB2D6452A3936
I am trying to create my own JIT and so far managed to run very simple assembly code (in machine-code), but having trouble figuring out how to call functions this way. In Visual Studio I can see functions in disassembly window.
Another related question is how do I call Win32 MessageBox() in machine-code?
Next question is how do I call external DLL/LIB functions in this manner?
Also is there any books or tutorials which could teach me further in this subject? I have tried to search for it but get results like .NET, JVM and LLVM which I think is not really what I am looking for.
Here is a simplified version of the code that I am working on:
#include <iostream>
#include <Windows.h>
int main(int argc, char* argv[])
{
// b8 03 00 00 00 83 c0 02 c3
unsigned char code[] = {
0xb8, // mov eax, 3
0x03, 0x00, 0x00, 0x00, // 3 (32 bit)
0x83, // add eax, 2 // 0x83 = add,
0xc0, // ModR/M with immediate 8 bit value
0x02, // 2 (8 bit)
0xc3 // ret
};
void* mem = VirtualAlloc(0, sizeof(code), MEM_COMMIT, PAGE_EXECUTE_READWRITE);
memcpy(mem, code, sizeof(code));
DWORD old;
VirtualProtect(mem, sizeof(mem), PAGE_EXECUTE_READ, &old);
int(*func)() = reinterpret_cast<int(*)()>(mem);
printf("Number is %d\n", func());
VirtualFree(mem, 0, MEM_RELEASE);
return 0;
}
Is it possible to have the JIT assembly code to call a C++ function?
Before this project I made a byte-code interpreter in C++, but I wasn't really happy with the speed when comparing it to equivalent test program in C#. C# was roughly 25x times faster. So I stumbled on something called JIT to make it faster. So I hope you all can see where I am taking this JIT project. And maybe if possible make it handle GUI.
You can probably find some tutorials about writing a compiler/linker. It may help with implementing/calling dynamic libraries.
I'm not sure what you exactly mean by calling C++ functions. Anyway I wrote the following demo program that you can take a look and see if it helps at all.
#include <Windows.h>
#include <iostream>
using namespace std;
// Callee for the generated-code demo: returns the first argument minus the second.
__int64 sub(__int64 a, __int64 b)
{
    const __int64 difference = a - b;
    return difference;
}
int main(int argc, char **argv)
{
char code[] =
{
0x48, 0x89, 0xC8, // mov rax, rcx
0xC3, // ret
0x48, 0x83, 0xEC, 0x20, // sub rsp, 0x20
0xFF, 0xD0, // call rax
0x48, 0x83, 0xC4, 0x20, // add rsp, 0x20
0xC3 // ret
};
char *mem = static_cast<char *>(VirtualAlloc(0, sizeof(code), MEM_COMMIT, PAGE_EXECUTE_READWRITE));
MoveMemory(mem, code, sizeof(code));
auto setFunc = reinterpret_cast<void *(*)(void *)>(mem);
auto callFunc = reinterpret_cast<__int64 (*)(__int64, __int64)>(mem + 4);
setFunc(sub);
__int64 r = callFunc(0, 1);
cout << "r = " << r << endl;
VirtualFree(mem, 0, MEM_RELEASE);
cin.ignore();
return 0;
}
I know it's a long post but it's mostly code and pictures, it's a quick read! First of all, here is what I'm trying to do:
I'm trying to execute a BYTE array in a detoured function in order to go back to the original code as if I didn't detour anything. Here is my code:
DllMain (DetourAddress is all that matter):
// DLL entry point. On process attach it opens a console, redirects stdout to
// it and installs the detour at HookAddress; on process detach it releases
// the console. Always returns TRUE.
BOOL APIENTRY DllMain( HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReserved )
{
    switch (ul_reason_for_call)
    {
    case DLL_PROCESS_ATTACH:
        AllocConsole();
        freopen("CONOUT$", "w", stdout);
        DetourAddress((void*)HookAddress, (void*)&DetourFunc);
        // Without this break, attach fell through into the detach case and
        // immediately freed the console that had just been allocated.
        break;
    case DLL_PROCESS_DETACH:
        FreeConsole();
        break;
    default:
        // Thread attach/detach notifications need no work here.
        break;
    }
    return TRUE;
}
DetourAddress (code is self-explanatory, I think):
void DetourAddress(void* funcPtr, void* hook)
{
// write jmp
BYTE cmd[5] =
{
0xE9, //jmp
0x00, 0x00, 0x00, 0x00 //address
};
// make memory readable/writable
DWORD dwProtect;
VirtualProtect(funcPtr, 5, PAGE_EXECUTE_READWRITE, &dwProtect);
// read bytes about to be replaced
ReadProcessMemory(GetCurrentProcess(), (LPVOID)funcPtr, mem, 5, NULL);
// write jmp in cmd
DWORD offset = ((DWORD)hook - (DWORD)funcPtr - 5); // (dest address) - (source address) - (jmp size)
memcpy(&cmd[1], &offset, 4); // write address into jmp
WriteProcessMemory(GetCurrentProcess(), (LPVOID)funcPtr, cmd, 5, 0); // write jmp
// reprotect
VirtualProtect(funcPtr, 5, dwProtect, NULL);
}
DetourFunc:
// Target of the JMP installed by DetourAddress. Prints a message, then calls
// the saved bytes in `mem` through pByteExe and finally jumps to
// HookReturnAddress.
// NOTE(review): this function is declared naked (no compiler-generated
// prologue/epilogue) yet declares a local variable and makes ordinary calls;
// confirm the compiler accepts and handles this as intended.
_declspec(naked) void DetourFunc()
{
// Save flags and general-purpose registers around the printf call.
__asm
{
PUSHFD
PUSHAD
}
printf("function detoured\n");
__asm
{
POPAD
POPFD
}
// NOTE(review): the calls below run after the registers were restored, so
// they may presumably change registers again before the original bytes and
// the final jmp execute - verify.
// make the mem buffer executable (as well as readable/writable)
DWORD dwProtect;
VirtualProtect(mem, 6, PAGE_EXECUTE_READWRITE, &dwProtect);
// Call the saved bytes stored in mem (pByteExe points at mem).
pByteExe();
// reprotect
// NOTE(review): the last argument is NULL here; check the VirtualProtect
// documentation - the call may fail and leave the protection unchanged.
VirtualProtect(mem, 6, dwProtect, NULL);
// Continue in the hooked code at HookReturnAddress.
__asm
{
jmp HookReturnAddress
}
}
And finaly the global variables, typedef for pByteExe() and includes:
#include <Windows.h>
#include <cstdio>
// Address that gets patched with the JMP, and the address right behind the
// 5-byte JMP where execution continues after the detour.
DWORD HookAddress = 0x08B1418,
      HookReturnAddress = HookAddress+5;

// Signature used to call a raw byte buffer as code.
typedef void ( * pFunc)();

// Storage for the 5 bytes overwritten by the JMP, followed by a RET (0xC3)
// so the buffer can be called like a function.
BYTE mem[6] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xC3 };

// Callable view of `mem` (the terminating semicolon was missing).
pFunc pByteExe = (pFunc) &mem;
As you can see in DetourFunc, I'm trying to execute my byte array (mem) directly. Using OllyDbg, this gets me there:
Which is exactly the bytes I'm trying to execute. Only problem is that it gives me an Access violation error when executing... Any idea why? I would have thought "VirtualProtect(mem, 5, PAGE_EXECUTE_READWRITE, &dwProtect);" would have made it safe to access... Thanks for your help!
EDIT: I just realized something weird was happening... when I "Step into" with OllyDbg, the mem instructions are correct, but as soon as I scroll a little, they change back to this:
Any idea why?
You've forgot the module offset...
// Base of the process's main executable (GetModuleHandle with NULL), stored
// as a DWORD.
DWORD module = (DWORD)GetModuleHandle(NULL);
// ADDRESS is a placeholder for the module-relative offset of the code to
// hook; adding the base yields the address to patch in this run.
DWORD real_address = module + (DWORD)ADDRESS;
ADDRESS of course has to be relative to your module. (The module base address isn't always the same.)
And by the way, why do you use WriteProcessMemory when you have injected your DLL? A simple memcpy is enough...