Convert UTF-16 to UTF-8 - c++

I am currently using VC++ 2008 with MFC. Because PostgreSQL doesn't support UTF-16 (the encoding Windows uses for Unicode), I need to convert strings from UTF-16 to UTF-8 before storing them.
Here is my code snippet.
// demo.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "demo.h"
#include "Utils.h"
#include <iostream>
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// The one and only application object
CWinApp theApp;
using namespace std;
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
int nRetCode = 0;
// initialize MFC and print and error on failure
if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
{
// TODO: change error code to suit your needs
_tprintf(_T("Fatal Error: MFC initialization failed\n"));
nRetCode = 1;
}
else
{
// TODO: code your application's behavior here.
}
CString utf16 = _T("Hello");
std::cout << utf16.GetLength() << std::endl;
CStringA utf8 = UTF8Util::ConvertUTF16ToUTF8(utf16);
std::cout << utf8.GetLength() << std::endl;
getchar();
return nRetCode;
}
and the conversion functions.
namespace UTF8Util
{
//----------------------------------------------------------------------------
// FUNCTION: ConvertUTF8ToUTF16
// DESC: Converts Unicode UTF-8 text to Unicode UTF-16 (Windows default).
//----------------------------------------------------------------------------
CStringW ConvertUTF8ToUTF16( __in const CHAR * pszTextUTF8 )
{
//
// Special case of NULL or empty input string
//
if ( (pszTextUTF8 == NULL) || (*pszTextUTF8 == '\0') )
{
// Return empty string
return L"";
}
//
// Consider CHAR's count corresponding to total input string length,
// including end-of-string (\0) character
//
const size_t cchUTF8Max = INT_MAX - 1;
size_t cchUTF8;
HRESULT hr = ::StringCchLengthA( pszTextUTF8, cchUTF8Max, &cchUTF8 );
if ( FAILED( hr ) )
{
AtlThrow( hr );
}
// Consider also terminating \0
++cchUTF8;
// Convert to 'int' for use with MultiByteToWideChar API
int cbUTF8 = static_cast<int>( cchUTF8 );
//
// Get size of destination UTF-16 buffer, in WCHAR's
//
int cchUTF16 = ::MultiByteToWideChar(
CP_UTF8, // convert from UTF-8
MB_ERR_INVALID_CHARS, // error on invalid chars
pszTextUTF8, // source UTF-8 string
cbUTF8, // total length of source UTF-8 string,
// in CHAR's (= bytes), including end-of-string \0
NULL, // unused - no conversion done in this step
0 // request size of destination buffer, in WCHAR's
);
ATLASSERT( cchUTF16 != 0 );
if ( cchUTF16 == 0 )
{
AtlThrowLastWin32();
}
//
// Allocate destination buffer to store UTF-16 string
//
CStringW strUTF16;
WCHAR * pszUTF16 = strUTF16.GetBuffer( cchUTF16 );
//
// Do the conversion from UTF-8 to UTF-16
//
int result = ::MultiByteToWideChar(
CP_UTF8, // convert from UTF-8
MB_ERR_INVALID_CHARS, // error on invalid chars
pszTextUTF8, // source UTF-8 string
cbUTF8, // total length of source UTF-8 string,
// in CHAR's (= bytes), including end-of-string \0
pszUTF16, // destination buffer
cchUTF16 // size of destination buffer, in WCHAR's
);
ATLASSERT( result != 0 );
if ( result == 0 )
{
AtlThrowLastWin32();
}
// Release internal CString buffer
strUTF16.ReleaseBuffer();
// Return resulting UTF16 string
return strUTF16;
}
//----------------------------------------------------------------------------
// FUNCTION: ConvertUTF16ToUTF8
// DESC: Converts Unicode UTF-16 (Windows default) text to Unicode UTF-8.
//----------------------------------------------------------------------------
CStringA ConvertUTF16ToUTF8( __in const WCHAR * pszTextUTF16 )
{
//
// Special case of NULL or empty input string
//
if ( (pszTextUTF16 == NULL) || (*pszTextUTF16 == L'\0') )
{
// Return empty string
return "";
}
//
// Consider WCHAR's count corresponding to total input string length,
// including end-of-string (L'\0') character.
//
const size_t cchUTF16Max = INT_MAX - 1;
size_t cchUTF16;
HRESULT hr = ::StringCchLengthW( pszTextUTF16, cchUTF16Max, &cchUTF16 );
if ( FAILED( hr ) )
{
AtlThrow( hr );
}
// Consider also terminating \0
++cchUTF16;
//
// WC_ERR_INVALID_CHARS flag is set to fail if invalid input character
// is encountered.
// This flag is supported on Windows Vista and later.
// Don't use it on Windows XP and previous.
//
#if (WINVER >= 0x0600)
DWORD dwConversionFlags = WC_ERR_INVALID_CHARS;
#else
DWORD dwConversionFlags = 0;
#endif
//
// Get size of destination UTF-8 buffer, in CHAR's (= bytes)
//
int cbUTF8 = ::WideCharToMultiByte(
CP_UTF8, // convert to UTF-8
dwConversionFlags, // specify conversion behavior
pszTextUTF16, // source UTF-16 string
static_cast<int>( cchUTF16 ), // total source string length, in WCHAR's,
// including end-of-string \0
NULL, // unused - no conversion required in this step
0, // request buffer size
NULL, NULL // unused
);
ATLASSERT( cbUTF8 != 0 );
if ( cbUTF8 == 0 )
{
AtlThrowLastWin32();
}
//
// Allocate destination buffer for UTF-8 string
//
CStringA strUTF8;
int cchUTF8 = cbUTF8; // sizeof(CHAR) = 1 byte
CHAR * pszUTF8 = strUTF8.GetBuffer( cchUTF8 );
//
// Do the conversion from UTF-16 to UTF-8
//
int result = ::WideCharToMultiByte(
CP_UTF8, // convert to UTF-8
dwConversionFlags, // specify conversion behavior
pszTextUTF16, // source UTF-16 string
static_cast<int>( cchUTF16 ), // total source string length, in WCHAR's,
// including end-of-string \0
pszUTF8, // destination buffer
cbUTF8, // destination buffer size, in bytes
NULL, NULL // unused
);
ATLASSERT( result != 0 );
if ( result == 0 )
{
AtlThrowLastWin32();
}
// Release internal CString buffer
strUTF8.ReleaseBuffer();
// Return resulting UTF-8 string
return strUTF8;
}
} // namespace UTF8Util
However, at runtime I get the assertion failure at
ATLASSERT( cbUTF8 != 0 );
while trying to get the size of the destination UTF-8 buffer.
What have I missed?
Also, if I test with Chinese characters, how can I verify that the resulting UTF-8 string is correct?

You can also use the ATL String Conversion Macros - to convert from UTF-16 to UTF-8 use CW2A and pass CP_UTF8 as the code page, e.g.:
CW2A utf8(buffer, CP_UTF8);
const char* data = utf8.m_psz;
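A slightly fuller sketch of the same idea (untested; it assumes the ATL headers are available and that the project builds with ATL/MFC support). Note that the converted buffer only lives as long as the CW2A object itself:
#include <atlstr.h>   // CStringW/CStringA, CW2A, CA2W

void StoreExample(const CStringW& utf16)
{
    // UTF-16 -> UTF-8; the temporary char buffer is owned by 'utf8'
    CW2A utf8(utf16, CP_UTF8);
    const char* bytes = utf8;          // valid only while 'utf8' is in scope
    // ... hand 'bytes' to PostgreSQL here ...

    // UTF-8 -> UTF-16 for strings coming back from the database
    CA2W roundTrip(bytes, CP_UTF8);
    ATLASSERT(utf16 == CStringW(roundTrip));
}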

The problem is you specified the WC_ERR_INVALID_CHARS flag:
Windows Vista and later: Fail if an invalid input character is encountered. If this flag is not set, the function silently drops illegal code points. A call to GetLastError returns ERROR_NO_UNICODE_TRANSLATION. Note that this flag only applies when CodePage is specified as CP_UTF8 or 54936 (for Windows Vista and later). It cannot be used with other code page values.
Your conversion function seems quite long. How does this one work for you?
//----------------------------------------------------------------------------
// FUNCTION: ConvertUTF16ToUTF8
// DESC: Converts Unicode UTF-16 (Windows default) text to Unicode UTF-8.
//----------------------------------------------------------------------------
CStringA ConvertUTF16ToUTF8( __in LPCWSTR pszTextUTF16 ) {
if (pszTextUTF16 == NULL) return "";
int utf16len = wcslen(pszTextUTF16);
int utf8len = WideCharToMultiByte(CP_UTF8, 0, pszTextUTF16, utf16len,
NULL, 0, NULL, NULL );
CArray<CHAR> buffer;
buffer.SetSize(utf8len+1);
buffer.SetAt(utf8len, '\0');
WideCharToMultiByte(CP_UTF8, 0, pszTextUTF16, utf16len,
buffer.GetData(), utf8len, 0, 0 );
return buffer.GetData();
}
I see you use a function called StringCchLengthW to get the length of the input string. Most of the places I've looked recommend letting the WideCharToMultiByte function itself tell you how many CHARs it wants.
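For reference, a minimal sketch of that two-call idiom (untested); passing -1 as the source length makes the API process the terminating null and include it in the returned count:
int cbNeeded = ::WideCharToMultiByte(CP_UTF8, 0, pszTextUTF16, -1, NULL, 0, NULL, NULL);
if (cbNeeded > 0)
{
    CStringA utf8;
    ::WideCharToMultiByte(CP_UTF8, 0, pszTextUTF16, -1,
                          utf8.GetBuffer(cbNeeded), cbNeeded, NULL, NULL);
    utf8.ReleaseBuffer();
}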
Edit:
As Rob pointed out, you can use CW2A with the CP_UTF8 code page:
CStringA str = CW2A(wStr, CP_UTF8);
While I'm editing, I can answer your second question:
How can I verify the resultant UTF-8 string is correct?
Write it to a text file, then open it in Mozilla Firefox or an equivalent program. In the View menu, you can go to Character Encoding and switch manually to UTF-8 (assuming Firefox didn't guess it correctly to begin with). Compare it with a UTF-16 document containing the same text and see if there are any differences.
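If you want to produce that test file from code, something like this writes the converted bytes out as-is (untested sketch; the file name is just an example, and the BOM is optional but helps editors detect the encoding):
#include <fstream>

void DumpUtf8(const CStringA& utf8)
{
    std::ofstream out("utf8_check.txt", std::ios::binary);
    out.write("\xEF\xBB\xBF", 3);                    // optional UTF-8 BOM
    out.write(utf8.GetString(), utf8.GetLength());   // raw UTF-8 bytes, no translation
}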

Related

How to translate a virtual-key code to char (depending on locale)?

I am playing around with translating user's keystrokes between the different installed languages on their Windows machine.
I found this article about virtual-key codes, and how they map to characters, and also this function to perform the mapping. But it doesn't seem to work like I expected it to.
This is my attempt at sending the virtual-key code of "A" (which is 0x41), and translating it to the character "ש" in the Hebrew keyboard (which is what pressing that key outputs to the screen, while the user is on the Hebrew keyboard layout). It still prints only "A", regardless of my current active layout.
#include <windows.h>
#include <iostream>
#include <stdlib.h>
#include <tchar.h>
int main()
{
HKL lpList[2];
GetKeyboardLayoutList(2, lpList); // returns {0x04090409 , 0xf03d040d} on my machine, which is {en-US, he-IL}
HKL hkl = lpList[1]; // sets to he-IL
char ch = MapVirtualKeyEx(0x41, MAPVK_VK_TO_CHAR, hkl); //0x41 is the Virtual Key of the keyboard button 'A'
std::cout << "ch: " << ch << std::endl; //prints "ch: A", I want it to print "ch: ש"
}
What am I missing? Is there some other way to achieve what I am trying to do?
I just tried
UINT VKCode = LOBYTE(VkKeyScan('ש')); // returns 0xbf
UINT ScanCode = MapVirtualKeyEx(VKCode, MAPVK_VK_TO_VSC, hkl); // returns 0x35
UINT VKCode2 = MapVirtualKeyEx(ScanCode, MAPVK_VSC_TO_VK, hkl); // once again 0xbf - unsurprisingly
TCHAR ch = MapVirtualKeyEx(VKCode2, MAPVK_VK_TO_CHAR, hkl); // now it returns '.'
So I convert char -> vk -> sc -> vk -> char, and end up with a different character than the one I started with. Maybe there is a different way to convert a virtual-key code to char?
You can use ToUnicodeEx API.
And if you want to output characters correctly, you can refer to: How to print Latin characters to the C++ console properly on Windows?
I created a sample and used the following code:
#include <windows.h>
#include <io.h>
#include <fcntl.h>
#include <iostream>
int main()
{
SetConsoleOutputCP(1256);
_setmode(_fileno(stdout), _O_U16TEXT);
HKL lpList[2];
GetKeyboardLayoutList(2, lpList);
HKL hkl = lpList[1]; // sets to he-IL
UINT VKCode = (VkKeyScanExW(L'ש',hkl));
UINT ScanCode = MapVirtualKeyExW(VKCode, MAPVK_VK_TO_VSC, hkl);
UINT VKCode2 = MapVirtualKeyExW(ScanCode, MAPVK_VSC_TO_VK, hkl);
TCHAR ch1 = MapVirtualKeyExW(VKCode2, MAPVK_VK_TO_CHAR, hkl);
BYTE uKeyboardState[256];
WCHAR oBuffer[5] = {};
//Initialization of KeyBoardState
for (int i = 0; i < 256; ++i)
{
uKeyboardState[i] = 0;
}
TCHAR buffer[1024];
ToUnicodeEx(VKCode, ScanCode, uKeyboardState, buffer, 1024, 0, hkl);
std::wcout << buffer;
return 0;
}
And it works for me.
According to the documentation pages (MapVirtualKeyExA function and MapVirtualKeyExW function) the function returns a UINT and not a char:
UINT MapVirtualKeyExW(
UINT uCode,
UINT uMapType,
HKL dwhkl
);
Depending on your project settings you'll need to interpret this result either as char or as wchar_t; that's the reason.
You can overcome this if you use TCHAR ch = ..., and let the project settings expand the TCHAR macro to the correct type.
The harder part is to decide if you need to use std::cout or std::wcout (std::cout, std::wcout). You could use some type check (e.g. if (std::is_same<decltype(ch), wchar_t>::value) { ... } else { ... }) to do this properly.
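For example, a small sketch (untested, reusing hkl and the includes from the sample above) that interprets the returned UINT as a wide character before printing:
UINT mapped = MapVirtualKeyExW(0x41, MAPVK_VK_TO_CHAR, hkl);
wchar_t wch = static_cast<wchar_t>(mapped & 0xFFFF);  // low word is the character; the high bit flags a dead key
std::wcout << wch << std::endl;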

Open utf8 encoded filename in c++ Windows

Consider the following code:
#include <iostream>
#include <boost\locale.hpp>
#include <Windows.h>
#include <fstream>
std::string ToUtf8(std::wstring str)
{
std::string ret;
int len = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0, NULL, NULL);
if (len > 0)
{
ret.resize(len);
WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.length(), &ret[0], len, NULL, NULL);
}
return ret;
}
int main()
{
std::wstring wfilename = L"D://Private//Test//एउटा फोल्दर//भित्रको फाईल.txt";
std::string utf8path = ToUtf8(wfilename );
std::ifstream iFileStream(utf8path , std::ifstream::in | std::ifstream::binary);
if(iFileStream.is_open())
{
std::cout << "Opened the File\n";
//Do the work here.
}
else
{
std::cout << "Cannot Opened the file\n";
}
return 0;
}
If I run this, I cannot open the file, and so I end up in the else block. Even using boost::locale::conv::from_utf(utf8path, "utf_8") instead of utf8path doesn't work. The code works if I use wifstream with wfilename as its parameter, but I don't want to use wifstream. Is there any way to open the file with its name UTF-8 encoded? I am using Visual Studio 2010.
On Windows, you MUST use 8bit ANSI (and it must match the user's locale) or UTF-16 for filenames, there is no other option available. You can keep using string and UTF-8 in your main code, but you will have to convert UTF-8 filenames to UTF-16 when you are opening files. Less efficient, but that is what you need to do.
Fortunately, VC++'s implementation of std::ifstream and std::ofstream have non-standard overloads of their constructors and open() methods to accept wchar_t* strings for UTF-16 filenames.
explicit basic_ifstream(
const wchar_t *_Filename,
ios_base::openmode _Mode = ios_base::in,
int _Prot = (int)ios_base::_Openprot
);
void open(
const wchar_t *_Filename,
ios_base::openmode _Mode = ios_base::in,
int _Prot = (int)ios_base::_Openprot
);
void open(
const wchar_t *_Filename,
ios_base::openmode _Mode
);
explicit basic_ofstream(
const wchar_t *_Filename,
ios_base::openmode _Mode = ios_base::out,
int _Prot = (int)ios_base::_Openprot
);
void open(
const wchar_t *_Filename,
ios_base::openmode _Mode = ios_base::out,
int _Prot = (int)ios_base::_Openprot
);
void open(
const wchar_t *_Filename,
ios_base::openmode _Mode
);
You will have to use an #ifdef to detect Windows compilation (unfortunately, different C++ compilers identify that differently) and temporarily convert your UTF-8 string to UTF-16 when opening a file.
#ifdef _MSC_VER
std::wstring ToUtf16(std::string str)
{
std::wstring ret;
int len = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0);
if (len > 0)
{
ret.resize(len);
MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &ret[0], len);
}
return ret;
}
#endif
int main()
{
std::string utf8path = ...;
std::ifstream iFileStream(
#ifdef _MSC_VER
ToUtf16(utf8path).c_str()
#else
utf8path.c_str()
#endif
, std::ifstream::in | std::ifstream::binary);
...
return 0;
}
Note that this is only guaranteed to work in VC++. Other C++ compilers for Windows are not guaranteed to provide similar extensions.
UPDATE: as of Windows 10 Insider Preview Build 17035, Microsoft now supports UTF-8 as a system-wide encoding that users can set their locale to. And as of Windows 10 Version 1903 (build 18362), applications can now opt in via their app manifest to use UTF-8 as a process-wide codepage, even if the user locale is not set to UTF-8. These features allow ANSI-based APIs (like CreateFileA(), which std::ifstream/std::ofstream use internally) to work with UTF-8 strings. So, in theory, with this feature turned on, you might be able to pass a UTF-8 encoded string to std::ifstream/std::ofstream and it would "just work". I can't confirm that, as it very much depends on the implementation. It would be safer to stick with passing in UTF-16 filenames, since that is Windows' native encoding, which the ANSI APIs will simply convert to internally.
You can use std::filesystem::u8path in C++17 (it was available earlier as std::experimental::filesystem::u8path):
std::filesystem::path pa = std::filesystem::u8path((const char*)yourStdStringPath.c_str());
std::ofstream ofs(pa);
u8path is deprecated in C++20, where you can instead construct the path directly from a u8 (char8_t) string literal or std::u8string.
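A C++20 sketch of that (untested; the path literal reuses the folder names from the question, and the source file must itself be saved as UTF-8):
#include <filesystem>
#include <fstream>

int main()
{
    // u8"" yields char8_t in C++20; std::filesystem::path interprets it as UTF-8
    std::filesystem::path pa{u8"D:/Private/Test/एउटा फोल्दर/भित्रको फाईल.txt"};
    std::ofstream ofs(pa);
}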

Dll injection with GetFullPathNameA not working?

If I explicitly write the full DLL path, the injection works:
char s[1000]="E:\\worldisnotenough.dll"; //Works
If I use GetFullPathNameA, the DLL injection does not work, and there are no runtime or compile-time errors. I checked this:
char s[1000];
int ax =GetFullPathNameA("worldisnotenough.dll",
1000,
s, //Output to save the full DLL path
NULL);
std::cout<<s; //prints the correct path. Working.
The line cout << s prints the correct path, but DLL injection doesn't happen. No errors occur. I checked VirtualAllocEx, WriteProcessMemory, and CreateRemoteThread, and all of them are working properly.
Edit: complete code
#include <QCoreApplication>
#include<windows.h>
#include<tchar.h>
#include<iostream>
#include "E:/Users/Gen/qt project freiza/FreizaLibrary/freizalibrary.h"
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
// FreizaLibrary lib;
// QTextStream s(stdin);
// QString value = s.readLine();
// lib.injection(value.toInt());
int procID = 13044;
HANDLE hHandle = OpenProcess( PROCESS_CREATE_THREAD |
PROCESS_QUERY_INFORMATION |
PROCESS_VM_OPERATION |
PROCESS_VM_WRITE |
PROCESS_VM_READ,
FALSE,
procID );
QString dllName = "worldisnotenough.dll";
QFile myDllFile(dllName);
QFileInfo dllInfo(dllName);
QString str =dllInfo.absoluteFilePath();
char s[]="E:\\Users\\Gen\\qt project freiza\\build-libtester-FreizaKit-Release\\release\\worldisnotenough.dll";
std::cout<<strlen(s)<<"\n";
int ax =GetFullPathNameA("worldisnotenough.dll",
86, //I set it to 1000 before posting this question.
s, //Output to save the full DLL path
NULL);
//qDebug()<< QString::fromUtf8(s) <<" "<< ax;
std::cout<<s<<"size "<<ax;
LPVOID dllPathAddr = VirtualAllocEx(hHandle,
0,
strlen(s),
MEM_RESERVE|MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
std::cout<<" test \n";
std::cout<<(int*)dllPathAddr<<endl;
if(dllPathAddr==NULL)
{
qDebug()<<"virtual failed";
}
size_t x;
int n= WriteProcessMemory(hHandle,
dllPathAddr,
s,
strlen(s),
&x);
if(n==0)
{
qDebug()<<"write failed";
}
std::cout<<endl<<n<<"\t"<<x;
LPVOID addr = (LPVOID)GetProcAddress(GetModuleHandle(L"kernel32.dll"), "LoadLibraryA");
if(addr==NULL)
{
qDebug()<<"get proc failed";
}
HANDLE rThread = CreateRemoteThread(hHandle, NULL, 0, (LPTHREAD_START_ROUTINE)addr,dllPathAddr, 0, NULL);
if(rThread==NULL)
{
qDebug()<<"create remote failed";
}
WaitForSingleObject(rThread, INFINITE);
VirtualFreeEx(hHandle, dllPathAddr, 0, MEM_RELEASE);
CloseHandle(hHandle);
qDebug()<< "done";
return a.exec();
}
And why the negative votes?
When I post full code, people say to only post the segment of code that is not working, and I explained the situation as fully as I could. Because of these negative votes I won't be able to ask questions on Stack Overflow anymore. Thank you.
Your problem is you are trying to use a statically defined character array as a buffer for GetFullPathNameA!
See here:
char s[]="E:\\Users\\Gen\\qt project freiza\\build-libtester-FreizaKit-Release\\release\\worldisnotenough.dll";
std::cout<<strlen(s)<<"\n";
int ax =GetFullPathNameA("worldisnotenough.dll",
86, //1000 is no good, MAX_PATH is 260
s, //Using 's' as a buffer? Don't do that please!
NULL);
Furthermore, when using the ANSI version (as you are, denoted by the 'A'), the maximum path length is MAX_PATH == 260 characters:
"In the ANSI version of this function, the name is limited to MAX_PATH characters. To extend this limit to 32,767 wide characters, call the Unicode version of the function and prepend "\\?\" to the path."
Fixed code (I don't use Qt, so that part is missing here; it shouldn't matter, though, as it wasn't used for anything the injection needs):
#include <windows.h>
#include <iostream>
#include <tlhelp32.h>
HANDLE GetProcessHandle(wchar_t *ProcessName,ULONG *ReturnedProcessId);
int main(int argc, char *argv[])
{
ULONG procID;
HANDLE hHandle=GetProcessHandle(L"ExeToInjectInto.exe",&procID);
/*HANDLE hHandle=OpenProcess(PROCESS_CREATE_THREAD|PROCESS_QUERY_INFORMATION|PROCESS_VM_OPERATION|
PROCESS_VM_WRITE|PROCESS_VM_READ,FALSE,procID);*/
std::cout<<"handle: "<<hHandle<<" process ID: "<<procID<<"\n";
char s[]="C:\\Users\\DBVM_OS\\CodeBlocksProjects\\HelpFreizaProject\\bin\\Debug\\mytestdll.dll";
std::cout<<s<<"\n"<<strlen(s)<<"\n";
//First Problem:
/*In the ANSI version of this function, the name is limited to MAX_PATH characters.
To extend this limit to 32,767 wide characters, call the Unicode version of the function and prepend "\\?\"
*/
//Second Problem:
/* Don't use a defined static char[] as a buffer! allocate some memory or use the stack */
//char s2[MAX_PATH];
//int ax=GetFullPathNameA("mytestdll.dll",MAX_PATH,s2,0);
char *s2=new char[MAX_PATH];
if(s2==0) return 0;
int ax=GetFullPathNameA("mytestdll.dll",MAX_PATH,s2,0);
std::cout<<s2<<"\nsize returned: "<<ax<<" strlen: "<<strlen(s2)<<"\n";
LPVOID dllPathAddr=VirtualAllocEx(hHandle,0,(strlen(s2)+1),MEM_COMMIT,PAGE_EXECUTE_READWRITE);
std::cout<<"Remotely Allocated String Address: \n";
std::cout<<(int*)dllPathAddr<<"\n";
if(dllPathAddr==0)
{
OutputDebugStringA("VirtualAllocEx failed...");
return 0;
}
SIZE_T x;
BOOL n=WriteProcessMemory(hHandle,dllPathAddr,s2,(strlen(s2)+1),&x);
if(n==FALSE)
{
OutputDebugStringA("write failed");
VirtualFreeEx(hHandle,dllPathAddr,0,MEM_RELEASE);
CloseHandle(hHandle);
return 0;
}
std::cout<<"WriteProcessMemory Success: "<<n<<", Bytes Written: "<<x<<"\n";
LPVOID addr=(LPVOID)GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "LoadLibraryA");
if(addr==0)
{
OutputDebugStringA("get proc failed");
VirtualFreeEx(hHandle,dllPathAddr,0,MEM_RELEASE);
CloseHandle(hHandle);
return 0;
}
std::cout<<"LoadLibraryA: "<<addr<<"\n";
HANDLE rThread=CreateRemoteThread(hHandle,0,0,(LPTHREAD_START_ROUTINE)addr,dllPathAddr,0,0);
if(rThread==0)
{
OutputDebugStringA("create remote failed");
VirtualFreeEx(hHandle,dllPathAddr,0,MEM_RELEASE);
CloseHandle(hHandle);
return 0;
}
WaitForSingleObject(rThread,INFINITE);
std::cout<<"DLL Should have been injected successfully at this point...\nFreeing remote string";
BOOL freed=VirtualFreeEx(hHandle,dllPathAddr,0,MEM_RELEASE);
if(freed==0) OutputDebugStringA("Freeing Remote String Failed...");
delete[] s2; //if you dynamically allocated s2 like I've done...
CloseHandle(hHandle);
return 0;
}
HANDLE GetProcessHandle(wchar_t *ProcessName,ULONG *ReturnedProcessId)
{
PROCESSENTRY32W pe;
HANDLE Snap;
ZeroMemory(&pe, sizeof(PROCESSENTRY32W));
pe.dwSize=sizeof(PROCESSENTRY32W);
Snap=CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS,0);
if(Snap==INVALID_HANDLE_VALUE) return 0;
BOOL bProcess=Process32FirstW(Snap,&pe);
while(bProcess)
{
if(_wcsicmp(pe.szExeFile,ProcessName)==0)
{
HANDLE ProcessHandle=OpenProcess(PROCESS_CREATE_THREAD|PROCESS_QUERY_INFORMATION|PROCESS_VM_OPERATION|
PROCESS_VM_WRITE|PROCESS_VM_READ,FALSE,pe.th32ProcessID);
if(ReturnedProcessId!=0)
*ReturnedProcessId=pe.th32ProcessID;
CloseHandle(Snap);
return ProcessHandle;
}
bProcess=Process32NextW(Snap, &pe);
}
if(ReturnedProcessId!=0) *ReturnedProcessId=0;
CloseHandle(Snap);
return 0;
}
You need to use
strlen(s)+1
because strlen returns the length of the string without the terminating null character. So VirtualAllocEx and WriteProcessMemory will not write the '\0' character, and the filename will terminate at a "random" position in memory.
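With s and hHandle as in your code, the allocation and write should look roughly like this (untested sketch; PAGE_READWRITE is enough since the remote buffer only holds the path string):
SIZE_T cb = strlen(s) + 1;                       // + 1 for the terminating '\0'
LPVOID dllPathAddr = VirtualAllocEx(hHandle, 0, cb,
                                    MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
SIZE_T written = 0;
WriteProcessMemory(hHandle, dllPathAddr, s, cb, &written);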
Also
char s[]="E:\\Users\\Gen\\qt project freiza\\build-libtester-FreizaKit-Release\\release\\worldisnotenough.dll"; //- Length: 93+1
int ax =GetFullPathNameA("worldisnotenough.dll",
sizeof(s), //<-- was 86, but s[] is 93 + 1; if this has to hold the full path, maybe it was too small?
s, //Output to save the full DLL path
NULL);
looks wrong?!

I want to copy the data into a (wchar_t *) buffer but I am unable to do so because of incompatible types; typecasting doesn't get me the result

I want to print the buffer data in one go, avoiding all the other wprintf calls, but I am unable to convert the data into a type compatible with the buffer.
Have a look at the code and kindly tell me how to get through it:
DWORD PrintEvent(EVT_HANDLE hEvent)
{
DWORD status = ERROR_SUCCESS;
PEVT_VARIANT pRenderedValues = NULL;
WCHAR wsGuid[50];
LPWSTR pwsSid = NULL;
//
// Beginning of functional Logic
//
for (;;)
{
if (!EvtRender(hContext, hEvent, EvtRenderEventValues, dwBufferSize, pRenderedValues, &dwBufferUsed, &dwPropertyCount))
{
if (ERROR_INSUFFICIENT_BUFFER == (status = GetLastError()))
{
dwBufferSize = dwBufferUsed;
dwBytesToWrite = dwBufferSize;
pRenderedValues = (PEVT_VARIANT)malloc(dwBufferSize);
if (pRenderedValues)
{
EvtRender(hContext, hEvent, EvtRenderEventValues, dwBufferSize, pRenderedValues, &dwBufferUsed, &dwPropertyCount);
}
else
{
printf("malloc failed\n");
status = ERROR_OUTOFMEMORY;
break;
}
}
}
Buffer = (wchar_t*) malloc (1*wcslen(pRenderedValues[EvtSystemProviderName].StringVal));
//
// Print the values from the System section of the element.
wcscpy(Buffer,pRenderedValues[EvtSystemProviderName].StringVal);
int i = wcslen(Buffer);
if (NULL != pRenderedValues[EvtSystemProviderGuid].GuidVal)
{
StringFromGUID2(*(pRenderedValues[EvtSystemProviderGuid].GuidVal), wsGuid, sizeof(wsGuid)/sizeof(WCHAR));
wcscpy(Buffer+i,(wchar_t*)pRenderedValues[EvtSystemProviderGuid].GuidVal);
wprintf(L"Provider Guid: %s\n", wsGuid);
}
//Getting "??????" on screen after inclusion of guidval tell me the correct way to copy it??
wprintf(L"Buffer = %ls",Buffer);
//Also tell the way to copy unsigned values into buffer
wprintf(L"EventID: %lu\n", EventID);
wprintf(L"Version: %u\n", pRenderedValues[EvtSystemVersion].ByteVal);
wprintf(L"Level: %u\n", pRenderedValues[EvtSystemLevel].ByteVal);
wprintf(L"EventRecordID: %I64u\n", pRenderedValues[EvtSystemEventRecordId].UInt64Val);
if (EvtVarTypeNull != pRenderedValues[EvtSystemActivityID].Type)
{
StringFromGUID2(*(pRenderedValues[EvtSystemActivityID].GuidVal), wsGuid, sizeof(wsGuid)/sizeof(WCHAR));
wprintf(L"Correlation ActivityID: %s\n", wsGuid);
}
if (EvtVarTypeNull != pRenderedValues[EvtSystemRelatedActivityID].Type)
{
StringFromGUID2(*(pRenderedValues[EvtSystemRelatedActivityID].GuidVal), wsGuid, sizeof(wsGuid)/sizeof(WCHAR));
wprintf(L"Correlation RelatedActivityID: %s\n", wsGuid);
}
wprintf(L"Execution ProcessID: %lu\n", pRenderedValues[EvtSystemProcessID].UInt32Val);
wprintf(L"Execution ThreadID: %lu\n", pRenderedValues[EvtSystemThreadID].UInt32Val);
wprintf(L"Channel: %s\n",pRenderedValues[EvtSystemChannel].StringVal);
wprintf(L"Computer: %s\n", pRenderedValues[EvtSystemComputer].StringVal);
//
// Final Break Point
//
break;
}
}
The first error is when starting to write to the buffer:
Buffer = (wchar_t*) malloc (1*wcslen(pRenderedValues[EvtSystemProviderName].StringVal));
wcscpy(Buffer,pRenderedValues[EvtSystemProviderName].StringVal);
StringVal points to a wide character string with a terminating null character, so you should
Buffer = (wchar_t*) malloc (sizeof(wchar_t)*(wcslen(pRenderedValues[EvtSystemProviderName].StringVal)+1));
or even better
Buffer = _wcsdup(pRenderedValues[EvtSystemProviderName].StringVal);
Second error is when appending the GUID.
You are not allocating enough memory, you are just appending to the already full Buffer. And you are appending the raw GUID, not the GUID string. You should replace
int i = wcslen(Buffer);
wcscpy(Buffer+i,(wchar_t*)pRenderedValues[EvtSystemProviderGuid].GuidVal);
with something like
// Attention: memory leak if realloc returns NULL! So better use a second variable for the return code and check that before assigning to Buffer.
Buffer = (wchar_t*) realloc(Buffer, sizeof(wchar_t) * (wcslen(Buffer) + wcslen(wsGuid) + 1));
wcscat(Buffer,wsGuid);
Also:
Besides, you should do better error checking for EvtRender. And you should check dwPropertyCount before accessing pRenderedValues[i].
BTW, wprintf(L"Buffer = %s",Buffer); (with %s instead of %ls) is sufficient with wprintf.
And to your last question: if you want to append unsigned values to a buffer you can use wsprintf to write them into a string. If you can do it in C++, you should consider using std::wstring instead; it makes allocating buffers of the right size much easier.
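A rough sketch of that std::wstring approach, reusing the names from your code (pRenderedValues, wsGuid, EventID); untested:
#include <string>
#include <sstream>

std::wstring out = pRenderedValues[EvtSystemProviderName].StringVal;
if (NULL != pRenderedValues[EvtSystemProviderGuid].GuidVal)
{
    StringFromGUID2(*(pRenderedValues[EvtSystemProviderGuid].GuidVal), wsGuid,
                    sizeof(wsGuid)/sizeof(WCHAR));
    out += wsGuid;                                  // append the string form of the GUID, not the raw GUID
}
std::wostringstream nums;
nums << L" EventID: " << EventID;                   // unsigned values go through the stream
out += nums.str();
wprintf(L"Buffer = %s\n", out.c_str());             // no manual allocation to get wrong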

How to convert C-string (with cyrillic characters) to UTF-8-encoded string? [duplicate]

I was wondering if there is a recommended cross-platform (Windows and Linux) method for converting strings from UTF-16LE to UTF-8, or should one use different methods for each environment?
I've managed to google a few references to 'iconv', but for some reason I can't find samples of basic conversions, such as converting a wchar_t UTF-16 string to UTF-8.
Can anybody recommend a method that would be 'cross'? If you know of references or a guide with samples, I would very much appreciate it.
Thanks, Doori Bar
Change encoding to UTF-8 with PowerShell:
Get-Content PATH\temp.txt -Encoding Unicode | Set-Content -Encoding UTF8 PATH2\temp.txt
The open source ICU library is very commonly used.
If you don't want to use ICU,
Windows: WideCharToMultiByte
Linux: iconv (Glibc)
If you have MSYS2 installed then the iconv package (which is installed by default) lets you use:
iconv -f utf-16le -t utf-8 <input.txt >output.txt
#include <iconv.h>
wchar_t *src = ...; // or char16_t* on non-Windows platforms
size_t srclen = ...; // length of the input, in bytes
char *dst = ...;
size_t dstlen = ...; // size of the output buffer, in bytes
iconv_t conv = iconv_open("UTF-8", "UTF-16LE");
char *srcp = (char*)src;
iconv(conv, &srcp, &srclen, &dst, &dstlen);
iconv_close(conv);
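A slightly fuller sketch of the same route with error checking (untested, written against glibc's iconv; note that the lengths are byte counts and that iconv advances the pointers you pass in):
#include <iconv.h>

// Converts 'inbytes' bytes of UTF-16LE in 'in' to UTF-8 in 'out' (capacity 'outbytes').
// Returns the number of UTF-8 bytes written, or (size_t)-1 on error.
size_t convert_utf16le_to_utf8(const char *in, size_t inbytes, char *out, size_t outbytes)
{
    iconv_t cd = iconv_open("UTF-8", "UTF-16LE");
    if (cd == (iconv_t)-1)
        return (size_t)-1;
    char *src = (char *)in;
    char *dst = out;
    size_t dstleft = outbytes;
    size_t rc = iconv(cd, &src, &inbytes, &dst, &dstleft);
    iconv_close(cd);
    if (rc == (size_t)-1)
        return (size_t)-1;
    return outbytes - dstleft;   // bytes actually written
}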
I have run into this problem too, I solve it by using boost locale library
try
{
std::string utf8 = boost::locale::conv::utf_to_utf<char, short>(
(short*)wcontent.c_str(),
(short*)(wcontent.c_str() + wcontent.length()));
content = boost::locale::conv::from_utf(utf8, "ISO-8859-1");
}
catch (boost::locale::conv::conversion_error e)
{
std::cout << "Fail to convert from UTF-8 to " << toEncoding << "!" << std::endl;
break;
}
The boost::locale::conv::utf_to_utf function tries to convert from a buffer encoded as UTF-16LE to UTF-8.
The boost::locale::conv::from_utf function tries to convert from a buffer encoded as UTF-8 to ANSI; make sure the target encoding is right (here I use Latin-1, ISO-8859-1).
Another reminder: on Linux std::wstring uses 4 bytes per character, but on Windows std::wstring uses 2 bytes per character, so you had better not use std::wstring to hold a UTF-16LE buffer.
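On Windows, where wchar_t is already UTF-16, the simpler std::wstring overload of utf_to_utf is enough (untested sketch, reusing the wcontent name from the code above):
#include <boost/locale/encoding_utf.hpp>
#include <string>

std::wstring wcontent = L"...";   // UTF-16 on Windows
std::string utf8 = boost::locale::conv::utf_to_utf<char>(wcontent);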
There's also utfcpp, which is a header-only library.
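For example (untested sketch; the header name may vary between utf8.h and utf8/cpp11.h depending on the utfcpp version):
#include <utf8.h>
#include <iterator>
#include <string>

std::u16string u16 = u"...";    // UTF-16 input
std::string u8out;
utf8::utf16to8(u16.begin(), u16.end(), std::back_inserter(u8out));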
Another portable C possibility for converting strings between UTF-8, UTF-16, UTF-32, and wchar_t is the mdz_unicode library.
Thanks guys, this is how I managed to solve the cross Windows and Linux requirement:
Downloaded and installed MinGW and MSYS
Downloaded the libiconv source package
Compiled libiconv via MSYS.
That's about it.
You can also roll your own, which has several benefits:
Not subject to the somewhat restrictive license of iconv
No restriction on statically linking to avoid ICU version hell or other dynamic-link headaches
Avoid the need to link a very large library (such as icu or boost) (which, if statically linked, can add tens of MB of size to what might otherwise be a very small binary)
Note: the below assumes you have installed utf8proc, which is very compact. However, if you prefer, you can simply use its header file and copy the one utf8proc_encode_char() function that this code uses.
utf16le_to_utf8.h:
#ifndef UTF16LE_TO_UTF8_H
#define UTF16LE_TO_UTF8_H
enum utf816le_status {
utf816le_status_ok = 0,
utf816le_status_malformed_utf6le_input,
utf816le_status_memory,
utf816le_status_unencodable,
utf816le_status_buffsize
};
/*
* #return converted string, or NULL on error
* #param str input string
* #param len length (in bytes) of input string
* #param buff optional user-provided output buffer. if not provided, the returned
* converted string must be freed by caller using free()
* #param buffsize length of user-provided buffer, or 0 if no buffer was provided
* #param out_len pointer to length of converted output, in bytes
* #param status pointer to status, set to non-zero in case of error
*/
unsigned char *utf16le_to_utf8(const unsigned char *str, size_t len,
unsigned char *buff, size_t buffsize,
size_t *out_len,
enum utf816le_status *status);
#endif
utf16le_to_utf8.c:
#include <stdlib.h>
#include <string.h>
#include <utf8proc.h>
#include "utf16le_to_utf8.h"
#if defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
static inline uint16_t little_endian_16(uint16_t v) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
return v;
#else
return (v << 8) | (v >> 8);
#endif
}
static utf8proc_int32_t utf16le_next_codepoint(uint16_t *text, unsigned int max_bytes,
unsigned int *bytes_read) {
uint16_t c1 = little_endian_16(text[0]);
if (c1 >= 0xd800 && c1 < 0xdc00) {
if(max_bytes < 4) {
*bytes_read = 0;
return 0;
}
*bytes_read = 4;
uint16_t c2 = little_endian_16(text[1]);
return ((c1 & 0x3ff) << 10) + (c2 & 0x3ff) + 0x10000;
}
if(max_bytes < 2) {
*bytes_read = 0;
return 0;
}
*bytes_read = 2;
return c1;
}
unsigned char *utf16le_to_utf8(const unsigned char *str, size_t len,
unsigned char *buff, size_t buffsize,
size_t *out_len,
enum utf816le_status *status) {
if(!buffsize)
buff = NULL;
if(!buff)
buffsize = 0;
unsigned char *dst = buff;
size_t sizeof_dst = buffsize;
size_t written = 0;
*status = utf816le_status_ok;
unsigned int char_len;
for(size_t i = 0; i < len; i+= char_len) {
utf8proc_int32_t codepoint = utf16le_next_codepoint((uint16_t *)(str + i), len - i, &char_len);
if(!char_len) { // error! bad utf
*status = utf816le_status_malformed_utf6le_input;
break;
}
// we need at least 4 bytes to encode to utf8. add 1 for terminal null and 1 for good measure
if(sizeof_dst < written + 6) {
if(buffsize > 0) { // user-provided buffer is too small
*status = utf816le_status_buffsize;
break;
}
size_t new_size = sizeof_dst == 0 ? 64 : sizeof_dst * 2;
unsigned char *new_dst = realloc(dst, new_size);
if(!new_dst) { // out of memory!
*status = utf816le_status_memory;
break;
}
dst = new_dst;
sizeof_dst = new_size;
}
utf8proc_ssize_t want = utf8proc_encode_char(codepoint, dst + written);
if(!want) { // error
*status = utf816le_status_unencodable;
break;
}
written += want;
}
if(*status == utf816le_status_ok) {
*out_len = written;
dst[written] = '\0';
return dst;
}
*out_len = 0;
if(dst != buff)
free(dst);
return NULL;
}
which you can use like so:
...
unsigned char *converted = utf16le_to_utf8(utf16buff, utf16byte_count, NULL, 0, &output_len, &status);
if(!converted || !output_len)
fprintf(stderr, "Error! %i\n", status);
else
fprintf(stdout, "Converted to utf8 with length %zu: %s\n", output_len, converted);
free(converted);
}
}