freopen and fwprintf writes incorrect wide characters into TXT file - c++

I've already spent the whole day searching for an answer about UTF-8 and UTF-16 options when freopen and fwprintf used and no results for now. I will add my code below, maybe someone can help. Thanks in advance.
template<typename... ArgsT>
void log(const wchar_t* message, ArgsT... args)
{
fwprintf(stdout, message, args...);
fwprintf(stdout, L"\n");
fflush(stdout);
}
int main()
{
bool init = true;
if (!std::freopen("log.txt", "w", stdout))
{
init = false;
}
if (std::fwide(stdout, 1) <= 0)
{
init = false;
}
if (init)
{
std::wstring str = L"кирилиця";
log(L"Some text in cyrillic %S and some number %i", str.c_str(), 10);
}
return 0;
}
As the result in TXT file I have: Some text in cyrillic :8#8;8FO and some number 10

You need to start your file with wchar_t(0xFEFF).
It tells text editor apps to treat following data as unicode.

Related

std::wcout, why is the printed character not the same as the input? [duplicate]

I tried to printf with some accented characters such as á é í ó ú:
printf("my name is Seán\n");
The text editor in the DEVC++ IDE displays them fine - i.e the source code looks fine.
I guess I need some library other than stdio.h and maybe some variant of the normal printf.
I'm using IDE Bloodshed DEVC running on Windows XP.
Perhaps the best is to use Unicode.
Here's how...
First, manually set your console font to "Consolas" or "Lucida Console" or whichever True-Type Unicode font you can choose ("Raster fonts" may not work, those aren't Unicode fonts, although they may include characters you're interested in).
Next, set the console code page to 65001 (UTF-8) with SetConsoleOutputCP(CP_UTF8).
Then convert your text to UTF-8 (if it's not yet in UTF-8) using WideCharToMultiByte(CP_UTF8, ...).
Finally, call WriteConsoleA() to output the UTF-8 text.
Here's a little function that does all these things for you, it's an "improved" variant of wprintf():
int _wprintf(const wchar_t* format, ...)
{
int r;
static int utf8ModeSet = 0;
static wchar_t* bufWchar = NULL;
static size_t bufWcharCount = 256;
static char* bufMchar = NULL;
static size_t bufMcharCount = 256;
va_list vl;
int mcharCount = 0;
if (utf8ModeSet == 0)
{
if (!SetConsoleOutputCP(CP_UTF8))
{
DWORD err = GetLastError();
fprintf(stderr, "SetConsoleOutputCP(CP_UTF8) failed with error 0x%X\n", err);
utf8ModeSet = -1;
}
else
{
utf8ModeSet = 1;
}
}
if (utf8ModeSet != 1)
{
va_start(vl, format);
r = vwprintf(format, vl);
va_end(vl);
return r;
}
if (bufWchar == NULL)
{
if ((bufWchar = malloc(bufWcharCount * sizeof(wchar_t))) == NULL)
{
return -1;
}
}
for (;;)
{
va_start(vl, format);
r = vswprintf(bufWchar, bufWcharCount, format, vl);
va_end(vl);
if (r < 0)
{
break;
}
if (r + 2 <= bufWcharCount)
{
break;
}
free(bufWchar);
if ((bufWchar = malloc(bufWcharCount * sizeof(wchar_t) * 2)) == NULL)
{
return -1;
}
bufWcharCount *= 2;
}
if (r > 0)
{
if (bufMchar == NULL)
{
if ((bufMchar = malloc(bufMcharCount)) == NULL)
{
return -1;
}
}
for (;;)
{
mcharCount = WideCharToMultiByte(CP_UTF8,
0,
bufWchar,
-1,
bufMchar,
bufMcharCount,
NULL,
NULL);
if (mcharCount > 0)
{
break;
}
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
{
return -1;
}
free(bufMchar);
if ((bufMchar = malloc(bufMcharCount * 2)) == NULL)
{
return -1;
}
bufMcharCount *= 2;
}
}
if (mcharCount > 1)
{
DWORD numberOfCharsWritten, consoleMode;
if (GetConsoleMode(GetStdHandle(STD_OUTPUT_HANDLE), &consoleMode))
{
fflush(stdout);
if (!WriteConsoleA(GetStdHandle(STD_OUTPUT_HANDLE),
bufMchar,
mcharCount - 1,
&numberOfCharsWritten,
NULL))
{
return -1;
}
}
else
{
if (fputs(bufMchar, stdout) == EOF)
{
return -1;
}
}
}
return r;
}
Following tests this function:
_wprintf(L"\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7"
L"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
L"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
L"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
L"\n"
L"\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7"
L"\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
L"\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7"
L"\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
L"\n"
L"\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7"
L"\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
L"\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7"
L"\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
L"\n");
_wprintf(L"\x391\x392\x393\x394\x395\x396\x397"
L"\x398\x399\x39A\x39B\x39C\x39D\x39E\x39F"
L"\x3A0\x3A1\x3A2\x3A3\x3A4\x3A5\x3A6\x3A7"
L"\x3A8\x3A9\x3AA\x3AB\x3AC\x3AD\x3AE\x3AF\x3B0"
L"\n"
L"\x3B1\x3B2\x3B3\x3B4\x3B5\x3B6\x3B7"
L"\x3B8\x3B9\x3BA\x3BB\x3BC\x3BD\x3BE\x3BF"
L"\x3C0\x3C1\x3C2\x3C3\x3C4\x3C5\x3C6\x3C7"
L"\x3C8\x3C9\x3CA\x3CB\x3CC\x3CD\x3CE"
L"\n");
_wprintf(L"\x410\x411\x412\x413\x414\x415\x401\x416\x417"
L"\x418\x419\x41A\x41B\x41C\x41D\x41E\x41F"
L"\x420\x421\x422\x423\x424\x425\x426\x427"
L"\x428\x429\x42A\x42B\x42C\x42D\x42E\x42F"
L"\n"
L"\x430\x431\x432\x433\x434\x435\x451\x436\x437"
L"\x438\x439\x43A\x43B\x43C\x43D\x43E\x43F"
L"\x440\x441\x442\x443\x444\x445\x446\x447"
L"\x448\x449\x44A\x44B\x44C\x44D\x44E\x44F"
L"\n");
And should result in the following text in the console:
 ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿
ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ΢ΣΤΥΦΧΨΩΪΫάέήίΰ
αβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ
АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
абвгдеёжзийклмнопрстуфхцчшщъыьэюя
I do not know the encoding in which your IDE stores non-ASCII characters in .c/.cpp files and I do not know what your compiler does when encounters non-ASCII characters. This part you should figure out yourself.
As long as you supply to _wprintf() properly encoded UTF-16 text or call WriteConsoleA() with properly encoded UTF-8 text, things should work.
P.S. Some gory details about console fonts can be found here.
Windows console is generally considered badly broken regarding to character encodings. You can read about this problem here, for example.
The problem is that Windows generally uses the ANSI codepage (which is assuming you are in Western Europe or America Windows-1252), but the console uses the OEM codepage (CP850 under the same assumption).
You have several options:
Convert the text to CP850 before writing it (see CharToOem()). The drawback is that if the user redirects the output to a file (> file.txt) and opens the file with e.g. Notepad, he will see it wrong.
Change the codepage of the console: You need to select a TTF console font (Lucida Console, for example) and use the command chcp 1252.
Use UNICODE text and wprintf(): You need the TTF console font anyway.
The Windows-1252 (also known as "ANSI") character set used by Windows console mode is not the same as that used by GUI applications. Hence the IDE representation differs from the runtime representation.
A quick-and-dirty solution for your example is:
printf("my name is Se\xe9n\n");
Most solutions to this problem are flawed one way or another and the simplest solution for Windows applications that need extensive multi-language localisation is to write them as GUI apps using Unicode.

Passing const char* into constructor gives null

I'm trying to make a simple logger to log to a file to give me debug information about my program. I want to avoid using a library so I'm making one myself.
logging.cpp
#include <string.h> // String stuff
#include <time.h> // Time
#include "logging.hpp"
// Cathooks main logging util
CatLogger g_CatLogging("/tmp/nekohook.log");
//CatLogger g_CatLogging;
CatLogger::CatLogger(const char* _file_path, bool _ptime) : ptime(_ptime) {
file_path = _file_path;
}
CatLogger::~CatLogger() { fclose(log_handle); }
void CatLogger::log(const char* fmt, ...) {
// Basicly an init, because this cant be done on construct
if (log_handle == nullptr) {
log_handle = fopen(file_path, "w");
}
// Print our time if needed
if (ptime) {
// Get our time
time_t current_time = time(0);
struct tm* time_info = localtime(&current_time);
// print it to a string
char timeString[10];
strftime(timeString, sizeof(timeString), "%H:%M:%S", time_info);
// Print the time into the log
fprintf(log_handle, "%% [%s] ", timeString);
}
// Get the string we want to log
char buffer[1024];
va_list list;
va_start(list, fmt);
vsprintf(buffer, fmt, list);
va_end(list);
// Write our log to the file
fprintf(log_handle, "%s\n", file_path, buffer);
fflush(log_handle);
// Push result var to a console here, if i ever make a console api
}
logging.hpp
#include <stdarg.h> // ... arg
#include <stdio.h> // fopen(), fprint(), fputs()
class CatLogger {
public:
CatLogger(const char* _file_path, bool _ptime = false);
~CatLogger();
void log(const char* fmt, ...); // Use to log with
private:
FILE* log_handle = 0; // Handle used to log to files with
const char* file_path; // Path to log file
const bool ptime; // Whether to print time
};
// Use this to log
extern CatLogger g_CatLogging;
When I use the log function, it fails. I have no idea why. I made a dummy function that crashes when ran to get info from gdb of the input. I input the file_path variable into it and it returns 0x0. I'm not sure why this happens, I've made a sample executable separate from the library I'm using this in and it works flawlessly. Could this be due to the way I'm linking libraries or the lack of?
Here is the library I am working on with a link directly to the logging file.
https://github.com/oneechanhax/nekohook/blob/master/src/util/logging.cpp
It crashes on fprintf() on both due to fopen not returning a file handle, which is in turn because const char* isn't being passes for some reason.
Please tell me a way to debug this or point out where this went wrong as I'm at a loss trying for myself.
EDIT:
If i replace the following in CatLogger::log
if (log_handle == nullptr) {
log_handle = fopen(file_path, "w");
}
With the following
if (log_handle == nullptr) {
log_handle = fopen("/tmp/nekohook.log", "w");
}
It now works but i cant change the log location for other log classes now...
EDIT2:
Here is some debug info. Somehow the const char* doesnt get saved into the class. Thats the main issue that i have...
example
Maybe the string becomes null after constructing...
There are a lot of potential bugs.
if (log_handle == nullptr) {
log_handle = fopen(file_path, "w");
if(!log_handle) {
perror("File opening failed"); // check your console output.
return EXIT_FAILURE;
}
}
// Get the string we want to log
char buffer[1024];
va_list list;
va_start(list, fmt);
vsprintf(buffer, fmt, list); // potential segmentation fault
va_end(list);
use this instead
int vsnprintf( char* buffer, std::size_t buf_size, const char* format, va_list vlist ); // (since C++11)
And more it the program is multithreaded.
This was a case of static init order fiasco where the const char* wouldn't get initialized before the function was called.
The solution was to make the file link first compared to other files and the object works now.

Call HPDF_SaveToFile() with japanese filename

Im trying to save one pdf in path that contains japanese username. In this case, HPDF_SaveToFile is doing crash my app on windows. Any options to compile or other thing? Any idea to support Unicode filenames with libhaur? I not want to create pdf with japanese encode, I want to write pdf with japanese filename.
A solution in Qt. If you use C++, you can use fstream/ofstream(::write). If you use C, you can use fwrite.
QFile file(path);
if (file.open(QIODevice::WriteOnly))
{
HPDF_SaveToStream(m_pdf);
/* get the data from the stream and write it to file. */
for (;;)
{
HPDF_BYTE buf[4096];
HPDF_UINT32 siz = 4096;
HPDF_STATUS ret = HPDF_ReadFromStream(m_pdf, buf, &siz);
if (siz == 0)
{
break;
}
if (-1 == file.write(reinterpret_cast<const char *>(buf), siz))
{
qDebug() << "Write PDF error";
break;
}
}
}
HPDF_Free(m_pdf);
Refrence: Libharu Usage examples

How can I check a log for a string or a digit?

I'm doing some very basic coding in Visual Studio.
I am attempting to check the output of a command for a group of period separated digits or a string. It works fine when checking for a string but it doesn't seem to work if I ask it to look for a group of period separated digits such as 4.2.3
The code I am using is as follows
BOOL CheckLogForString(char* str)
{
FILE* f = fopen (LogName, "r");
if (NULL == f){ MessageBox(0, "Can't find log!","error",MB_APPLMODAL|MB_OK|MB_ICONSTOP);return FALSE;}
BOOL found = false;
while (!feof(f))
{
char buf[1000]="";
if (fgets(buf,1000,f)==NULL) break;
strupr(buf);
if (0 != strstr (buf, str))found = TRUE;
}
fclose (f);
return found;
}
I then call this as follows
if (CheckLogForString("WORDS"))
{
DO SOMETHING
}
That works fine but when I try
if (CheckLogForString("4.2.3"))
{
DO SOMETHING
}
It doesn't work. What can I do to make it work please?

receiving webpages using windows sockets(C ++),but got some unexpected words

I am trying to get a webpage with sockets,using http GET.I do get the page,but there is something little wrong.
Sometimes I got it all right,but sometimes I got it with wrong characters like:
**<td class="c_ba2636">09</t
1ff8
d>**
it should be :
<td class="c_ba2636">09</td>
I donot know why there is a "1ff8" and some "\r\n".
It happens here and there from time to time.And sometimes it occurs like:
06
again it should be :
<td class="c_ba2636">06</td>
this is how I receive and save the page from a socket:
ofstream out("webpage.html");
char text[2050]="";
int recvbytes=0;
string content;
while ( (recvbytes = recv(sock, text, 2048, 0)) > 0)
{
content=string(text,recvbytes);
out << content.c_str();
//System::Console::Write(gcnew String(content.c_str()));
}
closesocket(sock);
out.close();
I tried :out << text; it did not work.
Please does anyone know what's wrong with my codes.
I am using VS2010,and this is a winform program.
It may be normal if your input text is UTF8 encoded and contains characters out of ASCII space
Now I got it done.it turns out that those "1ff8" "2000" or whatever are from some http protocols,to indicates something(length?).I just need to delete those lines and rearrange the lines that are interrupted by them.So I add a function:
private: void rearrangment()
{
ifstream ifile("webpage.html");
ofstream ofile("web.html");
char line1[2048]="";
char line2[2048]="";
char line3[2048]="";
ifile.getline(line1,2047);
//ifile.getline(line2,2047);
while(!ifile.eof())
{
ifile.getline(line2,2047);
if(string(line2,0,3)!=" "
&& line2[0]!='<' && line2[1]!='<' && line2[2]!='<')//they are "1ff8"s
{
ifile.getline(line3,2047);
for(int i=0;i<2046;++i)
{
if(line1[i]==13)
{
line1[i]=0;
break;
}
}
strcat(line1,line3);
}
else
{
ofile<<line1<<endl;
strcpy(line1,line2);
//ofile<<line1;
}
//ofile<<line1;
}
ifile.close();
ofile.close();
}
and now it works well.
Sorry about this stupid question,I should have searched before I asked.