Can't read file with cyrillic path in C++ - c++

I'm trying to read a file whose path contains Cyrillic characters, and I get ifstream.is_open() == false.
This is my code:
// Reads the text file at `path` and returns its lines concatenated together
// (line terminators are dropped). Returns the literal "isn't opened" when the
// stream cannot be opened.
std::string ReadFile(const std::string &path) {
    std::ifstream input(path.c_str(), std::ios::in);
    if (!input.is_open())
        return "isn't opened";

    std::string contents;
    // std::getline leaves `line` empty when nothing could be extracted,
    // so driving the loop from its result yields the same text.
    for (std::string line; std::getline(input, line);)
        contents += line;

    input.close();
    return contents;
}
// Reads the file at a hard-coded path containing Cyrillic characters and
// prints whatever ReadFile returns.
int main() {
    const std::string content = ReadFile(std::string("C:\\test\\документ.txt"));
    std::cout << content << std::endl;
    return 0;
}
Specified file exists
I tried to find a solution on Google, but found nothing that works.
Here are the links I looked at:
I don't need wstring
The same as previous
no answer here
is not about C++
has no answer too
P.S. I need to get file's content in string, not in wstring
THIS IS ENCODING SETTINGS OF MY IDE (CLION 2017.1)

You'll need an up-to-date compiler or Boost. std::filesystem::path can handle these names, but it's new in the C++17 standard. Your compiler may still have it as std::experimental::filesystem::path, or else you'd use the third-party boost::filesystem::path. The interfaces are pretty comparable as the Boost version served as the inspiration.

The definition of std::string is std::basic_string<char>, so your Cyrillic characters are not stored as intended. At least try to use std::wstring to store your file path; you can still read the file's contents into a std::string.

First of all, set your project settings to use UTF-8 encoding instead of windows-1251. Until standard library gets really good (not any time soon) you basically can not rely on it if you want to deal with io properly. To make input stream read from files on Windows you need to write your own custom input stream buffer that opens files using 2-byte wide chars or rely on some third-party implementations of such routines. Here is some incomplete (but sufficient for your example) implementation:
// assuming that usual Windows SDK macros such as _UNICODE, WIN32_LEAN_AND_MEAN are defined above
#include <Windows.h>
#include <string>
#include <iostream>
#include <system_error>
#include <memory>
#include <utility>
#include <cstdlib>
#include <cstdio>
static_assert(2 == sizeof(wchar_t), "wchar_t size must be 2 bytes");
using namespace ::std;
// Minimal read-only stream buffer over a Win32 file handle opened from a
// wide-character (UTF-16) path. It reads one byte per underflow() call.
class MyStreamBuf final: public streambuf
{
    #pragma region Fields
    private: ::HANDLE const m_file_handle;
    private: char m_buffer; // typically buffer should be much bigger
    #pragma endregion

    // Opens an existing file for shared reading.
    // Throws system_error carrying GetLastError() when CreateFileW fails.
    public: explicit
    MyStreamBuf(wchar_t const * psz_file_path)
    :   m_file_handle(::CreateFileW(psz_file_path, FILE_GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL))
    ,   m_buffer{}
    {
        if(INVALID_HANDLE_VALUE == m_file_handle)
        {
            auto const error_code{::GetLastError()};
            throw(system_error(static_cast< int >(error_code), system_category(), "::CreateFileW call failed"));
        }
    }

    // The handle is closed exactly once by the destructor, so a copy would
    // close it a second time. Copying is therefore forbidden.
    public: MyStreamBuf(MyStreamBuf const &) = delete;
    public: MyStreamBuf & operator =(MyStreamBuf const &) = delete;

    public:
    ~MyStreamBuf(void)
    {
        auto const closed{::CloseHandle(m_file_handle)};
        if(FALSE == closed)
        {
            auto const error_code{::GetLastError()};
            //throw(::std::system_error(static_cast< int >(error_code), system_category(), "::CloseHandle call failed"));
            // throwing in destructor is kinda wrong
            // but if CloseHandle returned false then our program is in inconsistent state
            // and must be terminated anyway
            (void) error_code; // not used
            abort();
        }
    }

    // Refills the one-byte get area from the file.
    // Returns the byte read, or traits_type::eof() at end of file.
    // Throws system_error when ReadFile fails.
    private: auto
    underflow(void) -> int_type override
    {
        ::DWORD bytes_count_to_read{1};
        ::DWORD read_bytes_count{};
        {
            auto const succeeded{::ReadFile(m_file_handle, addressof(m_buffer), bytes_count_to_read, addressof(read_bytes_count), nullptr)};
            if(FALSE == succeeded)
            {
                auto const error_code{::GetLastError()};
                setg(nullptr, nullptr, nullptr);
                throw(system_error(static_cast< int >(error_code), system_category(), "::ReadFile call failed"));
            }
        }
        if(0 == read_bytes_count)
        {
            setg(nullptr, nullptr, nullptr);
            return(traits_type::eof());
        }
        setg(addressof(m_buffer), addressof(m_buffer), addressof(m_buffer) + 1);
        // to_int_type maps the byte into the non-negative int_type range;
        // returning the raw char would turn byte 0xFF into -1 (== EOF)
        // wherever char is signed and end the stream early.
        return(traits_type::to_int_type(m_buffer));
    }
};
// Reads the file at the given wide-character path through MyStreamBuf and
// returns its lines concatenated together (line terminators are dropped).
// Exceptions thrown while opening the file propagate to the caller.
string
MyReadFile(wchar_t const * psz_file_path)
{
    // istream never takes ownership of its buffer, so the buffer lives in
    // this scope: it is declared before the stream (and so destroyed after
    // it) and releases the file handle on every exit path.
    MyStreamBuf file_buffer(psz_file_path);
    istream in(addressof(file_buffer)); // note that we create normal stream
    string file_content;
    for(string new_line; getline(in, new_line); )
    {
        file_content += new_line;
    }
    return file_content; // plain return keeps NRVO available
}
// Prints the contents of a file addressed by a wide-character path.
int
main(void)
{
    // note that path is a wide string
    wchar_t const * const wide_path = L"C:\\test\\документ.txt";
    string const content = MyReadFile(wide_path);
    cout << content << endl;
    return 0;
}

Change your code to use wstring and save your source file in a Unicode encoding other than UTF-8 (UCS-2, UTF-16 or similar). MSVC has a non-standard overload specifically so that it can handle non-ASCII characters in file names:
// Reads the text file at the wide-character `path` and returns its lines
// concatenated together (line terminators are dropped). Returns the literal
// "isn't opened" when the stream is in a failed state after construction.
// The ifstream is constructed from a const wchar_t*, which the surrounding
// text describes as a non-standard MSVC overload.
std::string ReadFile(const std::wstring &path)
{
    std::ifstream input(path.c_str(), std::ios::in);
    if (input.fail())
        return "isn't opened";

    std::string contents;
    std::string line;
    while (std::getline(input, line))
        contents.append(line);
    return contents;
}
// Reads a file through a wide-character path and prints its contents.
int main()
{
    const std::wstring path(L"C:\\test\\документ.txt");
    std::cout << ReadFile(path) << std::endl;
}
Also, note corrected ReadFile code.

Related

File.exe has Triggered a Breakpoint because of Fseek

I'm trying to determine how big a file I'm reading is, in bytes, so I used fseek to jump to the end, and it triggered the error: file.exe has triggered a breakpoint.
Here's the code:
FileUtils.cpp:
#include "FileUtils.h"
namespace impact {
// Reads the whole file at `filepath` (opened in text mode) and returns its
// contents. Returns an empty string when `filepath` is null, the file cannot
// be opened, or its size cannot be determined. Previously those cases passed
// a null FILE* to fseek or allocated from a bogus length.
std::string read_file(const char* filepath)
{
    if (filepath == nullptr)
        return std::string();

    FILE* file = fopen(filepath, "rt");
    if (file == nullptr)
        return std::string();

    std::string result;
    if (fseek(file, 0, SEEK_END) == 0)
    {
        const long length = ftell(file); // -1 on failure
        if (length > 0 && fseek(file, 0, SEEK_SET) == 0)
        {
            result.resize(static_cast<std::string::size_type>(length));
            // Text-mode translation can deliver fewer bytes than ftell
            // reported, so keep exactly what fread returned. Sizing by the
            // count also preserves embedded NUL bytes.
            const size_t bytes_read = fread(&result[0], 1, result.size(), file);
            result.resize(bytes_read);
        }
    }
    fclose(file);
    return result;
}
}
FileUtils.h:
#pragma once
#include <stdio.h>
#include <string>
#include <fstream>
namespace impact {
// Reads the file at `filepath` and returns its contents as a std::string.
// The definition lives in FileUtils.cpp.
std::string read_file(const char* filepath);
}
If more info is required just ask me for it I would be more than happy to provide more!
You are doing this in the C way, C++ has much better (in my opinion) ways of handling files.
Your error looks like it may be caused because the file didn't open correctly (you need to check if file != nullptr).
To do this in C++17 you should use the standard library filesystem
(Note: You can also do this with C++11 experimental/filesystem using std::experimental::filesystem namespace)
Example:
// Sketch only: shows querying the file size with std::filesystem::file_size.
// The rest of the body is deliberately elided ("...").
std::string read_file(const std::filesystem::path& filepath) {
auto f_size = std::filesystem::file_size(filepath);
...
}
Additionally to read a file in C++ you do not need to know the size of the file. You can use streams:
// Returns the full contents of the file at `filepath`.
// Throws std::runtime_error("File failed to open") when the stream is in a
// failed state after construction.
std::string read_file(const std::filesystem::path& filepath) {
    std::ifstream input(filepath);
    if (input.fail()) {
        throw std::runtime_error("File failed to open");
    }
    // Stream the file's buffer into an in-memory stream in one operation,
    // so the file size never has to be known up front.
    std::stringstream contents;
    contents << input.rdbuf();
    return contents.str();
}
As you can see, the C++ way of doing it is much shorter and much easier to debug (helpful exceptions with descriptions are thrown when something goes wrong)

getline() function error in c++ code

Can someone tell me what am i doing wrong here i am getting an error saying getline() not declared in this scope.........any help would be appreciated.
no matching function for call to getline(char**, size_t*, FILE*&)
#include<iostream>
#include<fstream>
#include<string>
using namespace std;
// Cursor into the line currently being checked; read by A().
char *s;
// Reads input.txt line by line with POSIX getline() and reports, per line,
// whether A() accepts it with only the trailing newline left over.
// Exits with EXIT_FAILURE when input.txt cannot be opened.
int main(int argc, char *argv[])
{
    FILE* fd = fopen("input.txt", "r");
    if(fd == NULL)
    {
        fputs("Unable to open input.txt\n", stderr);
        exit(EXIT_FAILURE);
    }
    // getline() owns, and may realloc, the buffer it is given. Keep that
    // buffer in a dedicated pointer so the pointer handed back to getline()
    // is always the one it allocated, even if A() advances the cursor `s`.
    // NOTE(review): A() is not visible here; it is presumed to consume input
    // through `s`, which is why the original kept `backup`.
    char* line = NULL;
    size_t length = 0;
    ssize_t read;
    while ((read = getline(&line, &length, fd) ) > 0)
    {
        s = line;
        const char* backup = line;
        if (A() && *s == '\n')
        {
            printf("%sis in the language\n", backup);
        }
        else
        {
            fprintf(stderr, "%sis not in the language\n", backup);
        }
    }
    free(line); // the getline() buffer was previously leaked
    s = NULL;   // do not leave the cursor pointing into freed memory
    fclose(fd);
    return 0;
}
You'll need to use C++ style code in order to use getline in a cross platform way.
#include <fstream>
#include <string>
using namespace std;
// Line currently being checked.
std::string s;
// Placeholder acceptor: always succeeds.
bool A() { return true; }
// Reads input.txt line by line with std::getline and reports, per line,
// whether A() accepts it with nothing left over. Returns 1 when input.txt
// cannot be opened.
int main(int argc, char *argv[])
{
    ifstream myfile("input.txt");
    if(!myfile.is_open())
    {
        fprintf(stderr, "Unable to open input.txt\n");
        return 1;
    }
    while (getline(myfile, s))
    {
        const std::string backup = s;
        // std::getline strips the '\n' delimiter, so "only the newline is
        // left" from the C version becomes "nothing is left" here. The old
        // comparison against "\n" could never be true.
        if (A() && s.empty())
        {
            printf("%s is in the language\n", backup.c_str());
        }
        else
        {
            fprintf(stderr, "%s is not in the language\n", backup.c_str());
        }
    }
    return 0;
}
What are you trying to do with getline(&s, &length, fd)? Are you trying to use the C getline?
Assuming you have opened the file correctly, in c++ your getline should look something like this: getline(inputStream, variableToReadInto, optionalDelimiter).
You didn't include <stdio.h> but you did include <fstream>. Maybe use ifstream fd("input.txt");
What's A()
If you ARE trying to use the C getline, the using namespace std may be interfering
Why are you using printf and fprintf and not cout << xxxxxx and fd << xxxxxx
You seem to be a bit confused with various getline function signatures.
The standard C++ std::getline signature is
template< class CharT, class Traits, class Allocator >
std::basic_istream<CharT,Traits>& getline( std::basic_istream<CharT,Traits>& input,
std::basic_string<CharT,Traits,Allocator>& str,
CharT delim );
It takes an input stream object, a string and a character delimiter (there's an overload without the delimiter too).
The POSIX getdelim signature (getline is the same function without the delim parameter) is
ssize_t getdelim(char **lineptr, size_t *n, int delim, FILE *stream);
The delimiter is optional here too: POSIX getline is the variant without it, ssize_t getline(char **lineptr, size_t *n, FILE *stream).
Now, in your code you're passing arguments as if calling the POSIX version without a delimiter. If you want to use the standard one you'll have to change the arguments (i.e. pass an istream object instead of a FILE*). I don't know if the POSIX one is even available to you, since POSIX is separate from any C++ standard.
Note that the fputs, FILE*, fprintf are C filehandling functions, not the C++ ones.

Writing C++ program compressing/decompressing data

I have to write C++ program that like gzip can
*Take input from file or from char stream like compression below
gzip file
type file | gzip
*Program have file or char stream output like decompression below
gzip -d file.gz
gzip -dc file.gz
I don't know how to take to the task and what techniques have to use and how to create classes buffering input and output. I have classes buffering input and output and read/write data from/to file.
DataBuffer.h (taking uncompressed data from file):
#ifndef DataBuffer_h
#define DataBuffer_h
#include <fstream>
#include <string>
// Result of a buffer-fill operation on DataBuffer.
enum DataBufferState
{
DATABUFFER_OK = 0,  // the fill stopped without the stream reporting end of file
DATABUFFER_EOF = 1  // the underlying stream reported end of file
};
// Reads a file in chunks of at most maxBufferSize characters into an
// in-memory string buffer.
class DataBuffer
{
std::fstream file;           // input file being read
std::string buffer;          // data accumulated since the last GetDataBuffer()
unsigned int maxBufferSize;  // upper bound used by the fill methods
public:
// Opens `filename` for reading and stores the buffer size limit.
DataBuffer(const std::string& filename, unsigned int maxBuffSize);
// Closes the file.
~DataBuffer();
// Opens `filename` for input; returns whether the file is open afterwards.
bool OpenFile(const std::string& filename);
// Replaces the buffer size limit.
void SetMaxBufferSize(unsigned int maxBuffSize);
// Line-oriented fill: appends whole lines while they fit within the limit.
DataBufferState FullBufferWithDataOld();
// Character-oriented fill: appends up to maxBufferSize characters.
DataBufferState FullBufferWithData();
// Returns the accumulated data and empties the internal buffer.
std::string GetDataBuffer();
};
#endif
DataBuffer.cpp:
#include "DataBuffer.h"
using namespace std;
// Stores the buffer size limit and opens `filename` for reading.
// The result of OpenFile is not checked here.
DataBuffer::DataBuffer(const string& filename, unsigned int maxBuffSize)
    : maxBufferSize(maxBuffSize)
{
    OpenFile(filename);
}
// Closes the input file if it is still open.
DataBuffer::~DataBuffer()
{
    if (file.is_open())
    {
        file.close();
    }
}
// Opens `filename` for input.
// Returns true when the file is open afterwards, false otherwise.
bool DataBuffer::OpenFile(const string& filename)
{
    file.open(filename.c_str(), ios::in);
    return file.is_open();
}
void DataBuffer::SetMaxBufferSize(unsigned int maxBuffSize)
{
maxBufferSize = maxBuffSize;
}
// Line-oriented fill: appends whole lines to the buffer until the next line
// would push it past maxBufferSize.
// Returns DATABUFFER_EOF when the input is exhausted or the stream is
// unusable, and DATABUFFER_OK when it stopped because the buffer is full.
// NOTE(review): a single line longer than maxBufferSize is never consumed,
// so the method keeps returning DATABUFFER_OK without progress. Confirm how
// callers are expected to handle that.
DataBufferState DataBuffer::FullBufferWithDataOld()
{
    string line;
    while(true)
    {
        const streampos pos = file.tellg(); // remember the position before reading the line
        if(!getline(file,line))
        {
            // Nothing was extracted: end of input or a failed / never-opened
            // stream. Stop here instead of appending a phantom "\n" (EOF) or
            // spinning forever (failed stream).
            return DATABUFFER_EOF;
        }
        // True when the line ran into end of file, i.e. no '\n' was consumed.
        const bool lineEndedAtEof = file.eof();
        if( buffer.size()+line.size()>maxBufferSize )
        {
            // Rewind the file pointer to where this line started.
            file.clear(); // drop eofbit so the seek is not rejected
            file.seekg(pos,ios::beg);
            break;
        }
        buffer += line;
        if(lineEndedAtEof)
            return DATABUFFER_EOF; // the last line had no terminator, so add none
        buffer += '\n';
    }
    return DATABUFFER_OK;
}
// Character-oriented fill: appends up to maxBufferSize characters from the
// file to the buffer.
// Returns DATABUFFER_EOF when a read failed (end of file, or a stream that is
// unusable, e.g. never opened), and DATABUFFER_OK otherwise.
DataBufferState DataBuffer::FullBufferWithData()
{
    char c;
    for(unsigned int i=0;i<maxBufferSize;++i)
    {
        // get(c) reports failure through the stream state, so a failed read
        // never appends a garbage character.
        if(!file.get(c)) break;
        buffer += c;
    }
    // fail() is set both at end of file and on any other read failure.
    // Checking only eof() made a broken stream report DATABUFFER_OK forever.
    return file.fail() ? DATABUFFER_EOF : DATABUFFER_OK;
}
string DataBuffer::GetDataBuffer()
{
string buf = buffer;
buffer.clear();
return buf;
}
BufferWriter.h (Save uncompressed data into file):
#ifndef BufferWriter_h
#define BufferWriter_h
#include <string>
#include <fstream>
// Appends string buffers to a file, reopening the file for every write.
class BufferWriter
{
std::string filename;  // path of the output file
std::fstream file;     // stream reused for each open/write/close cycle
public:
// Remembers `filename_`, opens the file once in non-append mode and closes it.
BufferWriter(const std::string& filename_);
// Closes the file.
~BufferWriter();
// Opens `filename` for output, in append mode when `appending` is true.
// Returns whether the file is open afterwards.
bool OpenFile(const std::string& filename, bool appending);
// Appends the contents of `buffer` to the file.
void SendBufferToFile(std::string& buffer);
};
#endif
BufferWriter.cpp
#include "BufferWriter.h"
using namespace std;
// Remembers the output path, then opens the file once in non-append mode and
// closes it again straight away.
BufferWriter::BufferWriter(const string& filename_)
    : filename(filename_)
{
    OpenFile(filename, false);
    file.close();
}
// Closes the output file if it is still open.
BufferWriter::~BufferWriter()
{
    if (file.is_open())
    {
        file.close();
    }
}
// Opens `filename` for output, adding append mode when `appending` is true.
// Returns true when the file is open afterwards, false otherwise.
// Note: the parameter shadows the member of the same name.
bool BufferWriter::OpenFile(const string& filename, bool appending)
{
    const ios::openmode mode = appending ? (ios::out | ios::app) : ios::out;
    file.open(filename.c_str(), mode);
    return file.is_open();
}
// Appends the whole of `buffer` to the output file: open in append mode,
// write, close. The result of OpenFile is not checked.
void BufferWriter::SendBufferToFile(string& buffer)
{
    OpenFile(filename, true);
    const string::size_type byteCount = buffer.size();
    file.write(buffer.c_str(), byteCount);
    file.close();
}
Can you give me some hints how to improve code for input and output mechanisms?
Assume that I have class presented below, how to use istream or iterators to fill buffer with data from file or standard input. What classes from std or boost? What parameters? Somelike to support definition of class with this functionality.
[EDIT]:
#ifndef StreamBuffer_h
#define StreamBuffer_h
#include <string>
using namespace std;
// Result of a buffer-fill operation.
enum DataBufferState
{
    DATABUFFER_OK = 0,
    DATABUFFER_EOF = 1
};
// gzip file
// type file | gzip -d
// gzip -d file.gz
// gzip -dc file.gz
// Intended constructor parameters: the stream we want to read from and the
// buffer length (only the length is taken so far).
//
// Skeleton of a bounded read buffer. No input source is attached yet, so
// FullBufferWithData() cannot read anything.
class StreamBuffer
{
    int maxBufferSize;
    std::string buffer;

// The members below were implicitly private, which made the class impossible
// to construct or use from outside.
public:
    explicit StreamBuffer(int maxBuffSize)
    {
        SetMaxBufferSize(maxBuffSize);
    }
    ~StreamBuffer()
    {
    }
    // Replaces the buffer size limit.
    void SetMaxBufferSize(unsigned int maxBuffSize)
    {
        maxBufferSize = maxBuffSize;
    }
    // Fills the buffer from the input source.
    // With no source attached there is nothing to read, so this reports
    // DATABUFFER_EOF. It previously had no return statement, which is
    // undefined behaviour.
    DataBufferState FullBufferWithData()
    {
        // What to use what to do in this method to read part of file or standard char input to buffer?
        return DATABUFFER_EOF;
    }
    // Returns a copy of the buffered data.
    std::string GetDataBuffer()
    {
        return buffer;
    }
};
#endif
[EDIT2]:
I want to do the same thing as in this thread: Read from file or stdin, but in C++.
In general you read input from a source and write it to a sink. The simplest case is when you simply write what you read. You, however, want to apply a transformation (or filter) to the data that you read. Seeing as you're after "the c++ way," I'd suggest taking a look at boost::iostreams which abstracts the task in terms of sources/sinks.
Boost defines an abstract source by:
// Illustration of the Boost.Iostreams "Source" concept as described in the
// surrounding text: a type that exposes a read() member.
// The body of read() is intentionally left as a description only.
struct Source {
typedef char char_type;
typedef source_tag category;
std::streamsize read(char* s, std::streamsize n)
{
// Read up to n characters from the input
// sequence into the buffer s, returning
// the number of characters read, or -1
// to indicate end-of-sequence.
}
};
And sinks are defined in a similar way (with a write instead of a read, of course). The benefit of this is that the details of the source/sink is irrelevant - you can read/write to file, to a network adapter, or whatever, without any structural changes.
To apply filters I'd again suggest looking at boost::iostreams, although they do abstract a lot which somewhat complicates implementation..

Reading popen results in C++

I am writing a C++ application and I need to read the result of a system command.
I am using popen() more or less as shown here:
const int MAX_BUFFER = 2048;
string cmd="ls -l";
char buffer[MAX_BUFFER];
// Run the command and read its standard output line by line.
FILE *stream = popen(cmd.c_str(), "r");
if (stream){
    // fgets() returns NULL at end of output AND on a read error, so driving
    // the loop from it terminates in both cases. A `while (!feof(stream))`
    // loop never ends when a read error occurs before end of file.
    while (fgets(buffer, MAX_BUFFER, stream) != NULL)
    {
        //here is all my code
    }
    pclose(stream);
}
I've been trying to re-write this in a different way. I saw some non-standard solutions like:
// Non-working sample quoted by the asker (the compiler rejects it):
// an fstream is constructed from a FILE*, and std::getline is then called
// on the FILE* itself.
FILE *myfile;
std::fstream fileStream(myfile);
std::string mystring;
while(std::getline(myfile,mystring))
{
// .... Here I do what I need
}
My compiler does not accept this though.
How can I read from popen in C++?
Your example:
FILE *myfile;
std::fstream fileStream(myfile);
std::string mystring;
while(std::getline(myfile,mystring))
This doesn't work because, although you're very close, the standard library doesn't provide an fstream that can be constructed from a FILE*. Boost.Iostreams, however, does provide a stream that can be constructed from a file descriptor, and you can get one from a FILE* by calling fileno.
E.g.:
// Reading the command's output needs an input device.
// file_descriptor_source is the read side; file_descriptor_sink is
// write-only and cannot be used with std::getline.
typedef boost::iostreams::stream<boost::iostreams::file_descriptor_source>
boost_stream;
FILE *myfile;
// make sure to popen and it succeeds
boost_stream stream(fileno(myfile));
stream.set_auto_close(false); // https://svn.boost.org/trac/boost/ticket/3517
std::string mystring;
while(std::getline(stream,mystring))
Don't forget to pclose later still.
Note: Newer versions of boost have deprecated the constructor which takes just a fd. Instead you need to pass one of boost::iostreams::never_close_handle or boost::iostreams::close_handle as a mandatory second argument to the constructor.
Here is something which i wrote long back, may help you. It might have some errors.
#include <vector>
#include <string>
#include <stdio.h>
#include <iostream>
// Runs `cmd` through popen() and stores everything it wrote to standard
// output as a single element of `out` (any previous contents are discarded).
// One trailing newline, if present, is removed.
// Returns false when the command could not be started, true otherwise.
// The command's exit status is not inspected.
bool my_popen (const std::string& cmd,std::vector<std::string>& out ) {
    const int SIZEBUF = 1234;
    char buf [SIZEBUF];
    out.clear();
    FILE* fp = popen(cmd.c_str (), "r");
    if (fp == NULL) {
        return false;
    }
    std::string cur_string;
    while (fgets(buf, sizeof (buf), fp)) {
        cur_string += buf;
    }
    pclose(fp);
    // Strip only a real trailing newline. Unconditionally dropping the last
    // character corrupted output that did not end in '\n'.
    if (!cur_string.empty() && cur_string[cur_string.size () - 1] == '\n') {
        cur_string.erase (cur_string.size () - 1);
    }
    out.push_back (cur_string);
    return true;
}
// Runs a sample command through my_popen and prints each captured entry on
// its own line.
int main ( int argc, char **argv) {
    std::vector<std::string> output;
    my_popen("ls -l > /dev/null ", output);
    for (std::vector<std::string>::size_type idx = 0; idx != output.size(); ++idx) {
        std::cout << output[idx] << std::endl;
    }
}

UTF-8 output on Windows console

The following code shows unexpected behaviour on my machine (tested with Visual C++ 2008 SP1 on Windows XP and VS 2012 on Windows 7):
#include <iostream>
#include "Windows.h"
// Repro: writes the UTF-8 bytes of 'ü' (C3 BC) through std::cout and then
// through C stdio, printing the stream's fail state in between.
int main() {
// Switch the console output code page to UTF-8.
SetConsoleOutputCP( CP_UTF8 );
std::cout << "\xc3\xbc";
// '1' if the C++ stream reports failure after the write, '0' otherwise.
int fail = std::cout.fail() ? '1': '0';
fputc( fail, stdout );
// Same two bytes, this time via fputs.
fputs( "\xc3\xbc", stdout );
}
I simply compiled with cl /EHsc test.cpp.
Windows XP: Output in a console window is
ü0ü (translated to Codepage 1252; it originally shows some line-drawing
characters in the default codepage, perhaps 437). When I change the settings
of the console window to use the "Lucida Console" character set and run my
test.exe again, output is changed to 1ü, which means
the character ü can be written using fputs and its UTF-8 encoding C3 BC
std::cout does not work for whatever reason
the stream's failbit is set after trying to write the character
Windows 7: Output using Consolas is ��0ü. Even more interesting. The correct bytes are written, probably (at least when redirecting the output to a file) and the stream state is ok, but the two bytes are written as separate characters).
I tried to raise this issue on "Microsoft Connect" (see here),
but MS has not been very helpful. You might as well look here
as something similar has been asked before.
Can you reproduce this problem?
What am I doing wrong? Shouldn't the std::cout and the fputs have the same
effect?
SOLVED: (sort of) Following mike.dld's idea I implemented a std::stringbuf doing the conversion from UTF-8 to Windows-1252 in sync() and replaced the streambuf of std::cout with this converter (see my comment on mike.dld's answer).
I understand the question is quite old, but if someone would still be interested, below is my solution. I've implemented a quite simple std::streambuf descendant and then passed it to each of standard streams on the very beginning of program execution.
This allows you to use UTF-8 everywhere in your program. On input, data is taken from console in Unicode and then converted and returned to you in UTF-8. On output the opposite is done, taking data from you in UTF-8, converting it to Unicode and sending to console. No issues found so far.
Also note, that this solution doesn't require any codepage modification, with either SetConsoleCP, SetConsoleOutputCP or chcp, or something else.
That's the stream buffer:
// Stream buffer that bridges UTF-8 text in the program and the wide-character
// Win32 console API (ReadConsoleW / WriteConsoleW). One instance serves either
// input or output, chosen at construction.
class ConsoleStreamBufWin32 : public std::streambuf
{
public:
// handleId is passed to GetStdHandle; isInput selects the direction.
ConsoleStreamBufWin32(DWORD handleId, bool isInput);
protected:
// std::basic_streambuf
virtual std::streambuf* setbuf(char_type* s, std::streamsize n);
// Output: converts the pending UTF-8 bytes and writes them to the console.
// Input: flushes the console input buffer and resets the get area.
virtual int sync();
// Input only: reads from the console and exposes the text as UTF-8.
virtual int_type underflow();
// Output only: collects one more byte into the pending buffer.
virtual int_type overflow(int_type c = traits_type::eof());
private:
HANDLE const m_handle;   // console handle obtained from GetStdHandle
bool const m_isInput;    // true for an input buffer, false for output
std::string m_buffer;    // UTF-8 bytes: pending output, or current input chunk
};
// Looks up the standard handle for `handleId` and records the direction.
// An input buffer starts with an empty get area.
ConsoleStreamBufWin32::ConsoleStreamBufWin32(DWORD handleId, bool isInput)
    : m_handle(::GetStdHandle(handleId))
    , m_isInput(isInput)
    , m_buffer()
{
    if (!m_isInput)
    {
        return;
    }
    setg(0, 0, 0);
}
// Caller-supplied buffers are not used; both arguments are ignored and a
// null pointer is returned.
std::streambuf* ConsoleStreamBufWin32::setbuf(char_type* /*s*/, std::streamsize /*n*/)
{
    return NULL;
}
int ConsoleStreamBufWin32::sync()
{
if (m_isInput)
{
::FlushConsoleInputBuffer(m_handle);
setg(0, 0, 0);
}
else
{
if (m_buffer.empty())
{
return 0;
}
std::wstring const wideBuffer = utf8_to_wstring(m_buffer);
DWORD writtenSize;
::WriteConsoleW(m_handle, wideBuffer.c_str(), wideBuffer.size(), &writtenSize, NULL);
}
m_buffer.clear();
return 0;
}
// Supplies the next input character. When the get area is exhausted it reads
// up to 127 wide characters from the console, converts them to UTF-8 and
// exposes the result as the new get area.
// Returns eof for output buffers, on a failed console read, or when the read
// produced no bytes.
ConsoleStreamBufWin32::int_type ConsoleStreamBufWin32::underflow()
{
    if (!m_isInput)
    {
        return traits_type::eof();
    }
    if (gptr() < egptr())
    {
        // Unread characters remain: hand out the current one.
        return sgetc();
    }

    wchar_t wideChunk[128];
    DWORD charsRead;
    if (!::ReadConsoleW(m_handle, wideChunk, ARRAYSIZE(wideChunk) - 1, &charsRead, NULL))
    {
        return traits_type::eof();
    }
    wideChunk[charsRead] = L'\0'; // one slot was reserved for the terminator
    m_buffer = wstring_to_utf8(wideChunk);

    char* const first = &m_buffer[0];
    setg(first, first, first + m_buffer.size());
    if (gptr() >= egptr())
    {
        return traits_type::eof();
    }
    return sgetc();
}
// Collects one output byte into the pending buffer; sync() writes it out.
// Returns eof for input buffers, otherwise a value different from eof.
ConsoleStreamBufWin32::int_type ConsoleStreamBufWin32::overflow(int_type c)
{
    if (m_isInput)
    {
        return traits_type::eof();
    }
    // overflow() may be called with eof to mean "no character to add".
    // Appending it would push a stray byte into the UTF-8 text.
    if (!traits_type::eq_int_type(c, traits_type::eof()))
    {
        m_buffer += traits_type::to_char_type(c);
    }
    return traits_type::not_eof(c);
}
The usage then is as follows:
// Installs a ConsoleStreamBufWin32 on `stream`, but only when the standard
// handle identified by `handleId` is a character device. Otherwise the stream
// is left untouched. The new buffer is never deleted here.
template<typename StreamT>
inline void FixStdStream(DWORD handleId, bool isInput, StreamT& stream)
{
    const DWORD fileType = ::GetFileType(::GetStdHandle(handleId));
    if (fileType != FILE_TYPE_CHAR)
    {
        return;
    }
    stream.rdbuf(new ConsoleStreamBufWin32(handleId, isInput));
}
// ...
// Usage example: install the console stream buffers on the three standard
// streams first, then write UTF-8 text through std::cout.
int main()
{
FixStdStream(STD_INPUT_HANDLE, true, std::cin);
FixStdStream(STD_OUTPUT_HANDLE, false, std::cout);
FixStdStream(STD_ERROR_HANDLE, false, std::cerr);
// ...
// "\xc3\xbc" is the UTF-8 encoding of 'ü'.
std::cout << "\xc3\xbc" << std::endl;
// ...
}
Left out wstring_to_utf8 and utf8_to_wstring could easily be implemented with WideCharToMultiByte and MultiByteToWideChar WinAPI functions.
Oi. Congratulations on finding a way to change the code page of the console from inside your program. I didn't know about that call, I always had to use chcp.
I'm guessing the C++ default locale is getting involved. By default it will use the code page provide by GetThreadLocale() to determine the text encoding of non-wstring stuff. This generally defaults to CP1252. You could try using SetThreadLocale() to get to UTF-8 (if it even does that, can't recall), with the hope that std::locale defaults to something that can handle your UTF-8 encoding.
It's time to close this now. Stephan T. Lavavej says the behaviour is "by design", although I cannot follow this explanation.
My current knowledge is: Windows XP console in UTF-8 codepage does not work with C++ iostreams.
Windows XP is getting out of fashion now and so does VS 2008. I'd be interested to hear if the problem still exists on newer Windows systems.
On Windows 7 the effect is probably due to the way the C++ streams output characters. As seen in an answer to "Properly print utf8 characters in windows console", UTF-8 output also fails with C stdio when printing one byte after another, as in putc('\xc3'); putc('\xbc');. Perhaps this is what C++ streams do here.
I just follow mike.dld's answer in this question, and add the printf support for the UTF-8 string.
As mkluwe mentioned in his answer, by default the printf function outputs to the console one byte at a time, and the console can't handle the individual bytes of a multi-byte sequence correctly. My method is quite simple: I use the snprintf function to print the whole content to an internal string buffer, and then dump the buffer to std::cout.
Here is the full testing code:
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <locale>
#include <windows.h>
using namespace std;
// https://stackoverflow.com/questions/4358870/convert-wstring-to-string-encoded-in-utf-8
#include <codecvt>
#include <string>
// convert UTF-8 string to wstring
// Decodes a UTF-8 byte string into a wide string via std::wstring_convert.
// NOTE(review): with a 16-bit wchar_t, codecvt_utf8<wchar_t> targets UCS-2
// rather than UTF-16. Confirm whether characters outside the BMP matter here.
std::wstring utf8_to_wstring (const std::string& str)
{
    typedef std::codecvt_utf8<wchar_t> Utf8Codec;
    return std::wstring_convert<Utf8Codec>().from_bytes(str);
}
// convert wstring to UTF-8 string
// Encodes a wide string as UTF-8 bytes via std::wstring_convert.
// NOTE(review): with a 16-bit wchar_t, codecvt_utf8<wchar_t> treats the input
// as UCS-2 rather than UTF-16. Confirm whether surrogate pairs matter here.
std::string wstring_to_utf8 (const std::wstring& str)
{
    typedef std::codecvt_utf8<wchar_t> Utf8Codec;
    return std::wstring_convert<Utf8Codec>().to_bytes(str);
}
// https://stackoverflow.com/questions/1660492/utf-8-output-on-windows-console
// mike.dld's answer
// Stream buffer that bridges UTF-8 text in the program and the wide-character
// Win32 console API (ReadConsoleW / WriteConsoleW). One instance serves either
// input or output, chosen at construction.
class ConsoleStreamBufWin32 : public std::streambuf
{
public:
// handleId is passed to GetStdHandle; isInput selects the direction.
ConsoleStreamBufWin32(DWORD handleId, bool isInput);
protected:
// std::basic_streambuf
virtual std::streambuf* setbuf(char_type* s, std::streamsize n);
// Output: converts the pending UTF-8 bytes and writes them to the console.
// Input: flushes the console input buffer and resets the get area.
virtual int sync();
// Input only: reads from the console and exposes the text as UTF-8.
virtual int_type underflow();
// Output only: collects one more byte into the pending buffer.
virtual int_type overflow(int_type c = traits_type::eof());
private:
HANDLE const m_handle;   // console handle obtained from GetStdHandle
bool const m_isInput;    // true for an input buffer, false for output
std::string m_buffer;    // UTF-8 bytes: pending output, or current input chunk
};
// Looks up the standard handle for `handleId` and records the direction.
// An input buffer starts with an empty get area.
ConsoleStreamBufWin32::ConsoleStreamBufWin32(DWORD handleId, bool isInput)
    : m_handle(::GetStdHandle(handleId))
    , m_isInput(isInput)
    , m_buffer()
{
    if (!m_isInput)
    {
        return;
    }
    setg(0, 0, 0);
}
// Caller-supplied buffers are not used; both arguments are ignored and a
// null pointer is returned.
std::streambuf* ConsoleStreamBufWin32::setbuf(char_type* /*s*/, std::streamsize /*n*/)
{
    return NULL;
}
int ConsoleStreamBufWin32::sync()
{
if (m_isInput)
{
::FlushConsoleInputBuffer(m_handle);
setg(0, 0, 0);
}
else
{
if (m_buffer.empty())
{
return 0;
}
std::wstring const wideBuffer = utf8_to_wstring(m_buffer);
DWORD writtenSize;
::WriteConsoleW(m_handle, wideBuffer.c_str(), wideBuffer.size(), &writtenSize, NULL);
}
m_buffer.clear();
return 0;
}
// Supplies the next input character. When the get area is exhausted it reads
// up to 127 wide characters from the console, converts them to UTF-8 and
// exposes the result as the new get area.
// Returns eof for output buffers, on a failed console read, or when the read
// produced no bytes.
ConsoleStreamBufWin32::int_type ConsoleStreamBufWin32::underflow()
{
    if (!m_isInput)
    {
        return traits_type::eof();
    }
    if (gptr() < egptr())
    {
        // Unread characters remain: hand out the current one.
        return sgetc();
    }

    wchar_t wideChunk[128];
    DWORD charsRead;
    if (!::ReadConsoleW(m_handle, wideChunk, ARRAYSIZE(wideChunk) - 1, &charsRead, NULL))
    {
        return traits_type::eof();
    }
    wideChunk[charsRead] = L'\0'; // one slot was reserved for the terminator
    m_buffer = wstring_to_utf8(wideChunk);

    char* const first = &m_buffer[0];
    setg(first, first, first + m_buffer.size());
    if (gptr() >= egptr())
    {
        return traits_type::eof();
    }
    return sgetc();
}
// Collects one output byte into the pending buffer; sync() writes it out.
// Returns eof for input buffers, otherwise a value different from eof.
ConsoleStreamBufWin32::int_type ConsoleStreamBufWin32::overflow(int_type c)
{
    if (m_isInput)
    {
        return traits_type::eof();
    }
    // overflow() may be called with eof to mean "no character to add".
    // Appending it would push a stray byte into the UTF-8 text.
    if (!traits_type::eq_int_type(c, traits_type::eof()))
    {
        m_buffer += traits_type::to_char_type(c);
    }
    return traits_type::not_eof(c);
}
// Installs a ConsoleStreamBufWin32 on `stream`, but only when the standard
// handle identified by `handleId` is a character device. Otherwise the stream
// is left untouched. The new buffer is never deleted here.
template<typename StreamT>
inline void FixStdStream(DWORD handleId, bool isInput, StreamT& stream)
{
    const DWORD fileType = ::GetFileType(::GetStdHandle(handleId));
    if (fileType != FILE_TYPE_CHAR)
    {
        return;
    }
    stream.rdbuf(new ConsoleStreamBufWin32(handleId, isInput));
}
// some code are from this blog
// https://blog.csdn.net/witton/article/details/108087135
#define printf(fmt, ...) __fprint(stdout, fmt, ##__VA_ARGS__ )
// Formats `fmt` with the arguments in `va` into a temporary string and writes
// the whole string to std::cout in one go, so multi-byte UTF-8 sequences are
// not split byte by byte.
// `fp` is accepted for signature compatibility but ignored: output always
// goes to std::cout.
// Returns the number of characters written (printf convention), or a negative
// value when formatting fails.
int __vfprint(FILE *fp, const char *fmt, va_list va)
{
    (void) fp;
    // A va_list can only be walked once, and two passes are needed
    // (measure, then format), so the measuring pass works on a copy.
    va_list measure;
    va_copy(measure, va);
    // vsnprintf is the va_list-taking variant. Passing a va_list to snprintf
    // treats the list object itself as the first format argument.
    const int length = vsnprintf(NULL, 0, fmt, measure);
    va_end(measure);
    if (length < 0)
    {
        return length;
    }
    std::string text(static_cast<std::string::size_type>(length) + 1, '\0'); /* +1 for the '\0' */
    vsnprintf(&text[0], text.size(), fmt, va);
    text.resize(static_cast<std::string::size_type>(length));
    std::cout << text;
    return length;
}
// Variadic front end for __vfprint; this is what the printf macro expands to.
// Returns whatever __vfprint returns.
int __fprint(FILE *fp, const char *fmt, ...)
{
    va_list va;
    va_start(va, fmt);
    const int n = __vfprint(fp, fmt, va);
    va_end(va);
    return n;
}
// Test driver: install the console stream buffers on the three standard
// streams, then print 'ü' (UTF-8 bytes C3 BC) once through std::cout and once
// through the printf macro defined above.
int main()
{
FixStdStream(STD_INPUT_HANDLE, true, std::cin);
FixStdStream(STD_OUTPUT_HANDLE, false, std::cout);
FixStdStream(STD_ERROR_HANDLE, false, std::cerr);
// ...
std::cout << "\xc3\xbc" << std::endl;
// Expands to __fprint(stdout, ...) because of the macro.
printf("\xc3\xbc");
// ...
return 0;
}
The source code is saved in UTF-8 format, and build under Msys2's GCC and run under Windows 7 64bit. Here is the result
ü
ü