Problem with working with utf-8 text file c++ - c++

lately I approach a problem when trying to get line from file that is utf-8(?) formatted. I also need to copy that string to clipboard and be able to paste it in .txt file.
#include <iostream>
#include <windows.h>
#include <cstdio>
#include <conio.h>
#include <time.h>
#include <string>
#include <fstream>
#include <wchar.h>
using namespace std;
wstring lastLine;
void mesparse()
{
wifstream client("Client.txt");
if(client.is_open())
{
client.seekg(-7,ios_base::end);
int kloop=0;
while (kloop<1)
{
wchar_t ch;
client.get(ch);
if(ch == '\n') {
kloop=1;
}
else {
client.seekg(-4,ios_base::cur);
}}
getline(client,lastLine);
client.close();
}
else
{
cout<<"Unable to open client.txt file.";
}
}
void toClipboard(std::wstring s){
const wchar_t* text = s.c_str();
int len = wcslen(text);
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (len + 1) * sizeof(wchar_t));
wchar_t* buffer = (wchar_t*)GlobalLock(hMem);
wcscpy_s(buffer, len + 1, text);
GlobalUnlock(hMem);
OpenClipboard(NULL);
EmptyClipboard();
SetClipboardData(CF_UNICODETEXT, hMem);
CloseClipboard();
}
int main()
{
mesparse();
toClipboard(lastLine);
wcout<<lastLine<<endl;
}
What I'm trying to copy:
йцукaеёśнгшщㅂхфывапрㅊджэячсмитъбюㅗ
йцукaеёśнгшщㅂхфывапрㅊджэя
йцукaеёśнгшщㅂхфывапрㅊ
йцукaеёśнгшщㅂхфыва

CF_UNICODETEXT != UTF-8.
The first is wide chars, the second is 8 bit.
You first have to convert it with MultiByteToWideChar().
Therefore, read all the text to std::string, not to std::wstring. Then get the std::wstring with MultiByteToWideChar() and then copy it to clipboard.
Also, doing character search in UTF-8 text is usually a bad idea (variable encoding).

This is an example code of solution pointed out by #Michael Chourdakis.
string mesparse()
{
string lastLine = "";
ifstream client("Client.txt");
if (client.is_open())
{
client.seekg(-7, ios_base::end);
int kloop = 0;
while (kloop < 1)
{
char ch;
client.get(ch);
if (ch == '\n') {
kloop = 1;
}
else {
client.seekg(-4, ios_base::cur);
}
}
getline(client, lastLine);
client.close();
}
else
{
cout << "Unable to open client.txt file.";
}
return lastLine;
}
void toClipboard(std::string s) {
int len;
// Retrieve the length
len = MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, s.c_str(), -1, NULL, 0);
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (len) * sizeof(wchar_t));
wchar_t* buffer = (wchar_t*)GlobalLock(hMem);
// Convert to wide char string
len = MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, s.c_str(), -1, buffer, len);
GlobalUnlock(hMem);
wcout << buffer << endl;
OpenClipboard(NULL);
EmptyClipboard();
SetClipboardData(CF_UNICODETEXT, hMem);
CloseClipboard();
}
int main()
{
string copiedStr = mesparse();
if (copiedStr.length() == 0)
return 0;
toClipboard(copiedStr);
}

Related

Cannot add files to .zip file

I tried with sample code below (from How do I use Minizip (on Zlib)?).
It does not work when adding files from a specific folder. The zipfile gets created but nothing in it. I do not understand what I am missing.
I want to zip all the files in the directory "c:\\temp1\\*".
If I hard code the filenames, as shown in below 2 lines, then they get added to the zip.
const char* Filenames[] = {"Readme.txt", "foo.bin"};
unsigned int nCount = sizeof( Filenames )/sizeof( char* );
Here is my code:
#include "stdafx.h"
#include <vector>
#include <string>
#include <fstream>
#include <windows.h>
#include <comdef.h>
#include <Shlwapi.h>
#pragma comment (lib, "Shlwapi.lib")
#include "zlib-1.2.11\zlib.h"
#include "zlib-1.2.11\zip.h"
using namespace std;
wchar_t *convertCharArrayToLPCWSTR(const char* charArray)
{
wchar_t* wString=new wchar_t[4096];
MultiByteToWideChar(CP_ACP, 0, charArray, -1, wString, 4096);
return wString;
}
std::wstring CombinePaths(std::wstring const &pattern, LPCWSTR filename) {
std::wstring tmp(pattern);
tmp.push_back('\0');
PathRemoveFileSpec(&tmp[0]);
std::wstring retVal(MAX_PATH, '\0');
PathCombine(&retVal[0], tmp.c_str(), filename);
return retVal.c_str();
}
int zip()
{
WIN32_FIND_DATA FindFileData;
HANDLE hFind;
zipFile myZip = zipOpen64("MyTest.zip", APPEND_STATUS_CREATE);
std::wstring myDir(L"C:\\temp1\\*");
hFind = FindFirstFile(myDir.c_str(), &FindFileData);
typedef std::vector<std::wstring> listofFiles;
listofFiles ff;
// List all the files in the directory
do
{
std::wstring fullPath = CombinePaths(myDir, FindFileData.cFileName);
ff.push_back(fullPath);
}
while (FindNextFile(hFind, &FindFileData) != 0);
FindClose(hFind);
bool myreturn = true;
// add files to the zip file
for ( unsigned int i = 0; i < ff.size(); i++ )
{
ifstream file(ff[i].c_str(), ios::binary | ios::in);
_bstr_t b(ff[i].c_str());
const char* filename = b;
FILE *stream;
errno_t err = fopen_s(&stream, filename, "r");
if (file.is_open() )
{
file.seekg(0, ios::end);
size_t size = file.tellg();
file.seekg(0, ios::beg);
std::vector<char> buffer(size);
if (size == 0 || file.read(&buffer[0], size))
{
zip_fileinfo zi = {0};
if (ZIP_OK == zipOpenNewFileInZip(myZip, filename, &zi, NULL, 0, NULL, 0, NULL, Z_DEFLATED, Z_DEFAULT_COMPRESSION))
{
if (zipWriteInFileInZip(myZip, size == 0 ? "" : &buffer[0], (unsigned int) size))
myreturn = false;
zipCloseFileInZip(myZip);
file.close();
continue;
}
else
{
file.close();
return ZIP_ERRNO;
}
}
file.close();
}
if (stream != NULL)
fclose(stream);
}
ff.clear();
zipCloseFileInZip( myZip );
if (zipClose(myZip, 0))
return ZIP_BADZIPFILE;
return ZIP_OK;
}
void unzip()
{
}
int _tmain(int argc, _TCHAR* argv[])
{
zip();
unzip();
return 0;
}

How to replace special quotes with straight quotes C++

For example the file I am parsing contains unicode char u201d ie. ” (accented quote)
How do I replace it with " (Straight quote)?
using c++ and STL i would use a code like this, you still need to save to output buffer to file.. tested on linux.
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
// load file data
char* load_file(const char *filename)
{
FILE *fp;
fp = fopen(filename, "r");
if (!fp)
return NULL;
size_t size;
if ((0 != fseek(fp, 0, SEEK_END)) || (-1 == (size = ftell(fp))))
size = 0;
// set fp at file start
fseek(fp, 0, 0);
char *buffer;
buffer = (char*) malloc(size);
if(!buffer)
{
fclose (fp);
return NULL;
}
if(size != fread(buffer, 1, size, fp))
{
free (buffer);
buffer = NULL;
}
fclose (fp);
return buffer;
}
// replace string
std::string replace(const std::string& str, const std::string& from, const std::string& to)
{
if(str.size() < 1)
return str;
std::string temp_str(str);
size_t start_pos = 0;
while((start_pos = temp_str.find(from, start_pos)) != std::string::npos)
{
temp_str.replace(start_pos, from.length(), to);
start_pos += to.length();
}
return temp_str.c_str();
}
int main(int argc, char** argv)
{
const char* file_name = "test.txt";
char* file_bytes = load_file(file_name);
if(file_bytes == nullptr)
return EXIT_FAILURE;
std::cout << replace(file_bytes, "”", "\"") << std::endl;
return EXIT_SUCCESS;
}

Why is my unicode string not being handled (printed/copied) correctly?

I'm trying to make a program that can work with unicode strings, eventually being able to copy them to the Windows clipboard.
I began with a simple cout test program:
#include <iostream>
int main()
{
std::cout << "( ͡° ͜ʖ ͡°)\n";
return 0;
}
Unfortunately, it wasn't handling the unicode, and instead, returned:
( ͡° ͜ʖ ͡°)
I had a feeling the issue was that the code file wasn't even being saved with the unicode characters, so I decided to represent them with escape sequences from here:
#include <iostream>
using namespace std;
int main()
{
std::cout << "( \u0361\u00b0 \u035c\u0296 \u0361\u00b0)\n";
return 0;
}
Unfortunately, the program returned the same result as before.
My end goal is to make a program that can copy a unicode string to the Windows clipboard, so if a different data-type to string is required, and it isn't as simple as adding some data/signature to the beginning of the string to indicate it's unicode, I'm not even sure if I can adapt this clipboard writing code to support unicode:
#include <string>
#include <windows.h>
void clip(std::string input);
int main()
{
clip("( ͡° ͜ʖ ͡°)");
return 0;
}
void clip(std::string input)
{
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, strlen(input.c_str()) + 1);
memcpy(GlobalLock(hMem), input.c_str(), strlen(input.c_str()) + 1);
GlobalUnlock(hMem);
OpenClipboard(0);
EmptyClipboard();
SetClipboardData(CF_TEXT, hMem);
CloseClipboard();
}
Notes:
I am developing this program on Windows 10.
On my system, conhost.exe (the Windows console) can handle the unicode string in question.
I am using Code::Blocks with MinGW Compiler.
Use CF_UNICODETEXT, not just CF_TEXT. Moreover, you should declare unicode string with L prefix, e.g. L"".
#include <Windows.h>
#include <cstring>
#include <string>
int main() {
std::wstring str(L"( ͡° ͜ʖ ͡°)");
if (OpenClipboard(HWND_DESKTOP)) {
EmptyClipboard();
auto sz = (str.size() + 1) * sizeof(str[0]);
auto hMem = GlobalAlloc(GMEM_MOVEABLE, sz);
if (hMem != nullptr) {
auto pMem = GlobalLock(hMem);
memcpy(pMem, str.c_str(), sz);
GlobalUnlock(hMem);
SetClipboardData(CF_UNICODETEXT, hMem);
}
else {
fprintf(stderr, "Can't allocate memory");
}
CloseClipboard();
}
else {
fprintf(stderr, "Can't open clipboard");
}
return 0;
}

Code dealing with windows clipboard don't work on some computer

i wrote this program for a friend of mine
It's purpose is to save every text you copy into a file
On my pc the program work fine but on my friend pc it wont copy all the line
#include <windows.h>
#include <stdio.h>
using namespace std;
int GetKeyboardInput(HANDLE hstdin);
int main()
{
HANDLE clip;
char* lastClip = (char*) malloc(1024);
char* currClip = (char*) malloc(1024);
FILE* file;
HANDLE hstdin;
int key;
hstdin = GetStdHandle(STD_INPUT_HANDLE);
strcpy(lastClip, "");
file = fopen("clipboard.txt", "w");
if(file != NULL)
{
do
{
if (OpenClipboard(NULL))
clip = GetClipboardData(CF_TEXT);
if(clip != NULL)
{
if(strlen((char*)clip) <= MAXLEN)
strcpy(currClip, (char*) clip);
else
strcpy(currClip, "String toooooo long");
if (strcmp(currClip,lastClip) != 0)
{
fprintf(file, "%s \n", currClip);
strcpy(lastClip, currClip);
}
}
CloseClipboard();
key = GetKeyboardInput(hstdin);
}while (key != VK_ESCAPE);
fclose(file);
}
else
printf("Failed opening file");
system("pause");
return 0;
}
int GetKeyboardInput(HANDLE hstdin)
{
INPUT_RECORD irInput;
DWORD InputsRead = 0;
ReadConsoleInput(hstdin, &irInput, 1, &InputsRead);
return irInput.Event.KeyEvent.wVirtualKeyCode;
}
The code is very simple so i don't think it need explanation
I cannot recreate the same circumstance [i tried to copy the same text, but it works for me] of the other pc so i think that there's a bug on the code
EDIT: my friend use Windows 8 64bit instead i use 7 at 64bit, could be this the problem?

How to make my `std::string url_encode_wstring(const std::wstring &input)` work on Linux?

So we have such function:
std::string url_encode_wstring(const std::wstring &input)
{
std::string output;
int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, NULL, 0, NULL, NULL);
if (cbNeeded > 0) {
char *utf8 = new char[cbNeeded];
if (WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, utf8, cbNeeded, NULL, NULL) != 0) {
for (char *p = utf8; *p; *p++) {
char onehex[5];
_snprintf(onehex, sizeof(onehex), "%%%02.2X", (unsigned char)*p);
output.append(onehex);
}
}
delete[] utf8;
}
return output;
}
Its grate for windows but I wonder how (and is it possible) to make it work under linux?
IMHO you should use a portable character codec library.
Here's an example of minimal portable code using iconv, which should be more than enough.
It's supposed to work on Windows (if it does, you can get rid of your windows-specific code altogether).
I follow the GNU guidelines not to use the wcstombs & co functions ( https://www.gnu.org/s/hello/manual/libc/iconv-Examples.html )
Depending on the use case, handle errors appropriately... and to enhance performance, you can create a class out of it.
#include <iostream>
#include <iconv.h>
#include <cerrno>
#include <cstring>
#include <stdexcept>
std::string wstring_to_utf8_string(const std::wstring &input)
{
size_t in_size = input.length() * sizeof(wchar_t);
char * in_buf = (char*)input.data();
size_t buf_size = input.length() * 6; // pessimistic: max UTF-8 char size
char * buf = new char[buf_size];
memset(buf, 0, buf_size);
char * out_buf(buf);
size_t out_size(buf_size);
iconv_t conv_desc = iconv_open("UTF-8", "wchar_t");
if (conv_desc == iconv_t(-1))
throw std::runtime_error(std::string("Could not open iconv: ") + strerror(errno));
size_t iconv_value = iconv(conv_desc, &in_buf, &in_size, &out_buf, &out_size);
if (iconv_value == -1)
throw std::runtime_error(std::string("When converting: ") + strerror(errno));
int ret = iconv_close(conv_desc);
if (ret != 0)
throw std::runtime_error(std::string("Could not close iconv: ") + strerror(errno));
std::string s(buf);
delete [] buf;
return s;
}
int main() {
std::wstring in(L"hello world");
std::wcout << L"input: [" << in << L"]" << std::endl;
std::string out(wstring_to_utf8_string(in));
std::cerr << "output: [" << out << "]" << std::endl;
return 0;
}