How to replace special quotes with straight quotes C++ - c++

For example the file I am parsing contains unicode char u201d ie. ” (accented quote)
How do I replace it with " (Straight quote)?

using c++ and STL i would use a code like this, you still need to save to output buffer to file.. tested on linux.
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
// load file data
char* load_file(const char *filename)
{
FILE *fp;
fp = fopen(filename, "r");
if (!fp)
return NULL;
size_t size;
if ((0 != fseek(fp, 0, SEEK_END)) || (-1 == (size = ftell(fp))))
size = 0;
// set fp at file start
fseek(fp, 0, 0);
char *buffer;
buffer = (char*) malloc(size);
if(!buffer)
{
fclose (fp);
return NULL;
}
if(size != fread(buffer, 1, size, fp))
{
free (buffer);
buffer = NULL;
}
fclose (fp);
return buffer;
}
// replace string
std::string replace(const std::string& str, const std::string& from, const std::string& to)
{
if(str.size() < 1)
return str;
std::string temp_str(str);
size_t start_pos = 0;
while((start_pos = temp_str.find(from, start_pos)) != std::string::npos)
{
temp_str.replace(start_pos, from.length(), to);
start_pos += to.length();
}
return temp_str.c_str();
}
int main(int argc, char** argv)
{
const char* file_name = "test.txt";
char* file_bytes = load_file(file_name);
if(file_bytes == nullptr)
return EXIT_FAILURE;
std::cout << replace(file_bytes, "”", "\"") << std::endl;
return EXIT_SUCCESS;
}

Related

Problem with working with utf-8 text file c++

lately I approach a problem when trying to get line from file that is utf-8(?) formatted. I also need to copy that string to clipboard and be able to paste it in .txt file.
#include <iostream>
#include <windows.h>
#include <cstdio>
#include <conio.h>
#include <time.h>
#include <string>
#include <fstream>
#include <wchar.h>
using namespace std;
wstring lastLine;
void mesparse()
{
wifstream client("Client.txt");
if(client.is_open())
{
client.seekg(-7,ios_base::end);
int kloop=0;
while (kloop<1)
{
wchar_t ch;
client.get(ch);
if(ch == '\n') {
kloop=1;
}
else {
client.seekg(-4,ios_base::cur);
}}
getline(client,lastLine);
client.close();
}
else
{
cout<<"Unable to open client.txt file.";
}
}
void toClipboard(std::wstring s){
const wchar_t* text = s.c_str();
int len = wcslen(text);
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (len + 1) * sizeof(wchar_t));
wchar_t* buffer = (wchar_t*)GlobalLock(hMem);
wcscpy_s(buffer, len + 1, text);
GlobalUnlock(hMem);
OpenClipboard(NULL);
EmptyClipboard();
SetClipboardData(CF_UNICODETEXT, hMem);
CloseClipboard();
}
int main()
{
mesparse();
toClipboard(lastLine);
wcout<<lastLine<<endl;
}
What I'm trying to copy:
йцукaеёśнгшщㅂхфывапрㅊджэячсмитъбюㅗ
йцукaеёśнгшщㅂхфывапрㅊджэя
йцукaеёśнгшщㅂхфывапрㅊ
йцукaеёśнгшщㅂхфыва
CF_UNICODETEXT != UTF-8.
The first is wide chars, the second is 8 bit.
You first have to convert it with MultiByteToWideChar().
Therefore, read all the text to std::string, not to std::wstring. Then get the std::wstring with MultiByteToWideChar() and then copy it to clipboard.
Also, doing character search in UTF-8 text is usually a bad idea (variable encoding).
This is an example code of solution pointed out by #Michael Chourdakis.
string mesparse()
{
string lastLine = "";
ifstream client("Client.txt");
if (client.is_open())
{
client.seekg(-7, ios_base::end);
int kloop = 0;
while (kloop < 1)
{
char ch;
client.get(ch);
if (ch == '\n') {
kloop = 1;
}
else {
client.seekg(-4, ios_base::cur);
}
}
getline(client, lastLine);
client.close();
}
else
{
cout << "Unable to open client.txt file.";
}
return lastLine;
}
void toClipboard(std::string s) {
int len;
// Retrieve the length
len = MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, s.c_str(), -1, NULL, 0);
HGLOBAL hMem = GlobalAlloc(GMEM_MOVEABLE, (len) * sizeof(wchar_t));
wchar_t* buffer = (wchar_t*)GlobalLock(hMem);
// Convert to wide char string
len = MultiByteToWideChar(CP_UTF8, MB_PRECOMPOSED, s.c_str(), -1, buffer, len);
GlobalUnlock(hMem);
wcout << buffer << endl;
OpenClipboard(NULL);
EmptyClipboard();
SetClipboardData(CF_UNICODETEXT, hMem);
CloseClipboard();
}
int main()
{
string copiedStr = mesparse();
if (copiedStr.length() == 0)
return 0;
toClipboard(copiedStr);
}

Cannot add files to .zip file

I tried with sample code below (from How do I use Minizip (on Zlib)?).
It does not work when adding files from a specific folder. The zipfile gets created but nothing in it. I do not understand what I am missing.
I want to zip all the files in the directory "c:\\temp1\\*".
If I hard code the filenames, as shown in below 2 lines, then they get added to the zip.
const char* Filenames[] = {"Readme.txt", "foo.bin"};
unsigned int nCount = sizeof( Filenames )/sizeof( char* );
Here is my code:
#include "stdafx.h"
#include <vector>
#include <string>
#include <fstream>
#include <windows.h>
#include <comdef.h>
#include <Shlwapi.h>
#pragma comment (lib, "Shlwapi.lib")
#include "zlib-1.2.11\zlib.h"
#include "zlib-1.2.11\zip.h"
using namespace std;
wchar_t *convertCharArrayToLPCWSTR(const char* charArray)
{
wchar_t* wString=new wchar_t[4096];
MultiByteToWideChar(CP_ACP, 0, charArray, -1, wString, 4096);
return wString;
}
std::wstring CombinePaths(std::wstring const &pattern, LPCWSTR filename) {
std::wstring tmp(pattern);
tmp.push_back('\0');
PathRemoveFileSpec(&tmp[0]);
std::wstring retVal(MAX_PATH, '\0');
PathCombine(&retVal[0], tmp.c_str(), filename);
return retVal.c_str();
}
int zip()
{
WIN32_FIND_DATA FindFileData;
HANDLE hFind;
zipFile myZip = zipOpen64("MyTest.zip", APPEND_STATUS_CREATE);
std::wstring myDir(L"C:\\temp1\\*");
hFind = FindFirstFile(myDir.c_str(), &FindFileData);
typedef std::vector<std::wstring> listofFiles;
listofFiles ff;
// List all the files in the directory
do
{
std::wstring fullPath = CombinePaths(myDir, FindFileData.cFileName);
ff.push_back(fullPath);
}
while (FindNextFile(hFind, &FindFileData) != 0);
FindClose(hFind);
bool myreturn = true;
// add files to the zip file
for ( unsigned int i = 0; i < ff.size(); i++ )
{
ifstream file(ff[i].c_str(), ios::binary | ios::in);
_bstr_t b(ff[i].c_str());
const char* filename = b;
FILE *stream;
errno_t err = fopen_s(&stream, filename, "r");
if (file.is_open() )
{
file.seekg(0, ios::end);
size_t size = file.tellg();
file.seekg(0, ios::beg);
std::vector<char> buffer(size);
if (size == 0 || file.read(&buffer[0], size))
{
zip_fileinfo zi = {0};
if (ZIP_OK == zipOpenNewFileInZip(myZip, filename, &zi, NULL, 0, NULL, 0, NULL, Z_DEFLATED, Z_DEFAULT_COMPRESSION))
{
if (zipWriteInFileInZip(myZip, size == 0 ? "" : &buffer[0], (unsigned int) size))
myreturn = false;
zipCloseFileInZip(myZip);
file.close();
continue;
}
else
{
file.close();
return ZIP_ERRNO;
}
}
file.close();
}
if (stream != NULL)
fclose(stream);
}
ff.clear();
zipCloseFileInZip( myZip );
if (zipClose(myZip, 0))
return ZIP_BADZIPFILE;
return ZIP_OK;
}
void unzip()
{
}
int _tmain(int argc, _TCHAR* argv[])
{
zip();
unzip();
return 0;
}

Using iconv translit to convert from UTF-8 to CP1251

I'am trying to convert string "aÜ" from UTF-8 to CP1251 via C++ library iconv.h using TRANSLIT and as a result I get string "a?", when expecting "aU".
When I use php script <?php echo iconv("UTF-8", "Windows-1251//TRANSLIT", "Ü");> on this computer, I get "aU" string as result.
Here's the code:
#include <cstdlib>
#include <iconv.h>
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
using namespace std;
int IConvert(char *buf,char *outbuf, size_t len, const char *from, const char *to)
{
iconv_t iconv_cd;
if ((iconv_cd = iconv_open(to, from)) == (iconv_t) -1) {
printf("Cannot open iconv from %s to %s\n", from, to);
return 0;
}
char *inbuf = buf;
size_t inlen = len;
size_t outlen = len;
size_t res = 0;
while (inlen > 0 && outlen > 0) {
res = iconv(iconv_cd, &inbuf, &inlen, &outbuf, &outlen);
if (res == 0)
break;
if (res == (size_t) (-1)) {
if (errno != EILSEQ && errno != EINVAL) {
iconv_close(iconv_cd);
*outbuf = '\0';
printf("Erorr %s (%s)\n", strerror(errno), from);
return 0;
} else if (inbuf < outbuf) {
iconv_close(iconv_cd);
*outbuf = '\0';
printf("Erorr %s (inbuf < outbuf)\n", strerror(errno));
return 0;
}
}
if (inlen > 0 && outlen > 0) {
*outbuf++ = *inbuf++;
inlen--;
outlen--;
}
}
iconv_close(iconv_cd);
*outbuf = '\0';
return 1;
}
int main(int argc, char** argv) {
char* line = "\u00C0a\u00DC";
char* from = (char*) malloc(strlen(from)+1);
char* to = (char*) malloc(strlen(from)+1);
strcpy(from, line);
printf("%s\n", from);
IConvert(from, to, strlen(from)+1, "UTF-8", "CP1251//TRANSLIT");
printf("%s\n", to);
return 0;
}
Any idea what problem could be?
The solution is
setlocale(LC_ALL, "");
at the beginning of your program. Yes, the locale influences the transliteration. In a German locale Ü would be transliterated to UE not U.

Readline: How to list all autocomplete matches on double tab?

I'm using "readline" library to create a console interface for my program. I'm able to autocomplete words using tab, but when I have words that share the same prefix like (car, card, carbon) it always chooses the shortest one. Here's my program (mostly taken from link):
#include <readline/readline.h>
#include <readline/history.h>
#include <stdlib.h>
#include <iostream>
const char *words[] = {"add", "remove", "rm", "update", "child", "children", "wife", "wifes"};
void *xmalloc (int size)
{
void *buf;
buf = malloc (size);
if (!buf)
{
fprintf (stderr, "Error: Out of memory. Exiting.\n");
exit (1);
}
return buf;
}
char *dupstr (const char *str)
{
char *temp;
temp = (char *) xmalloc (strlen (str) + 1);
strcpy (temp, str);
return (temp);
}
char *my_generator (const char *text, int state)
{
static int list_index, len;
const char *name;
if (!state)
{
list_index = 0;
len = strlen (text);
}
while (name = words[list_index])
{
list_index++;
if (strncmp (name, text, len) == 0) return dupstr (name);
}
// If no names matched, then return NULL.
return ((char *) NULL);
}
static char **my_completion (const char *text, int start, int end)
{
char **matches = (char **) NULL;
if (start == 0)
{
matches = rl_completion_matches ((char *) text, &my_generator);
}
else rl_bind_key ('\t', rl_abort);
return matches;
}
int main (int argc, char *argv[])
{
char *buf;
rl_attempted_completion_function = my_completion;
while ((buf = readline(">> ")) != NULL)
{
rl_bind_key ('\t', rl_complete);
if (strcmp (buf, "exit") == 0) break;
else if (buf[0] == '\0') continue;
else
{
std::cout << buf << std::endl;
add_history (buf);
}
}
free (buf);
return 0;
}
Is it possible to list all matches on double tab just like in ubuntu terminal?
I managed to get it to work by commenting out these two lines:
rl_bind_key ('\t', rl_complete);
and:
else rl_bind_key ('\t', rl_abort);
The default completion behaviour of readline works exactly like in ubuntu terminal, one tab to complete and two tabs to list possible completions. Not sure though what's the default completion function that's binded with the tab key, from the documentation i thought it was rl_possible_completions but it didn't give the same results.
Also i added the following line to my_completion function to prevent adding space at the end of the matched word:
rl_completion_append_character = '\0';
I removed dupstrfunction it and replaced it with the native strdup function instead (this has nothing to do with the auto complete problem, it's just to remove unnecessary code).
This is the final code:
#include <readline/readline.h>
#include <readline/history.h>
#include <stdlib.h>
#include <iostream>
const char *words[] = {"add", "remove", "rm", "update", "child", "children", "wife", "wives"};
// Generator function for word completion.
char *my_generator (const char *text, int state)
{
static int list_index, len;
const char *name;
if (!state)
{
list_index = 0;
len = strlen (text);
}
while (name = words[list_index])
{
list_index++;
if (strncmp (name, text, len) == 0) return strdup (name);
}
// If no names matched, then return NULL.
return ((char *) NULL);
}
// Custom completion function
static char **my_completion (const char *text, int start, int end)
{
// This prevents appending space to the end of the matching word
rl_completion_append_character = '\0';
char **matches = (char **) NULL;
if (start == 0)
{
matches = rl_completion_matches ((char *) text, &my_generator);
}
// else rl_bind_key ('\t', rl_abort);
return matches;
}
int main (int argc, char *argv[])
{
char *buf;
rl_attempted_completion_function = my_completion;
while ((buf = readline(">> ")) != NULL)
{
// rl_bind_key ('\t', rl_complete);
if (strcmp (buf, "exit") == 0) break;
else if (buf[0] == '\0')
{
free (buf);
continue;
}
else
{
std::cout << buf << std::endl;
add_history (buf);
}
free (buf);
buf = NULL;
}
if (buf != NULL) free (buf);
return 0;
}
The answer by razzak is almost correct, but this NULL must be added at the end of array of strings:
const char *words[] = {"add", "remove", "rm", "update", "child", "children", "wife", "wives", NULL};
Some changes for nonwarning compilation in my_generator() function:
while ((name = words[list_index++]))
{
if (strncmp (name, text, len) == 0) return strdup (name);
}

Generate SHA hash in C++ using OpenSSL library

How can I generate SHA1 or SHA2 hashes using the OpenSSL libarary?
I searched google and could not find any function or example code.
From the command line, it's simply:
printf "compute sha1" | openssl sha1
You can invoke the library like this:
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>
int main()
{
unsigned char ibuf[] = "compute sha1";
unsigned char obuf[20];
SHA1(ibuf, strlen(ibuf), obuf);
int i;
for (i = 0; i < 20; i++) {
printf("%02x ", obuf[i]);
}
printf("\n");
return 0;
}
OpenSSL has a horrible documentation with no code examples, but here you are:
#include <openssl/sha.h>
bool simpleSHA256(void* input, unsigned long length, unsigned char* md)
{
SHA256_CTX context;
if(!SHA256_Init(&context))
return false;
if(!SHA256_Update(&context, (unsigned char*)input, length))
return false;
if(!SHA256_Final(md, &context))
return false;
return true;
}
Usage:
unsigned char md[SHA256_DIGEST_LENGTH]; // 32 bytes
if(!simpleSHA256(<data buffer>, <data length>, md))
{
// handle error
}
Afterwards, md will contain the binary SHA-256 message digest. Similar code can be used for the other SHA family members, just replace "256" in the code.
If you have larger data, you of course should feed data chunks as they arrive (multiple SHA256_Update calls).
Adaptation of #AndiDog version for big file:
static const int K_READ_BUF_SIZE{ 1024 * 16 };
std::optional<std::string> CalcSha256(std::string filename)
{
// Initialize openssl
SHA256_CTX context;
if(!SHA256_Init(&context))
{
return std::nullopt;
}
// Read file and update calculated SHA
char buf[K_READ_BUF_SIZE];
std::ifstream file(filename, std::ifstream::binary);
while (file.good())
{
file.read(buf, sizeof(buf));
if(!SHA256_Update(&context, buf, file.gcount()))
{
return std::nullopt;
}
}
// Get Final SHA
unsigned char result[SHA256_DIGEST_LENGTH];
if(!SHA256_Final(result, &context))
{
return std::nullopt;
}
// Transform byte-array to string
std::stringstream shastr;
shastr << std::hex << std::setfill('0');
for (const auto &byte: result)
{
shastr << std::setw(2) << (int)byte;
}
return shastr.str();
}
correct syntax at command line should be
echo -n "compute sha1" | openssl sha1
otherwise you'll hash the trailing newline character as well.
Here is OpenSSL example of calculating sha-1 digest using BIO:
#include <openssl/bio.h>
#include <openssl/evp.h>
std::string sha1(const std::string &input)
{
BIO * p_bio_md = nullptr;
BIO * p_bio_mem = nullptr;
try
{
// make chain: p_bio_md <-> p_bio_mem
p_bio_md = BIO_new(BIO_f_md());
if (!p_bio_md) throw std::bad_alloc();
BIO_set_md(p_bio_md, EVP_sha1());
p_bio_mem = BIO_new_mem_buf((void*)input.c_str(), input.length());
if (!p_bio_mem) throw std::bad_alloc();
BIO_push(p_bio_md, p_bio_mem);
// read through p_bio_md
// read sequence: buf <<-- p_bio_md <<-- p_bio_mem
std::vector<char> buf(input.size());
for (;;)
{
auto nread = BIO_read(p_bio_md, buf.data(), buf.size());
if (nread < 0) { throw std::runtime_error("BIO_read failed"); }
if (nread == 0) { break; } // eof
}
// get result
char md_buf[EVP_MAX_MD_SIZE];
auto md_len = BIO_gets(p_bio_md, md_buf, sizeof(md_buf));
if (md_len <= 0) { throw std::runtime_error("BIO_gets failed"); }
std::string result(md_buf, md_len);
// clean
BIO_free_all(p_bio_md);
return result;
}
catch (...)
{
if (p_bio_md) { BIO_free_all(p_bio_md); }
throw;
}
}
Though it's longer than just calling SHA1 function from OpenSSL, but it's more universal and can be reworked for using with file streams (thus processing data of any length).
C version of #Nayfe code, generating SHA1 hash from file:
#include <stdio.h>
#include <openssl/sha.h>
static const int K_READ_BUF_SIZE = { 1024 * 16 };
unsigned char* calculateSHA1(char *filename)
{
if (!filename) {
return NULL;
}
FILE *fp = fopen(filename, "rb");
if (fp == NULL) {
return NULL;
}
unsigned char* sha1_digest = malloc(sizeof(char)*SHA_DIGEST_LENGTH);
SHA_CTX context;
if(!SHA1_Init(&context))
return NULL;
unsigned char buf[K_READ_BUF_SIZE];
while (!feof(fp))
{
size_t total_read = fread(buf, 1, sizeof(buf), fp);
if(!SHA1_Update(&context, buf, total_read))
{
return NULL;
}
}
fclose(fp);
if(!SHA1_Final(sha1_digest, &context))
return NULL;
return sha1_digest;
}
It can be used as follows:
unsigned char *sha1digest = calculateSHA1("/tmp/file1");
The res variable contains the sha1 hash.
You can print it on the screen using the following for-loop:
char *sha1hash = (char *)malloc(sizeof(char) * 41);
sha1hash[40] = '\0';
int i;
for (i = 0; i < SHA_DIGEST_LENGTH; i++)
{
sprintf(&sha1hash[i*2], "%02x", sha1digest[i]);
}
printf("SHA1 HASH: %s\n", sha1hash);