I have a task to port a tool written for Windows so that it runs on Linux, so I have to write functions that convert std::string to std::wstring and std::wstring to std::string on Linux.
Since I am not familiar with C++, when I call a Linux API I have to convert the string to a char*, as in the IsFileExist function below.
If I remove the setlocale call, I get the error message below:
libc++abi.dylib: terminating with uncaught exception of type std::length_error: basic_string
Question: Is this the correct way to use setlocale? After googling I am still confused about this.
Here is the code:
#include <iostream>
#include <vector>
#include <sys/stat.h>
#include <unistd.h>
#include <string>
#include <fstream>
#include <cstdlib>   // mbstowcs, wcstombs
#include <clocale>   // setlocale
#include <cstring>   // strncpy
/*
string converts to wstring
*/
std::wstring s2ws(const std::string& src)
{
    std::wstring res = L"";
    size_t const wcs_len = mbstowcs(NULL, src.c_str(), 0);
    std::vector<wchar_t> buffer(wcs_len + 1);
    mbstowcs(&buffer[0], src.c_str(), src.size());
    res.assign(buffer.begin(), buffer.end() - 1);
    return res;
}
/*
wstring converts to string
*/
std::string ws2s(std::wstring const & src)
{
    setlocale(LC_CTYPE, "");
    std::string res = "";
    size_t const mbs_len = wcstombs(NULL, src.c_str(), 0);
    std::vector<char> buffer(mbs_len + 1);
    wcstombs(&buffer[0], src.c_str(), buffer.size());
    res.assign(buffer.begin(), buffer.end() - 1);
    return res;
}
int IsFileExist(const std::wstring& name) {
    struct stat buffer;
    /* convert wstring to string, then to a C-style char* */
    std::string str = ws2s(name.c_str());
    char *cstr = new char[str.length() + 1];
    strncpy(cstr, str.c_str(), str.size());
    /* check whether the file exists */
    if (0 == stat(cstr, &buffer))
    {
        delete [] cstr;
        return 1;
    }
    else
    {
        delete [] cstr;
        return 0;
    }
}
int main()
{
    std::wstring str = L"chines中文œ∑®";
    std::string res = ws2s(str);
    std::cout << res << std::endl;

    char dst[] = "abcdef";
    std::wstring fun = s2ws(dst);
    std::wcout << fun << std::endl;

    std::wstring file = L"/Users/coder52/Downloads/mac.zip";
    std::cout << IsFileExist(file) << std::endl;
    return 0;
}
You should not do this in a platform-dependent way. Make it C++17 style: use std::filesystem::path for the path name and check for the file's existence with std::filesystem::exists. std::filesystem::path can handle both char* and wchar_t*.
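For illustration, a minimal sketch of that suggestion, assuming a C++17 compiler and a standard library with <filesystem> support:

#include <filesystem>
#include <iostream>

int main()
{
    // std::filesystem::path accepts narrow and wide strings alike,
    // so no manual string/wstring conversion is needed.
    std::filesystem::path file = L"/Users/coder52/Downloads/mac.zip";
    std::cout << std::filesystem::exists(file) << std::endl; // prints 1 if the file exists
    return 0;
}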
Related
I am looking for a working solution to the classic UTF-8 to UTF-32 conversion in a stable and tested system.
I currently have the source of Unicode.org's C code:
https://android.googlesource.com/platform/external/id3lib/+/master/unicode.org/ConvertUTF.c
https://android.googlesource.com/platform/external/id3lib/+/master/unicode.org/ConvertUTF.h
License:
https://android.googlesource.com/platform/external/id3lib/+/master/unicode.org/readme.txt
I am using the following C++ code, which interfaces with the C library code above:
std::wstring Utf8_To_wstring(const std::string& utf8string)
{
    if (utf8string.length() == 0)
    {
        return std::wstring();
    }
    size_t widesize = utf8string.length();
    if (sizeof(wchar_t) == 2)
    {
        std::wstring resultstring;
        resultstring.resize(widesize, L'\0');
        const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
        const UTF8* sourceend = sourcestart + widesize;
        UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
        UTF16* targetend = targetstart + widesize;
        ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
        if (res != conversionOK)
        {
            return std::wstring(utf8string.begin(), utf8string.end());
        }
        *targetstart = 0;
        return std::wstring(resultstring.c_str());
    }
    else if (sizeof(wchar_t) == 4)
    {
        std::wstring resultstring;
        resultstring.resize(widesize, L'\0');
        const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
        const UTF8* sourceend = sourcestart + widesize;
        UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
        UTF32* targetend = targetstart + widesize;
        ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, lenientConversion);
        if (res != conversionOK)
        {
            return std::wstring(utf8string.begin(), utf8string.end());
        }
        *targetstart = 0;
        if (!resultstring.empty() && resultstring.size() > 0)
        {
            std::wstring result = std::wstring(resultstring.c_str());
            return result;
        }
        else
        {
            return std::wstring();
        }
    }
    else
    {
        assert(false);
        return L"";
    }
    return L"";
}
This code initially works, but crashes soon after due to some issue in the interfacing code above. The interfacing code was adapted from open source code found on GitHub, from a production project...
However, it crashes a few strings into the conversion, so I guess there is an overflow somewhere in this code.
Does anyone have a good replacement or example code for a simple C++11/C++17 solution that converts a std::string to a std::wstring holding UTF-32 Unicode values?
I have a working solution for UTF-8 to UTF-16 using the C++17 locale facilities.
This seems to do the job for me: it converts to the right form of Unicode so I can extract the character codes as ints and load the glyph codes correctly.
#include <locale>
#include <codecvt>
#include <string>

std::wstring Utf8_To_wstring(const std::string& utf8string)
{
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    std::wstring utf16;
    try {
        utf16 = converter.from_bytes(utf8string);
    }
    catch (const std::range_error& e)
    {
        // log / handle exception
    }
    return utf16;
}
I'm using the GetWindowsDirectoryA Windows API function to get the location of the Windows folder.
#include <iostream>
#include <string>
#include <vector>

#ifdef __WIN32
#include <fcntl.h>
#include <io.h>
#include <Windows.h>
#include <sysinfoapi.h>
#endif

std::string GetOSFolder() {
    std::vector<char> buffer(MAX_PATH + 1);
    GetWindowsDirectoryA(buffer.data(), MAX_PATH);
    std::string windowsRoot{ buffer.data(), buffer.size() };
    return windowsRoot + "/SomeFolder";
}

int main() {
    std::cout << GetOSFolder() << "\n";
}
I want to concatenate a folder name onto the returned Windows folder string:
windowsRoot + "/SomeFolder"
The above attempt produces the following string:
C:\Windows
/SomeFolder
This seems to happen because the buffer size is set to MAX_PATH, which is larger than the actual string.
Is there a way to construct the string from the buffer using the actual string length?
You could initialize the string directly from the path; you don't even need to know the size (which GetWindowsDirectoryA returns, as noted in the comments), but it is advisable to use it, since it makes the std::string construction more efficient.
Example:
std::string GetOSFolder() {
    char buffer[MAX_PATH + 1];
    const auto size = GetWindowsDirectoryA(buffer, MAX_PATH);
    if (size) {
        return std::string(buffer, size).append("/SomeFolder");
    }
    return ""; // or however you want to handle the error
}

int main() {
    std::cout << GetOSFolder() << "\n";
}
Or you could avoid the second buffer variable, but then you have to resize the original buffer:
std::string GetOSFolder() {
    std::string buffer(MAX_PATH + 1, 0);
    auto result = GetWindowsDirectoryA(buffer.data(), MAX_PATH); // or &buffer[0] prior to C++17
    if (result) {
        buffer.resize(result);
        return buffer.append("/SomeFolder");
    }
    return buffer; // will be an empty string if the API call fails
}
Now, if you want to avoid resize you could still use some trickery:
std::string GetOSFolder() {
    const auto size = GetWindowsDirectoryA(nullptr, 0); // required size, including the null terminator
    if (size) {
        std::string buffer(size, 0);
        const auto result = GetWindowsDirectoryA(buffer.data(), size); // or &buffer[0] prior to C++17
        if (result) {
            buffer.resize(result); // drop the trailing null terminator written by the API
            return buffer.append("\\OtherPath");
        }
    }
    return "";
}
Here you call GetWindowsDirectoryA twice: the first call tells you the size you need for your buffer, the second actually retrieves the path.
Note that in this last option the first call returns the length of the string including the null terminator, whereas the second call returns the length without the null byte. This is typical of WIN32 API calls of this kind; it's a well-known pattern.
I'm new to C++ and I have this issue: I have a string called DATA_DIR that I need to format into a wstring.
string str = DATA_DIR;
std::wstring temp(L"%s",str);
Visual Studio tells me that there is no instance of constructor that matches with the argument list. Clearly, I'm doing something wrong.
I found this example online
std::wstring someText( L"hello world!" );
which apparently works (no compile errors). My question is, how do I get the string value stored in DATA_DIR into the wstring constructor as opposed to something arbitrary like "hello world"?
Here is an implementation using wcstombs (Updated):
#include <iostream>
#include <cstdlib>
#include <string>

std::string wstring_from_bytes(std::wstring const& wstr)
{
    // Ask wcstombs for the required length first (sizeof(wstr.c_str()) would
    // only give the size of a pointer), then add room for the terminator.
    std::size_t size = std::wcstombs(nullptr, wstr.c_str(), 0) + 1;
    char *str = new char[size];
    std::string temp;
    std::wcstombs(str, wstr.c_str(), size);
    temp = str;
    delete[] str;
    return temp;
}

int main()
{
    std::wstring wstr = L"abcd";
    std::string str = wstring_from_bytes(wstr);
}
Here is a demo.
This is in reference to the most up-voted answer, but I don't have enough "reputation" to comment on it directly.
The name of the function in that solution, "wstring_from_bytes", implies it does what the original poster wants, namely produce a wstring from a string. The function actually does the opposite, and would more accurately be named "bytes_from_wstring".
To convert from string to wstring, the wstring_from_bytes function should use mbstowcs, not wcstombs:
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <cstdlib>
#include <string>

std::wstring wstring_from_bytes(std::string const& str)
{
    size_t requiredSize = 0;
    std::wstring answer;
    wchar_t *pWTempString = NULL;
    /*
     * Call the conversion function without the output buffer to get the required size
     * - Add one to leave room for the NULL terminator
     */
    requiredSize = mbstowcs(NULL, str.c_str(), 0) + 1;
    /* Allocate the output buffer (with room for the NULL terminator) */
    pWTempString = (wchar_t *)malloc(requiredSize * sizeof(wchar_t));
    if (pWTempString == NULL)
    {
        printf("Memory allocation failure.\n");
    }
    else
    {
        // Call the conversion function with the output buffer
        size_t size = mbstowcs(pWTempString, str.c_str(), requiredSize);
        if (size == (size_t)(-1))
        {
            printf("Couldn't convert string\n");
        }
        else
        {
            answer = pWTempString;
        }
    }
    if (pWTempString != NULL)
    {
        free(pWTempString); // memory came from malloc, so free it (not delete[])
    }
    return answer;
}

int main()
{
    std::string str = "abcd";
    std::wstring wstr = wstring_from_bytes(str);
}
Regardless, this is much more easily done with newer versions of the standard library (C++11 and newer):
#include <locale>
#include <codecvt>
#include <string>
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
std::wstring wide = converter.from_bytes(narrow_utf8_source_string);
printf-style format specifiers are not part of the C++ library and cannot be used to construct a string.
If the string may only contain single-byte characters, then the range constructor is sufficient.
std::string narrower( "hello" );
std::wstring wider( narrower.begin(), narrower.end() );
The problem is that we usually use wstring when wide characters are applicable (hence the w), and those are represented in a std::string by multibyte sequences. Doing this causes each byte of a multibyte sequence to be translated into its own, incorrect, wide character.
Moreover, converting a multibyte sequence requires knowing its encoding, and that information is not encapsulated by std::string or std::wstring. C++11 lets you specify an encoding and translate using std::wstring_convert, but I'm not sure how widely supported it is as of yet. See 0x....'s excellent answer.
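As a small illustration of that pitfall (a sketch, assuming the std::string holds UTF-8): the two bytes that encode "á" each become their own wide character, producing mojibake instead of a single code point.

std::string utf8 = "\xc3\xa1";                   // "á" encoded in UTF-8
std::wstring wrong(utf8.begin(), utf8.end());    // yields L"\u00C3\u00A1", i.e. "Ã¡"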
The converter mentioned for C++11 and above was deprecated for this specific conversion in C++17, and the deprecation message suggests using the MultiByteToWideChar function instead.
The compiler diagnostic (C4996) mentions defining _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING.
wstring temp = L"";
for (auto c : DATA_DIR)
temp.push_back(c);
I found this function. Could not find any predefined method to do this.
std::wstring s2ws(const std::string& s)
{
    int len;
    int slength = (int)s.length() + 1;
    len = MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, 0, 0);
    wchar_t* buf = new wchar_t[len];
    MultiByteToWideChar(CP_ACP, 0, s.c_str(), slength, buf, len);
    std::wstring r(buf);
    delete[] buf;
    return r;
}
std::wstring stemp = s2ws(myString);
I have some strings read from the database, stored in a char* and in UTF-8 format (you know, "á" is encoded as 0xC3 0xA1). But, in order to write them to a file, I first need to convert them to ANSI (can't make the file in UTF-8 format... it's only read as ANSI), so that my "á" doesn't become "á". Yes, I know some data will be lost (chinese characters, and in general anything not in the ANSI code page) but that's exactly what I need.
But the thing is, I need the code to compile in various platforms, so it has to be standard C++ (i.e. no Winapi, only stdlib, stl, crt or any custom library with available source).
Does anyone have any suggestions?
A few days ago, somebody answered that if I had a C++11 compiler, I could try this:
#include <string>
#include <codecvt>
#include <locale>
#include <vector>
using namespace std;

string utf8_to_string(const char *utf8str, const locale& loc)
{
    // UTF-8 to wstring
    wstring_convert<codecvt_utf8<wchar_t>> wconv;
    wstring wstr = wconv.from_bytes(utf8str);
    // wstring to string
    vector<char> buf(wstr.size());
    use_facet<ctype<wchar_t>>(loc).narrow(wstr.data(), wstr.data() + wstr.size(), '?', buf.data());
    return string(buf.data(), buf.size());
}

int main(int argc, char* argv[])
{
    string ansi;
    char utf8txt[] = "\xc3\xa1"; // "á" as UTF-8 bytes (a string literal avoids narrowing errors)
    // I guess you want to use Windows-1252 encoding...
    ansi = utf8_to_string(utf8txt, locale(".1252"));
    // Now do something with the string
    return 0;
}
I don't know what happened to that response; apparently someone deleted it. But it turns out to be the perfect solution. To whoever posted it: thanks a lot, you deserve the accepted answer and an upvote!
If you mean ASCII, just discard any byte that has bit 7 set; this will remove all multibyte sequences. Note that you could create more advanced algorithms, like removing the accent from the "á", but that would require much more work.
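For what it's worth, a minimal sketch of that byte-stripping idea (the function name is just for illustration):

#include <string>

// Keep only plain ASCII bytes; any byte with bit 7 set belongs to a
// multibyte UTF-8 sequence and is dropped.
std::string strip_non_ascii(const std::string& utf8)
{
    std::string out;
    for (unsigned char c : utf8)
        if (c < 0x80)
            out.push_back(static_cast<char>(c));
    return out;
}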
This should work:
#include <string>
#include <codecvt>
#include <locale>
using namespace std::string_literals;

std::string to_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
    using wcvt = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
    std::u32string wstr(str.size(), U'\0');
    std::use_facet<std::ctype<char32_t>>(loc).widen(str.data(), str.data() + str.size(), &wstr[0]);
    return wcvt{}.to_bytes(wstr.data(), wstr.data() + wstr.size());
}

std::string from_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
    using wcvt = std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
    auto wstr = wcvt{}.from_bytes(str);
    std::string result(wstr.size(), '0');
    std::use_facet<std::ctype<char32_t>>(loc).narrow(wstr.data(), wstr.data() + wstr.size(), '?', &result[0]);
    return result;
}

int main() {
    auto s0 = u8"Blöde C++ Scheiße äöü!!1Elf"s;
    auto s1 = from_utf8(s0);
    auto s2 = to_utf8(s1);
    return 0;
}
For VC++:
#include <string>
#include <codecvt>
#include <locale>
using namespace std::string_literals;

std::string to_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
    using wcvt = std::wstring_convert<std::codecvt_utf8<int32_t>, int32_t>;
    std::u32string wstr(str.size(), U'\0');
    std::use_facet<std::ctype<char32_t>>(loc).widen(str.data(), str.data() + str.size(), &wstr[0]);
    return wcvt{}.to_bytes(
        reinterpret_cast<const int32_t*>(wstr.data()),
        reinterpret_cast<const int32_t*>(wstr.data() + wstr.size())
    );
}

std::string from_utf8(const std::string& str, const std::locale& loc = std::locale{}) {
    using wcvt = std::wstring_convert<std::codecvt_utf8<int32_t>, int32_t>;
    auto wstr = wcvt{}.from_bytes(str);
    std::string result(wstr.size(), '0');
    std::use_facet<std::ctype<char32_t>>(loc).narrow(
        reinterpret_cast<const char32_t*>(wstr.data()),
        reinterpret_cast<const char32_t*>(wstr.data() + wstr.size()),
        '?', &result[0]);
    return result;
}

int main() {
    auto s0 = u8"Blöde C++ Scheiße äöü!!1Elf"s;
    auto s1 = from_utf8(s0);
    auto s2 = to_utf8(s1);
    return 0;
}
char cmd[40];
driver = FuncGetDrive(driver);
sprintf_s(cmd, "%c:\\test.exe", driver);
I cannot use cmd in
sei.lpFile = cmd;
So, how do I convert a char array to a wchar_t array?
Just use this:
static wchar_t* charToWChar(const char* text)
{
    const size_t size = strlen(text) + 1;
    wchar_t* wText = new wchar_t[size];
    mbstowcs(wText, text, size);
    return wText;
}
Don't forget to call delete[] on the returned pointer when you're done; otherwise this is a memory leak waiting to happen if you keep calling it without cleanup. Or use a smart pointer, as a commenter below suggests.
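For example, a hedged sketch of that smart-pointer variant (not from the original answer), so the caller never has to remember the delete[]:

#include <cstdlib>
#include <cstring>
#include <memory>

static std::unique_ptr<wchar_t[]> charToWCharPtr(const char* text)
{
    const size_t size = std::strlen(text) + 1;
    std::unique_ptr<wchar_t[]> wText(new wchar_t[size]);
    std::mbstowcs(wText.get(), text, size); // converts including the terminator
    return wText; // freed automatically when the unique_ptr goes out of scope
}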
Or use standard strings, as follows:
#include <cstdlib>
#include <cstring>
#include <string>

static std::wstring charToWString(const char* text)
{
    const size_t size = std::strlen(text);
    std::wstring wstr;
    if (size > 0) {
        wstr.resize(size);
        std::mbstowcs(&wstr[0], text, size);
    }
    return wstr;
}
From MSDN:
#include <iostream>
#include <stdlib.h>
#include <string>
using namespace std;

int main()
{
    const char *orig = "Hello, World!";
    cout << orig << " (char *)" << endl;

    // Convert to a wchar_t*
    size_t origsize = strlen(orig) + 1;
    const size_t newsize = 100;
    size_t convertedChars = 0;
    wchar_t wcstring[newsize];
    mbstowcs_s(&convertedChars, wcstring, origsize, orig, _TRUNCATE);
    wcscat_s(wcstring, L" (wchar_t *)");
    wcout << wcstring << endl;
}
From your example, using swprintf_s would work:
wchar_t wcmd[40];
driver = FuncGetDrive(driver);
swprintf_s(wcmd, L"%C:\\test.exe", driver);
Note that the C in %C has to be uppercase, since driver is a normal char and not a wchar_t.
Passing your existing string with swprintf_s(wcmd, L"%S", cmd) should also work.
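Putting those two hints together, a small sketch (assuming sei is the SHELLEXECUTEINFOW structure from the question, and relying on MSVC's wide-printf meaning of %S, i.e. a narrow string argument):

wchar_t wcmd[40];
swprintf_s(wcmd, L"%S", cmd); // widen the existing narrow cmd buffer
sei.lpFile = wcmd;            // lpFile expects a wide string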