Cannot read an existing file whose path contains Unicode characters - C++

I have a project that needs to read the path of the SysData file. I want to move the SysData file to a path that contains characters such as "ç", "ş" and "ğ", but these characters cannot be read. I have to read the path as Unicode (for example UTF-8).
Here is the code:
/**
 * Reads one line from hFile into *str, one byte at a time.
 *
 * *str is cleared first. On Linux builds a carriage return (13) is skipped
 * and a line feed (10) ends the line; on other builds either of the two
 * bytes ends the line. The terminator itself is never stored.
 *
 * @param str  Receives the bytes of the line.
 * @return true if at least one byte could be read from hFile, false otherwise.
 */
bool TSimTextFileStream::ReadLine ( mstring * str )
{
    *str = "";
    char ch = ' ';
    bool readSomething = false;
    while ( fread ( &ch, 1, 1, hFile ) )
    {
        readSomething = true;
#ifdef __linux__
        // Drop carriage returns; a line feed terminates the line.
        if ( ch == 13 )
            continue;
        if ( ch == 10 )
            break;
#else
        // Either a carriage return or a line feed terminates the line.
        if ( ch == 13 || ch == 10 )
            break;
#endif
        *str += ch;
    }
    return readSomething;
}
And here is the code that calls this method:
/**
 * Returns the SysData directory, resolving it on first use and caching the
 * result in a function-local static.
 *
 * Resolution order:
 *   1. the SYSDATAPATH environment variable, if it exists (an empty value
 *      falls back to "<current dir>/SysData");
 *   2. the first line of "SysDataPath.dat", if that file exists and a line
 *      could be read from it;
 *   3. "<current dir>/SysData".
 *
 * While the cached value is the empty string the lookup is repeated on the
 * next call.
 */
mstring GetSysDataDirectory ( )
{
    static mstring cachedDir = "";
    if ( !( cachedDir == "" ) )
        return cachedDir;

    if ( mIsEnvironmentVarExist("SYSDATAPATH") )
    {
        mstring envDir = mGetEnvVar("SYSDATAPATH");
        if ( envDir.size() == 0 )
            envDir = mGetCurrentDir ( ) + "/SysData";
        cachedDir = envDir;
    }
    else if ( mIsFileExist ( "SysDataPath.dat" ) )
    {
        TSimTextFileStream pathFile;
        pathFile.OpenFileForRead( "SysDataPath.dat" );
        mstring firstLine;
        if ( pathFile.ReadLine( &firstLine ) )
            cachedDir = firstLine;
        else
            cachedDir = mGetCurrentDir ( ) + "/SysData";
    }
    else
    {
        cachedDir = mGetCurrentDir ( ) + "/SysData";
    }
    return cachedDir;
}
I searched and found some suggested solutions, but they did not work, for example:
/**
 * Opens fname for reading and stores the stream in hFile.
 *
 * If hFile is already set, CloseFile() is called first.
 *
 * @param fname  Path passed to fopen via c_str().
 * @return true if fopen returned a non-null stream, false otherwise.
 */
bool TSimTextFileStream::OpenFileForRead(mstring fname)
{
    if (hFile != NULL)
        CloseFile();
    hFile = fopen(fname.c_str(), "r,ccs=UNICODE");
    return hFile != NULL;
}
and tried this;
hFile = fopen(fname.c_str(), "r,ccs=UTF-8");
But that did not work either. Can you help me, please?
enter image description here
This situation is my problem :((

Windows does not support UTF-8 encoded path names for fopen:
The fopen function opens the file that is specified by filename. By
default, a narrow filename string is interpreted using the ANSI
codepage (CP_ACP).
Source.
Instead, a second function, called _wfopen is provided, which accepts a wide-character string as path argument.
Similar restrictions apply when using the C++ fstreams for File I/O.
So the only way for you to solve this is by converting your UTF-8 encoded pathname either to the system codepage or to a wide character string.

fopen can usually read Unicode characters. Try changing the file's encoding.

Related

zlib minizip unpacked executables are corrupted

I'm trying to use miniunzip to extract some files. It works on Linux. On Windows, it throws no errors, but if the file is executable, the resulting binary doesn't work. I get a message window with a message about incompatibility with 64-bit Windows. If I use another utility, such as 7-zip, to unpack it, everything works fine, so the problem is here in my code. Here is the class method that does all the work.
// Extracts the entries of the zip archive at inputFile into outputDirectory
// using the minizip unz* API.
//
// Returns false, with a description stored in this->errorMessage, as soon as
// one of the checked steps below fails; otherwise returns true.
//
// NOTE(review): none of the early `return false` paths after unzOpen call
// unzClose(zipFile); only the success path at the end does.
bool FileHandler::unzip( string inputFile, string outputDirectory )
{
if (!fileExists(inputFile)) {
this->errorMessage = "Can't find file at " + inputFile;
return false;
}
unzFile zipFile = unzOpen(inputFile.c_str());
if( zipFile == nullptr ){
this->errorMessage = "FileHandler::unzip failed to open input file";
return false;
}
// Entry names collected in the first pass, split by kind.
vector<string> files;
vector<string> folders;
unz_global_info globalInfo;
// err is not checked here directly; it only gates the enumeration loop below.
int err = unzGetGlobalInfo( zipFile, &globalInfo );
if (unzGoToFirstFile(zipFile) != UNZ_OK) {
this->errorMessage = "FileHandler::unzip failed calling unzGoToFirstFile";
return false;
}
// Pass 1: walk the archive and classify each entry. The loop stops early,
// without reporting an error, as soon as err is no longer UNZ_OK.
for ( unsigned long i=0; i < globalInfo.number_entry && err == UNZ_OK; i++ ){
char filename[FILENAME_MAX];
unz_file_info subFileInfo;
err = unzGetCurrentFileInfo( zipFile, &subFileInfo, filename,
sizeof(filename), NULL, 0, NULL, 0);
if ( err == UNZ_OK )
{
// An entry whose name ends in a path separator is treated as a folder.
char nLast = filename[subFileInfo.size_filename-1];
if ( nLast =='/' || nLast == '\\' )
{
folders.push_back(filename);
}
else
{
files.push_back(filename);
}
err = unzGoToNextFile(zipFile);
}
}
// Pass 2: create every folder entry (trailing separator removed) below
// outputDirectory.
for ( string & folder : folders ){
string strippedFolder = folder.substr(0, folder.length()-1);
string dirPath = normalizePath(outputDirectory+"/"+strippedFolder);
if( ! makeDirectory( dirPath ) ){
this->errorMessage = "FileHandler::unzip Failed to create directory "+dirPath;
return false;
}
}
// Pass 3: locate each file entry by name, read it fully into memory and
// write it to its destination path.
for ( auto it = files.begin(); it != files.end(); it++ ){
if( zipFile == 0 ){
this->errorMessage = "FileHandler::unzip invalid unzFile object at position 1";
return false;
}
string filename = *it;
//string filepath = outputDirectory + "/" + *it;
string filepath = normalizePath( outputDirectory + "/" + *it );
const char * cFile = filename.c_str();
const char * cPath = filepath.c_str();
int err = unzLocateFile( zipFile, cFile, 0 );
if ( err != UNZ_OK ){
this->errorMessage = "FileHandler::unzip error locating sub-file.";
return false;
}
err = unzOpenCurrentFile( zipFile );
if( err != UNZ_OK ){
this->errorMessage = "FileHandler::unzip error opening current file";
return false;
}
// The stream is constructed with the path only, i.e. with ofstream's
// default open mode (no binary flag is passed).
ofstream fileStream{ cPath };
// Need an ostream object here.
if( fileStream.fail() ){
this->errorMessage = "FileHandler::unzip error opening file stream at "+string(cPath);
return false;
}
unz_file_info curFileInfo;
err = unzGetCurrentFileInfo( zipFile, &curFileInfo, 0, 0, 0, 0, 0, 0);
if ( err != UNZ_OK )
{
this->errorMessage = "FileHandler::unzip failed to read size of file";
return false;
}
// The whole entry is read with a single unzReadCurrentFile call into a
// buffer of the uncompressed size.
unsigned int size = (unsigned int)curFileInfo.uncompressed_size;
char * buf = new char[size];
size = unzReadCurrentFile( zipFile, buf, size );
// NOTE(review): size is declared unsigned int, so `size < 0` can never be
// true and a negative return value from unzReadCurrentFile is not detected
// here. This branch would also return without delete[] buf.
if ( size < 0 ){
this->errorMessage = "FileHandler::unzip unzReadCurrentFile returned an error. ";
return false;
}
fileStream.write( buf, size );
fileStream.flush();
delete [] buf;
fileStream.close();
#ifndef _WIN32
// The entry name is split on "." and a single resulting part is taken to
// mean "no extension".
vector<string> parts = splitString(filename, ".");
if( parts.size() == 1 ){ // In linux, assume file without extension is executable
mode_t old_mask = umask( 000 );
chmod( cPath, S_IRWXU|S_IRWXG|S_IROTH|S_IXOTH );
umask( old_mask );
}
#endif
unzCloseCurrentFile( zipFile );
}
unzClose(zipFile);
return true;
}
std::ofstream opens files in text mode by default; you need to make it use binary mode instead.
On Linux there doesn't seem to be any difference between text and binary modes. But on Windows, attempting to write \n into a text file produces \r\n, corrupting your data.
You need to change this line
ofstream fileStream{ cPath };
to
ofstream fileStream{ cPath, ostream::out | ostream::binary };

C++ program opens file correctly on Linux but not on Windows

I compiled a Linux program on Windows via Mingw but the output is wrong.
Error description:
The output of the program looks different on Windows than on Linux. This is how it looks on Windows:
>tig_2
CAATCTTCAGAGTCCAGAGTGGGAGGCACAGACTACAGAAAATGAGCAGCGGGGCTGGTA
>cluster_1001_conTTGGTGAAGAGAATTTGGACATGGATGAAGGCTTGGGCTTGACCATGCGAAGG
Expected output:
>cluster_1001_contig2
CAATCTTCAGAGTCCAGAGTGGGAGGCACAGACTACAGAAAATGAGCAGCGGGGCTGGTA
>cluster_1001_contig1
TTGGTGAAGAGAATTTGGACATGGATGAAGGCTTGGGCTTGACCATGCGAAGG
(Note: the output is very large to paste it here so the examples above are pseudo-real).
Possible cause:
I have observed that if I convert the line endings of the input file from Linux (LF) to Windows (CRLF), it almost works: only the first character (>) in the file is missing. The same code works perfectly on Linux without any line-ending conversion. So the problem must be in the function that parses the input, not in the one that writes the output:
seq_db.Read( db_in.c_str(), options );
Source code:
This is the piece that parses the input file. Anyway, I might be wrong; the fault might be somewhere else. In case it is needed, the FULL source code is here :)
// Reads sequence records from `file` line by line and appends them to
// `sequences`.
//
// Line dispatch, based on the first character of each line read by fgets:
//   '+'        : the line (and its continuation chunks) is measured, then
//                one.size bytes of the input are skipped with fseek;
//   '>' or '#' : starts a new record; the record accumulated so far in `one`
//                is finalised and stored first;
//   otherwise  : the line is appended to the current record `one`.
//
// When options.store_disk is set, stored records are written to a temporary
// swap file instead of being kept in full.
//
// NOTE(review): the input is opened with mode "r" and the code combines
// ftell()/fseek() with strlen()-based byte counts; on platforms whose text
// mode translates line endings these may not agree with the bytes in the
// file -- confirm.
void SequenceDB::Read( const char *file, const Options & options )
{
// `one` accumulates the record being read, `dummy` carries the metadata
// stored for swapped records, `des` holds the current description line.
Sequence one;
Sequence dummy;
Sequence des;
Sequence *last = NULL;
FILE *swap = NULL;
FILE *fin = fopen( file, "r" );
char *buffer = NULL;
char *res = NULL;
size_t swap_size = 0;
int option_l = options.min_length;
if( fin == NULL ) bomb_error( "Failed to open the database file" );
if( options.store_disk ) swap = OpenTempFile( temp_dir );
Clear();
dummy.swap = swap;
buffer = new char[ MAX_LINE_SIZE+1 ];
while (not feof( fin ) || one.size) { /* do not break when the last sequence is not handled */
// Pre-set the first byte so that, if fgets stores nothing, the buffer is
// dispatched as a record header below and the pending record gets written.
buffer[0] = '>';
if ( (res=fgets( buffer, MAX_LINE_SIZE, fin )) == NULL && one.size == 0) break;
if( buffer[0] == '+' ){
// Measure the full length of the '+' line, which may span several fgets
// chunks when it is longer than MAX_LINE_SIZE.
int len = strlen( buffer );
int len2 = len;
while( len2 && buffer[len2-1] != '\n' ){
if ( (res=fgets( buffer, MAX_LINE_SIZE, fin )) == NULL ) break;
len2 = strlen( buffer );
len += len2;
}
one.des_length2 = len;
dummy.des_length2 = len;
// Skip one.size bytes of input (presumably the per-base quality string
// that follows the '+' line -- TODO confirm).
fseek( fin, one.size, SEEK_CUR );
}else if (buffer[0] == '>' || buffer[0] == '#' || (res==NULL && one.size)) {
if ( one.size ) { // write previous record
one.dat_length = dummy.dat_length = one.size;
// A record without identifier, or for which Format() returns non-zero,
// is reported and discarded by resetting its size.
if( one.identifier == NULL || one.Format() ){
printf( "Warning: from file \"%s\",\n", file );
printf( "Discarding invalid sequence or sequence without identifier and description!\n\n" );
if( one.identifier ) printf( "%s\n", one.identifier );
printf( "%s\n", one.data );
one.size = 0;
}
one.index = dummy.index = sequences.size();
// Only records longer than options.min_length are kept.
if( one.size > option_l ) {
if ( swap ) {
swap_size += one.size;
// so that size of file < MAX_BIN_SWAP about 2GB
if ( swap_size >= MAX_BIN_SWAP) {
dummy.swap = swap = OpenTempFile( temp_dir );
swap_size = one.size;
}
// Store only the metadata copy in `sequences`; the converted data
// itself is written to the swap file at the recorded offset.
dummy.size = one.size;
dummy.offset = ftell( swap );
dummy.des_length = one.des_length;
sequences.Append( new Sequence( dummy ) );
one.ConvertBases();
fwrite( one.data, 1, one.size, swap );
}else{
//printf( "==================\n" );
sequences.Append( new Sequence( one ) );
//printf( "------------------\n" );
//if( sequences.size() > 10 ) break;
}
//if( sequences.size() >= 10000 ) break;
}
}
// Start a new record: reset the accumulator and collect the complete
// description line (possibly spanning several fgets chunks) into `des`.
one.size = 0;
one.des_length2 = 0;
int len = strlen( buffer );
int len2 = len;
des.size = 0;
des += buffer;
while( len2 && buffer[len2-1] != '\n' ){
if ( (res=fgets( buffer, MAX_LINE_SIZE, fin )) == NULL ) break;
des += buffer;
len2 = strlen( buffer );
len += len2;
}
// The description start is derived as the current stream position minus
// the number of characters just read.
size_t offset = ftell( fin );
one.des_begin = dummy.des_begin = offset - len;
one.des_length = dummy.des_length = len;
// Skip the marker character and one following blank, optionally truncate
// to options.des_len, then terminate the description at its newline.
int i = 0;
if( des.data[i] == '>' || des.data[i] == '#' || des.data[i] == '+' ) i += 1;
if( des.data[i] == ' ' or des.data[i] == '\t' ) i += 1;
if( options.des_len and options.des_len < des.size ) des.size = options.des_len;
while( i < des.size and ( des.data[i] != '\n') ) i += 1;
des.data[i] = 0;
// The identifier points into des.data (no copy is made here).
one.identifier = dummy.identifier = des.data;
} else {
// Any other line is appended to the current record.
one += buffer;
}
}
#if 0
int i, n = 0;
for(i=0; i<sequences.size(); i++) n += sequences[i].bufsize + 4;
cout<<n<<"\t"<<sequences.capacity() * sizeof(Sequence)<<endl;
int i;
scanf( "%i", & i );
#endif
// Clear the identifier pointers that alias des.data before the locals go
// out of scope.
one.identifier = dummy.identifier = NULL;
delete[] buffer;
fclose( fin );
}
The format of the input file is like this:
> comment
ACGTACGTACGTACGTACGTACGTACGTACGT
> comment
ACGTACGTACGTACGTACGTACGTACGTACGT
> comment
ACGTACGTACGTACGTACGTACGTACGTACGT
etc
The issue is more than likely you need to open the file using the "rb" switch in the call to fopen. The "rb" opens the file in binary mode, as opposed to "r", which opens a file in "text" mode.
Since you're going back and forth between Linux and Windows, the end-of-line characters will be different. If you open the file as "text" in Windows, but the file was formatted for Linux, you're lying to Windows that it is a text file. So the runtime will do CR/LF conversion all wrong.
Therefore you should open the file as binary, "rb" so that the CR/LF translation isn't done.

Programmatically modify/create vcproj files

I have a Visual C++ project file (vcproj) and I want to modify it programmatically in order to add additional include directories or link libraries.
One solution could be to parse the vcproj as an XML file and modify it. Is there any other API that makes this easier?
Here is some code I wrote for fetching from a Visual Studio solution (.sln) all the projects it contains and, from each project, all the files (including full paths) it contains.
/**
 * Parse a Visual Studio solution (.sln) and return the path of every project
 * it references.
 *
 * Each line containing "Project(" is expected to look like
 *   Project("{...}") = "Name", "relative\path.vcxproj", "{...}"
 * The second field (the project file) is extracted, stripped of surrounding
 * blanks and quotes, and combined with the drive and directory of solName.
 * Lines that do not have this shape are skipped.
 *
 * @param solName  Path of the solution file.
 * @return The project paths, in file order.
 *
 * Prints an error and calls exit(1) if the solution cannot be opened.
 */
static strList parseSolution( const char * solName )
{
    strList result;
    char drive[_MAX_DRIVE];
    char somepath[_MAX_PATH];
    char buffer[_MAX_PATH];
    char path[_MAX_PATH];
    char ext[_MAX_EXT];
    _splitpath( solName, drive, somepath, buffer, ext );
    FILE * f = fopen( solName, "r" );
    if( NULL == f )
    {
        printf("ERROR: Solution %s is missing or unavailable.\n", solName );
        exit(1);
    }
    // Let fgets drive the loop: it returns NULL on end of file AND on a read
    // error, whereas polling feof() would spin forever after a read error.
    while( NULL != fgets( buffer, sizeof(buffer), f ) )
    {
        if( NULL == strstr(buffer, "Project(") )
            continue;

        // Locate the field delimiters; a line missing any of them is not a
        // well-formed project entry and is ignored instead of dereferencing
        // a null pointer.
        char * ptrName = strchr( buffer, '=' );
        if( NULL == ptrName )
            continue;
        char * ptrFile = strchr( ptrName, ',' );
        if( NULL == ptrFile )
            continue;
        *ptrFile++ = 0;
        char * ptrEnd = strchr( ptrFile, ',' );
        if( NULL == ptrEnd )
            continue;
        *ptrEnd++ = 0;

        // Strip leading blanks/quotes and one trailing quote from the
        // project file field.
        while( (' ' == *ptrFile)
             ||('"' == *ptrFile) ) ptrFile++;
        const size_t fileLen = strlen( ptrFile );
        if( fileLen > 0 && '"' == ptrFile[fileLen-1] )
            ptrFile[fileLen-1] = 0;

        _makepath( path, drive, somepath, ptrFile, NULL );
        result.push_back( std::string(path) );
    }
    fclose(f);
    return result;
}
/**
 * Parse project and extract fullpath source filename from project.
 *
 * Every line containing `<ClInclude Include=` or `<ClCompile Include=` is
 * examined: the text after the first '=' is taken, cut at "/>" if present,
 * stripped of surrounding quotes, blanks and the trailing newline, and
 * combined with the drive and directory of projName.
 *
 * @param projName  Path of the project file.
 * @return The source/header paths, in file order.
 *
 * Prints an error and calls exit(1) if the project cannot be opened.
 */
static strList parseProject( const char * projName )
{
    strList result;
    char drive[_MAX_DRIVE];
    char somepath[_MAX_PATH];
    char buffer[_MAX_PATH];
    char path[_MAX_PATH];
    char ext[_MAX_EXT];
    _splitpath( projName, drive, somepath, buffer, ext );
    FILE * f = fopen( projName, "r" );
    if( NULL == f )
    {
        printf("ERROR: Project %s is missing or unavailable.\n", projName );
        exit(1);
    }
    // Let fgets drive the loop: it returns NULL on end of file AND on a read
    // error, whereas polling feof() would spin forever after a read error.
    while( NULL != fgets( buffer, sizeof(buffer), f ) )
    {
        if( (NULL == strstr(buffer, "<ClInclude Include="))
          &&(NULL == strstr(buffer, "<ClCompile Include=")) )
            continue;

        // The matched literal contains '=', so this is normally non-null;
        // the check only guards against future edits of the patterns above.
        char * ptrName = strchr( buffer, '=' );
        if( NULL == ptrName )
            continue;
        char * ptrName1 = strstr( buffer, "/>" );
        if( NULL != ptrName1 ) *ptrName1 = 0;

        // Strip the '=' and any leading blanks/quotes.
        while( ('=' == *ptrName)
             ||(' ' == *ptrName)
             ||('"' == *ptrName) ) ptrName++;

        // Strip trailing quotes, blanks and newline. Tracking the length
        // explicitly stops the loop at an empty string instead of indexing
        // ptrName[-1].
        size_t nameLen = strlen( ptrName );
        while( nameLen > 0
            && ( ('"' == ptrName[nameLen-1])
               ||(' ' == ptrName[nameLen-1])
               ||('\n' == ptrName[nameLen-1]) ) )
            ptrName[--nameLen] = 0;

        // Nothing left after trimming: not a usable file reference.
        if( 0 == nameLen )
            continue;

        _makepath( path, drive, somepath, ptrName, NULL );
        result.push_back( std::string(path) );
    }
    fclose(f);
    return result;
}
/**
* Recoding source file.
*/
Using these functions you can work on each file, or work on the projects themselves.
strList projectList = parseSolution( argv[1] );
strList::iterator itProj = projectList.begin();
while( itProj != projectList.end() )
{
printf("Project: %s\n", itProj->c_str());
strList fileName = parseProject( itProj->c_str() );
strList::iterator itFile = fileName.begin();
while( itFile != fileName.end() )
{
printf(" File %s\n", itFile->c_str());
// do something with the project file
itFile++;
}
fileName.clear();
itProj++;
}

Environment PATH Directories Iteration

I can't find any code (neither C nor C++ Boost.Filesystem) showing how to iterate over (parse) the directories present in the PATH environment variable, preferably in a platform-independent way. It is not so hard to write, but I want to reuse standard modules if they are available. Any links or suggestions?
This is what I used before:
/**
 * Returns the directories listed in the PATH environment variable.
 *
 * PATH is split on ';' on Windows and on ':' elsewhere. Empty entries are
 * preserved. The list is computed on the first successful call and the same
 * cached vector is returned afterwards, so later changes to PATH are not
 * picked up.
 *
 * @return Reference to the cached list of directories.
 * @throws std::runtime_error if PATH is unset or empty.
 */
const std::vector<std::string>& get_environment_PATH()
{
    static std::vector<std::string> result;
    if( !result.empty() )
        return result;
#if _WIN32
    // Handle Unicode, just remove if you don't want/need this. convert_to_utf8 uses WideCharToMultiByte in the Win32 API
    wchar_t* const rawPath = _wgetenv(L"PATH");
    // _wgetenv returns a null pointer when the variable is not set.
    const std::string PATH = rawPath ? std::string( convert_to_utf8( rawPath ) ) : std::string();
    const char delimiter = ';';
#else
    // getenv returns a null pointer when the variable is not set; building a
    // std::string from a null pointer is undefined behaviour.
    const char* const rawPath = getenv( "PATH" );
    const std::string PATH = rawPath ? rawPath : "";
    const char delimiter = ':';
#endif
    if( PATH.empty() )
        throw std::runtime_error( "PATH should not be empty" );
    std::string::size_type previous = 0;
    std::string::size_type index = PATH.find( delimiter );
    while( index != std::string::npos )
    {
        result.push_back( PATH.substr(previous, index-previous));
        previous=index+1;
        index = PATH.find( delimiter, previous );
    }
    // Whatever follows the last delimiter (or the whole string if there is none).
    result.push_back( PATH.substr(previous) );
    return result;
}
This only "calculates" the thing once per program run. It's not really thread-safe either, but heck, nothing environment-related is.
Here is my own code snippet without advanced boost libraries:
// Resolve `exe` against the directories of PATH: the first directory that
// contains it replaces `exe` with the full path.
if( exe.GetLength() )
{
    // Tokenize a private copy of PATH. Running wcstok directly on the pointer
    // returned by _wgetenv writes terminators into the process environment
    // itself, and that pointer is null when PATH is not set; a CStringW built
    // from a null pointer is simply empty.
    const CStringW pathEnvVariable = _wgetenv(L"PATH");
    int tokenPos = 0;
    for( CStringW dir = pathEnvVariable.Tokenize( L";", tokenPos ) ; !dir.IsEmpty() ; dir = pathEnvVariable.Tokenize( L";", tokenPos ) )
    {
        CStringW exePath = dir;
        exePath += L"\\";
        exePath += exe;
        if( PathFileExists(exePath) )
        {
            exe = exePath;
            break;
        }
    } //for
} //if

recursive file search

I'm trying to figure out how to work this thing out .. For some reason, it ends at a certain point.. I'm not very good at recursion and I'm sure the problem lies somewhere there..
Also, even if I checked for cFileName != "..", it still shows up at the end, not sure why but the "." doesn't show up anymore..
// Lists the entries below wrkdir into `results`, descending into entries
// whose attributes are exactly FILE_ATTRIBUTE_DIRECTORY.
// Uses the shared fHandle and file_data variables declared outside this
// function.
void find_files( wstring wrkdir )
{
    const wstring pattern = wrkdir + L"\\" + L"*";
    fHandle = FindFirstFile( pattern.c_str(), &file_data );
    if( fHandle == INVALID_HANDLE_VALUE )
        return;

    while( FindNextFile( fHandle, &file_data ) )
    {
        const bool descend =
            file_data.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY &&
            wcscmp(file_data.cFileName, L".") != 0 &&
            wcscmp(file_data.cFileName, L"..") != 0;
        if( descend )
        {
            find_files( wrkdir + L"\\" + file_data.cFileName );
            continue;
        }

        const bool hiddenOrSystem =
            file_data.dwFileAttributes == FILE_ATTRIBUTE_HIDDEN ||
            file_data.dwFileAttributes == FILE_ATTRIBUTE_SYSTEM;
        if( !hiddenOrSystem )
        {
            results << wrkdir << "\\" << file_data.cFileName << endl;
        }
    }
}
After changing those, the program doesn't enumerate the remaining files left..
For example, if there is a sub folder named test, it enumerates everything inside test but doesn't finish enumerating the files inside the original directory specified.
From the FindFirstFile documentation:
If the function fails or fails to
locate files from the search string in
the lpFileName parameter, the return
value is INVALID_HANDLE_VALUE and the
contents of lpFindFileData are
indeterminate.
You should only exit from the one iteration not the whole program:
if( fHandle == INVALID_HANDLE_VALUE )
{
return;
}
And this may solve your other problem:
else if( file_data.dwFileAttributes != FILE_ATTRIBUTE_HIDDEN &&
file_data.dwFileAttributes != FILE_ATTRIBUTE_SYSTEM &&
wcscmp(file_data.cFileName, L".") != 0 &&
wcscmp(file_data.cFileName, L"..") != 0
)
{
results << wrkdir << "\\" << file_data.cFileName << endl;
}
Also see @fretje's answer. It points out another problem in your code.
Updated new: You need to use fHandle as a local variable as well, not global variable.
Change to:
HANDLE fHandle = FindFirstFile( temp.c_str(), &file_data );
You are changing the value of your local wrkdir variable:
wrkdir = wrkdir + L"\\" + file_data.cFileName;
find_files( wrkdir );
I think you have to call find_files there like this:
find_files( wrkdir + L"\\" + file_data.cFileName );
and not change the value of wrkdir.
There are still several bugs in your code. Try this instead:
/**
 * Recursively lists the files below wrkdir into `results`.
 *
 * Directories other than "." and ".." are descended into; files carrying the
 * hidden or system attribute are skipped. The search handle and find data are
 * local, so each recursion level has its own enumeration state, and the
 * handle is closed before returning.
 *
 * @param wrkdir  Directory to enumerate, with or without a trailing backslash.
 */
void find_files( wstring wrkdir )
{
    // Make sure the directory ends with exactly one path separator.
    wstring wrkdirtemp = wrkdir;
    if( !wrkdirtemp.empty() && (wrkdirtemp[wrkdirtemp.length()-1] != L'\\') )
    {
        wrkdirtemp += L"\\";
    }
    WIN32_FIND_DATA file_data = {0};
    HANDLE hFile = FindFirstFile( (wrkdirtemp + L"*").c_str(), &file_data );
    if( hFile == INVALID_HANDLE_VALUE )
    {
        return;
    }
    // do/while so that the entry returned by FindFirstFile is processed too.
    do
    {
        if( file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY )
        {
            if( (wcscmp(file_data.cFileName, L".") != 0) &&
                (wcscmp(file_data.cFileName, L"..") != 0) )
            {
                find_files( wrkdirtemp + file_data.cFileName );
            }
        }
        else
        {
            if( (file_data.dwFileAttributes & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0 )
            {
                results << wrkdirtemp << file_data.cFileName << endl;
            }
        }
    }
    while( FindNextFile( hFile, &file_data ) );
    FindClose( hFile );
}
Recursive file search with dirent.h
#include <iostream>
#include <dirent.h>
#include <string.h>
// Returns true when `directory` is one of the special entries "." or "..".
bool isUpDirecory(const char* directory) {
    const bool isParent = strcmp(directory, "..") == 0;
    const bool isSelf = strcmp(directory, ".") == 0;
    return isParent || isSelf;
}
// Searches for a regular file named fileName, first directly inside `path`
// and then, depth-first, inside each of its sub-directories.
//
// On success resultPath is set to "<directory>/<fileName>" and true is
// returned. Returns false when the directory cannot be opened or no match is
// found; resultPath is left untouched in that case.
bool findFile(const std::string& fileName, const std::string& path,
              std::string& resultPath) {
    DIR* handle = opendir(path.c_str());
    if (handle == NULL)
        return false;

    // Pass 1: regular files directly inside `path`.
    for (dirent* item = readdir(handle); item != NULL; item = readdir(handle)) {
        if (item->d_type != DT_REG)
            continue;
        if (fileName.compare(item->d_name) != 0)
            continue;
        resultPath = path + "/" + item->d_name;
        closedir(handle);
        return true;
    }

    // Pass 2: restart the listing and recurse into every real sub-directory.
    rewinddir(handle);
    for (dirent* item = readdir(handle); item != NULL; item = readdir(handle)) {
        if (item->d_type != DT_DIR || isUpDirecory(item->d_name))
            continue;
        const std::string childPath = path + "/" + item->d_name;
        if (findFile(fileName, childPath, resultPath)) {
            closedir(handle);
            return true;
        }
    }

    closedir(handle);
    return false;
}
// Example driver: looks for "text.txt" below /home/lamerman/ and prints the
// path that was found (an empty line if there was no match).
int main() {
    std::string foundPath;
    findFile("text.txt", "/home/lamerman/", foundPath);
    std::cout << foundPath << std::endl;
    return 0;
}
Also, check out the implementation of the CFileFind MFC class.
You still have errors in your code:
You ignore the result of the first search. You call FindFirstFile and handle the case where it fails, but if it succeeds you do not process the file_data it already fetched and instead overwrite it with FindNextFile.
You don't close the search handle. Use FindClose for that.
From your existing code it seems that fHandle is global - it shouldn't be. That would break your recursion.
Also I think that you can resolve all the issues in your code by paying more attention to MSDN sample provided in FindFirstFile documentation.