How recursively find files with Unicode names in C++? - c++

I found and modify solution from here:
#include <iostream>
#include <windows.h>
#include <vector>
#include <fstream>
using namespace std;
//wofstream out;
void FindFile(const std::wstring &directory)
{
std::wcout << endl << endl << endl << "FindFile(" << directory << ")" << std::endl;
std::wstring tmp = directory + L"\\*";
WIN32_FIND_DATAW file;
HANDLE search_handle = FindFirstFileW(tmp.c_str(), &file);
if (search_handle != INVALID_HANDLE_VALUE)
{
std::vector<std::wstring> directories;
do
{
std::wcout << std::endl;
std::wcout << " [" << file.cFileName << "]" << std::endl;
tmp = directory + L"\\" + std::wstring(file.cFileName);
if (file.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
{
if ((!lstrcmpW(file.cFileName, L".")) || (!lstrcmpW(file.cFileName, L".."))) {
std::wcout << "continuing..." << std::endl;
}
else {
std::wcout << "saving path to this directory" << std::endl;
directories.push_back(tmp);
}
} else {
std::wcout << "save [" << tmp << "] as file" << std::endl;
}
//std::wcout << tmp << std::endl;
//out << tmp << std::endl;
}
while (FindNextFileW(search_handle, &file));
std::wcout << "all items inside current directory was worked out. close it's handle." << std::endl;
FindClose(search_handle);
for(std::vector<std::wstring>::iterator iter = directories.begin(), end = directories.end(); iter != end; ++iter) {
std::wcout << "recursively find in next directory: [" << *iter << "]" << std::endl;
FindFile(*iter);
}
} else {
std::wcout << "invalid handle value" << std::endl;
}
}
int main()
{
//out.open("C:\\temp\\found.txt");
FindFile(L"C:\\test");
//out.close();
cout << "The end" << endl;
string str;
cin >> str;
return 0;
}
but this code isn't working with folders or files with cyrillic names (but I use Unicode versions of all types and functions!)
Update: Application just finish, without any exceptions, as if all commands was executed.
Update-2 (print-screen):
Who had the same problem? Thanks for any help.
SOLVED
Thanks a lot to #zett42 !
After some refactoring working code looks like:
#include <iostream>
#include <windows.h>
#include <vector>
#include <fstream>
#include <io.h>
#include <fcntl.h>
using namespace std;
vector<wstring> FindFiles(const std::wstring &directory) {
vector<wstring> files;
std::vector<std::wstring> directories;
std::wstring fullPath = directory + L"\\*";
WIN32_FIND_DATAW file;
HANDLE search_handle = FindFirstFileW(fullPath.c_str(), &file);
if (search_handle == INVALID_HANDLE_VALUE)
return files;
do
{
fullPath = directory + L"\\" + std::wstring(file.cFileName);
if (!(file.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
files.push_back(fullPath);
else {
if ((lstrcmpW(file.cFileName, L".")) && (lstrcmpW(file.cFileName, L"..")))
directories.push_back(fullPath);
}
}
while (FindNextFileW(search_handle, &file));
FindClose(search_handle);
for(std::vector<std::wstring>::iterator iter = directories.begin(), end = directories.end(); iter != end; ++iter) {
vector<wstring> newFiles = FindFiles(*iter);
files.insert(files.begin(), newFiles.begin(), newFiles.end());
}
return files;
}
int main()
{
_setmode( _fileno(stdout), _O_U16TEXT );
vector<wstring> files = FindFiles(L"E:\\test");
wcout << L"All found files: " << endl;
for (int i = 0; i < files.size(); ++i)
wcout << files[i] << endl;
cout << "The end" << endl;
string str;
cin >> str;
return 0;
}

On Windows, Unicode output to console doesn't work by default, even if you use std::wcout.
To make it work, insert the following line at the beginning of your program:
_setmode( _fileno(stdout), _O_U16TEXT );
_setmode and _fileno are Microsoft specific function.
You may also have to change console font. I'm using Lucida Console which works fine for cyrillic letters.
Complete example:
#include <iostream>
#include <io.h> // _setmode()
#include <fcntl.h> // _O_U16TEXT
int main()
{
// Windows needs a little non-standard magic for Unicode console output.
_setmode( _fileno(stdout), _O_U16TEXT );
std::wcout << L"по русски\n";
}
Example should be saved as UTF-8 encoded file because of the Unicode string literal, but this is not relevant in your case because you don't have Unicode string literals.
I have successfully tested this code under MSVC2015 and MSVC2017 on Win10.

Related

When I run the code, the system gives two error warnings, I wonder what is causing this?

#include <string>
#include <filesystem>
#include <iomanip>
#include <iostream>
#include <regex>
namespace fs = std::filesystem;
using namespace std;
int main(int argc, char *argv[]) {
if (argc != 2 && argc != 3) {
cerr << "Usage: " << argv[0] << " <path> [<regex>]\n";
return 1;
}
fs::path const base = argv[1];
char const* re = ".*";
regex compiled_re(re);
for(auto const& entry : fs::recursive_directory_iterator(base,fs::directory_options::skip_permission_denied)) {
smatch m;
if (regex_match(entry.path().filename().native(), m, compiled_re)) {
string const s = entry.path().lexically_relative(base).native();
if (fs::is_symlink(entry)) {
cout << "LINK: " << quoted(s) << '\n';
}
else if (fs::is_regular_file(entry)) {
cout << "FILE: " << fs::file_size(entry) << ' ' << quoted(s) << '\n';
}
else if (fs::is_directory(entry)) {
cout << "DIR: " << quoted(s) << '\n';
}
else {
cout << "OTHER: " << quoted(s) << '\n';
}
}
}
return 0;
}
when i run the code, it have two error
no matching function for call to 'regex_match(const std::filesystem::__cxx11::path::string_type&, std::__cxx11::smatch&, std::__cxx11::regex&)'
conversion from 'basic_string<wchar_t>' to non-scalar type 'basic_string' requested
What causes these errors to appear?
You are compiling the code on Windows, correct?
On this platform, std::filesystem::path::native() returns a std::wstring. But you are using narrow string regex – that doesn't fit together. You can should probably switch to using wide-string regex:
wchar_t const* re = L".*";
wregex compiled_re(re);
for(auto const& entry : fs::recursive_directory_iterator(base,fs::directory_options::skip_permission_denied)) {
wsmatch m;
if (regex_match(entry.path().filename().native(), m, compiled_re)) {
wstring const s = entry.path().lexically_relative(base).native();
But you can't pass these wide string to cout either, but you can fix that by using wcout instead of cout

Understanding ICU ubidi. Direction is always UBIDI_LTR

I have written a piece of sample code referring to ICU reference, to read a line from a file and get it's base direction and see the result of Unicode Bidi algorithm on it.
in my input file I have written فارسی which is a sequence of right to left characters.
but this line std::cout << ubidi_getBaseDirection(us.getBuffer(), us.length()) << std::endl; prints 0 which is UBIDI_LTR.
And no matter what combination of characters (RTL and LTR combinations) I give in the input file, it will always have one run with direction UBIDI_LTR.
Is there something wrong with my code?
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/localpointer.h"
#include "unicode/ubidi.h"
#include <unicode/unistr.h>
#include<string>
#include<iostream>
#include <fstream>
#include "unicode/ustream.h"
using namespace icu;
using icu::UnicodeString;
int main(int argc, char* argv[])
{
std::string input;
std::string output;
std::ifstream MyReadFile("in.txt");
getline(MyReadFile, input);
UnicodeString us(input.c_str());
UBiDi* bidi = ubidi_open();
UErrorCode errorCode = U_ZERO_ERROR;
ubidi_setPara(bidi, us.getBuffer(), us.length(), UBIDI_RTL, nullptr, &errorCode);
std::cout << (ubidi_getBaseDirection(us.getBuffer(), us.length()) == UBIDI_LTR) << std::endl;
std::ofstream MyFile;
MyFile.open("out.txt");
if (U_SUCCESS(errorCode))
{
UnicodeString Ustring(ubidi_getText(bidi));
std::string Ustr;
Ustring.toUTF8String(Ustr);
int32_t count = ubidi_countRuns(bidi, &errorCode);
int32_t logicalStart, length;
if (count > 0)
MyFile << "VisualRun \t" << "direction" << "\t" << "s" << '\t' << "l" << '\t' << "output" << std::endl;
for (int32_t i = 0; i < count; i++) {
UBiDiDirection dir = ubidi_getVisualRun(bidi, i, &logicalStart, &length);
std::string dirstr = "UBIDI_LTR";
if (dir == UBIDI_RTL)
dirstr = "UBIDI_RTL";
UnicodeString temp = Ustring.tempSubString(logicalStart, length);
char* dest = (char*)malloc(temp.length());
temp.extract(logicalStart, length, dest, strlen(dest));
output = std::string(dest);
MyFile << "VisualRun \t" << dirstr << "\t" << logicalStart << '\t' << length << '\t' << output << std::endl;
}
}
else
{
std::cout << "Failed" << std::endl;
}
MyFile.close();
return 0;
}

Unknown error in inserting values in multimap

I have the following code, where I try to insert values into a multimap of 2 strings, but I keep getting an error that I cannot understand. I've been trying to solve this for hours.
The whole point of the program is to sort the lines of a dictionary based on the automatic sorting of the multimap insertion.
// sort_entries_of_multiple_dictionaries.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include <iostream>
#include <fstream>
#include <vector>
#include <map>
#include <string>
#include <algorithm>
#include <iomanip>
#include <sstream>
// Prototypes
int indexDict(std::multimap<std::string, std::string>& dict);
int main()
{
std::multimap<std::string, std::string> dict;
if(indexDict(dict) == 0)
return 0;
}
int indexDict(std::multimap<std::string, std::string>& dict)
{
std::ifstream inputFile{ "output.txt", std::ios::in };
std::string currentDictEntry{};
size_t currentLine{};
if (!inputFile)
{
std::cerr << "input.txt FILE NOT FOUND in the current directory" << std::endl;
system("pause");
return 0;
}
while (std::getline(inputFile, currentDictEntry))
{
//std::cout << currentDictEntry << std::endl; // TO DELETE
std::string currentWord{};
size_t delimiterPos = currentDictEntry.find('\t', 0);
if (delimiterPos == std::string::npos)
std::cerr << "ERROR. Delimiter \"<b>\" not found in line " << currentLine << std::endl;
else
{
//std::cout << "pos of \\t = " << delimiterPos << std::endl; // TO DELETE
for (char& ch : currentDictEntry)
{
if (ch != '\t')
{
currentWord += ch;
}
else
break;
}
std::cout << currentWord /* << '|' */ << std::endl; // TO DELETE
auto value = currentDictEntry.substr(delimiterPos, std::string::npos);
std::cout << "size= " << value.size() << '|' << value << std::endl;
dict.insert( currentWord, currentWord/*, value*/ );
}
if (currentLine == 50) return 0; // TO DELETE
currentLine++;
}
return 1;
}
if (currentLine == 50) return 0; // TO DELETE
currentLine++;
}
return 1;
}
The error I keep getting is:
unary '++': '_Iter' does not define this operator or a conversion to a type acceptable to the predefined operator
illegal indirection
as #Evg said, it accepts a std::pair
dict.insert(std::make_pair(currentWord, value));
if I understand your intention correctly, you don't want to save the \t into your result, so add 1 after delimiterPos to get the correct value:
auto value = currentDictEntry.substr(delimiterPos + 1, std::string::npos);
test run. output.txt:
4 d
1 a
2 b
3 c
0
output:
"0" - ""
"1" - "a"
"2" - "b"
"3" - "c"
"4" - "d"
full code:
#include <iostream>
#include <fstream>
#include <map>
#include <string>
#include <iomanip>
#include <sstream>
// Prototypes
int indexDict(std::multimap<std::string, std::string>& dict);
int main()
{
std::multimap<std::string, std::string> dict;
if (indexDict(dict) == 0)
return 0;
for (auto& i : dict) {
std::cout << "\"" << i.first << "\" - \"" << i.second << "\"\n";
}
}
int indexDict(std::multimap<std::string, std::string>& dict)
{
std::ifstream inputFile{ "output.txt", std::ios::in };
std::string currentDictEntry{};
size_t currentLine{};
if (!inputFile)
{
std::cerr << "output.txt FILE NOT FOUND in the current directory" << std::endl;
system("pause");
return 0;
}
while (std::getline(inputFile, currentDictEntry))
{
//std::cout << currentDictEntry << std::endl; // TO DELETE
std::string currentWord{};
size_t delimiterPos = currentDictEntry.find('\t', 0);
if (delimiterPos == std::string::npos)
std::cerr << "ERROR. Delimiter \"<b>\" not found in line " << currentLine << std::endl;
else
{
//std::cout << "pos of \\t = " << delimiterPos << std::endl; // TO DELETE
for (char& ch : currentDictEntry)
{
if (ch != '\t')
{
currentWord += ch;
}
else
break;
}
std::cout << currentWord /* << '|' */ << std::endl; // TO DELETE
auto value = currentDictEntry.substr(delimiterPos + 1, std::string::npos);
std::cout << "size= " << value.size() << '|' << value << std::endl;
dict.insert(std::make_pair(currentWord, value));
}
if (currentLine == 50) return 0; // TO DELETE
currentLine++;
}
return 1;
}
small mistakes in your code: you don't need <algorithm> and <vector>. also your error message said input.txt instead of output.txt.
I Change dict.insert( currentWord, currentWord/*, value*/ ); To dict.insert({ currentWord,currentWord }); and error Has solved

C++ / Boost:: Propery_Tree - Is it possible to take ini config value with dot in name [duplicate]

"A": "1"
"A.B": "2"
"A.C": "3"
How to get the value of A.B if i iterate through the ptree it works. if i try
to get value of pt.get_child("A\.B").get_value<std::string>(). i get the following exception
terminate called after throwing an instance of boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::property_tree::ptree_bad_path> >'
what(): No such node
please find the complete code below
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/ini_parser.hpp>
#include <iostream>
#include <string>
#include <iterator>
using boost::property_tree::ptree;
/* Indent Json Output */
std::string indent(int level) {
std::string s;
for (int i = 0; i < level; i++) s += " ";
return s;
}
/* Print tree in json format */
void printTree(ptree & pt, int level) {
if (pt.empty()) {
std::cerr << "\"" << pt.data() << "\"";
} else {
if (level) std::cerr << std::endl;
std::cerr << indent(level) << "{" << std::endl;
for (ptree::iterator pos = pt.begin(); pos != pt.end();) {
std::cerr << indent(level + 1) << "\"" << pos-> first << "\": ";
printTree(pos->second, level + 1);
++pos;
if (pos != pt.end()) {
std::cerr << ",";
}
std::cerr << std::endl;
}
std::cerr << indent(level) << " }";
}
return;
}
int main()
{
ptree pt;
read_ini("sample.ini", pt);
printTree(pt, 0);
std::cout << pt.get_child("A.B").get_value<std::string>() << std::endl; //tries to resolve A.B to two nodes
std::cout << pt.get_child("A\\.B").get_value<std::string>() << std::endl; //error
}
sample.ini
A=1
A.B=2
A.C=3
You can use alternative path delimiters, but it's a bit tricky and not very well documented.
You have to temporarily specify an alternative path separator:
Live On Coliru
#include <boost/property_tree/ini_parser.hpp>
#include <iostream>
using boost::property_tree::ptree;
int main() {
ptree pt;
pt.put("a.b", "first");
pt.put(ptree::path_type("a|complicated.name", '|'), "second");
write_ini(std::cout, pt);
}
Prints
[a]
b=first
complicated.name=second

write Unicode strings into a txt file

I made my simple txt scanner who writes the text into a file that matches my selection. The problem is writing to file when instead of the pen writes, for example, 洀漀. On picture you can see for example:
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
int main()
{
int offset;
wstring DBSearchLine, ScanLine;
wifstream ScanFile, DBSearchFile;
wofstream ResultFile;
ScanFile.open("ScanFile.txt", ios_base::binary);
ResultFile.open("ResultFile.txt", ios::out, ios_base::binary);
if (ScanFile.is_open())
{
while (!ScanFile.eof())
{
DBSearchFile.open("DBSearchFile.txt", ios_base::binary);
if (!DBSearchFile.is_open())
{
cout << "Error open DBSearchFile.txt" << "\n";
break;
}
getline(ScanFile, ScanLine);
wcout << "Scan line is - " << ScanLine << "\n";
while (!DBSearchFile.eof())
{
getline(DBSearchFile, DBSearchLine);
wcout << "DBSearchLine is -" << DBSearchLine << "\n";
if ((offset = ScanLine.find(DBSearchLine, 0)) != string::npos)
{
ResultFile << ScanLine << L"\n";
}
}
DBSearchFile.close();
}
ScanFile.close();
}
else
{
cout << "Error open ScanFile.txt" << "\n";
}
system("PAUSE");
return 0;
}
#include <iostream>
#include <fstream>
#include <string>
#include <locale>
#include <codecvt>
using namespace std;
int main()
{
/* via http://stackoverflow.com/a/5105192/4005233
changes the encoding of the console and all subsequently opened
files */
std::locale::global(std::locale(""));
wifstream ScanFile;
ScanFile.open("ScanFile.txt", ios_base::binary);
if (!ScanFile.is_open()) {
cout << "Error open ScanFile.txt" << "\n";
return 1;
}
wofstream ResultFile("ResultFile.txt", ios::out);
while (!ScanFile.eof())
{
wifstream DBSearchFile;
DBSearchFile.open("DBSearchFile.txt", ios_base::binary);
if (!DBSearchFile.is_open())
{
cout << "Error open DBSearchFile.txt" << "\n";
break;
}
wstring ScanLine;
getline(ScanFile, ScanLine);
wcout << "Scan line is - " << ScanLine << "\n";
do
{
wstring DBSearchLine;
getline(DBSearchFile, DBSearchLine);
// have all lines been read?
if(!DBSearchLine.length())
break;
wcout << "DBSearchLine is -" << DBSearchLine << "\n";
if (ScanLine.find(DBSearchLine, 0) != string::npos)
{
ResultFile << ScanLine << L"\n";
break; // found a match, no need to search further
}
}while(1);
DBSearchFile.close();
}
ScanFile.close();
return 0;
}
This was tested using files with and without a BOM.
The innermost loop had to be changed to handle files with a newline character at the end; if I hadn't done that it would have match with an empty string which is always true.
(I've also changed a few other things according to my coding style, the important change is the one right at the top)