Find the last added file (boost) - c++

I'm trying to build a function that finds the last added file with a specific extension.
Here's what I did:
// Prints the file name of the most recently written ".xml" file found
// anywhere below `path`. When several files share the newest timestamp the
// one visited last wins.
void getLastAdded(const fs::path path)
{
    const string wanted = ".xml";

    // Pass 1: walk the tree and remember every regular file with the extension.
    vector<fs::path> candidates;
    for (fs::recursive_directory_iterator walker(path), done; walker != done; ++walker)
    {
        const bool matches = fs::is_regular_file(*walker)
                             && walker->path().extension() == wanted;
        if (matches)
            candidates.push_back(walker->path());
    }

    // Pass 2: keep whichever candidate is not older than the current pick.
    fs::path newest;
    for (vector<fs::path>::const_iterator cur = candidates.begin(); cur != candidates.end(); ++cur)
    {
        if (cur == candidates.begin())
            newest = *cur;
        if (!(fs::last_write_time(*cur) < fs::last_write_time(newest)))
            newest = *cur;
    }

    cout << "Last:" << newest.filename() << endl;
}
Is there any better way to accomplish this?

Instead of building a (potentially huge) vector of filenames that you won't use/need, I'd filter for the max modification time on-the-fly.
Moreover, don't forget to handle errors:
#include <boost/filesystem.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <iostream>
namespace fs = boost::filesystem;
/**
 * Returns the path of the most recently modified regular file below `path`
 * whose extension equals `ext` (leading dot included, e.g. ".xml").
 *
 * The tree is scanned once and only the current best candidate is kept, so no
 * list of files is built. Entries whose status or timestamp cannot be read
 * are reported on std::cerr and skipped. Errors raised while opening or
 * advancing the directory iterator itself are not caught here.
 *
 * @return the newest matching file, or an empty path if nothing matched.
 */
fs::path getLastAdded(const fs::path& path, std::string const& ext = ".xml") {
    namespace pt = boost::posix_time;
    pt::ptime max = {pt::neg_infin};
    fs::path last;
    for (fs::recursive_directory_iterator it(path), endit; it != endit; ++it)
    {
        try {
            // The status query can fail too, so it belongs inside the try.
            if (!fs::is_regular_file(*it) || it->path().extension() != ext)
                continue;
            auto stamp = pt::from_time_t(fs::last_write_time(*it));
            // ">=" so that, on equal timestamps, the later-visited file wins.
            if (stamp >= max) {
                last = it->path();
                max = stamp;
            }
        } catch(std::exception const& e) {
            std::cerr << "Skipping: " << *it << " (" << e.what() << ")\n";
        }
    }
    return last; // empty if no file matched
}
int main() {
std::cout << "Last: " << getLastAdded(".") << "\n";
}
With some debug information on Coliru:
Live On Coliru
Prints
DEBUG: "./i.xml"
DEBUG: "./z.xml"
DEBUG: "./q.xml"
DEBUG: "./c.xml"
DEBUG: "./v.xml"
DEBUG: "./f.xml"
DEBUG: "./t.xml"
DEBUG: "./d.xml"
DEBUG: "./a.xml"
DEBUG: "./b.xml"
DEBUG: "./e.xml"
DEBUG: "./u.xml"
DEBUG: "./p.xml"
DEBUG: "./g.xml"
DEBUG: "./x.xml"
DEBUG: "./y.xml"
DEBUG: "./j.xml"
DEBUG: "./h.xml"
DEBUG: "./o.xml"
DEBUG: "./m.xml"
DEBUG: "./s.xml"
DEBUG: "./w.xml"
DEBUG: "./l.xml"
DEBUG: "./n.xml"
DEBUG: "./r.xml"
DEBUG: "./k.xml"
Last: "./k.xml"

Related

open file by std::ifstream occasionally fails on folder detected by FindFirstChangeNotification (Windows)

I have an application that must monitor some folders (in Windows) to detect if a file was created in that folder (real use is to detect incoming FTP files).
If a file is detected, it is read and then deleted.
Occasionally, I get a file reading error on a file that was detected.
Question is: Why?
To simulate the error, I created a simple program to reproduce it:
// Directories monitored by watch_dir().
std::vector<std::filesystem::path> watch;

// Registers the two test folders and starts the (never returning) watch loop.
// Standard C++ requires main to return int; `void main()` is a non-portable
// extension.
int main()
{
    watch.push_back("D:\\test1"); //must exist
    watch.push_back("D:\\test2");
    watch_dir();
}
this example monitors 2 folders.
To simulate incoming files on the folder, another program copies files to that folder
continuously at configurable intervals (say 100 milliseconds).
To detect folder changes , WIN32 API functions FindFirstChangeNotification and WaitForMultipleObjects are used, based on this Microsoft example
https://learn.microsoft.com/en-us/windows/win32/fileio/obtaining-directory-change-notifications
detection function adapted from the example (Note: WaitForMultipleObjects blocks until a change is detected)
void watch_dir()
{
HANDLE handle[2];
memset(handle, 0, 2 * sizeof(HANDLE));
for (size_t idx = 0; idx < watch.size(); idx++)
{
std::string str = watch.at(idx).string();
LPTSTR path = (LPTSTR)str.c_str();
std::cout << "watch path " << path << std::endl;
handle[idx] = FindFirstChangeNotification(
path, // directory to watch
FALSE, // do not watch subtree
FILE_NOTIFY_CHANGE_FILE_NAME); // watch file name changes
if (handle[idx] == INVALID_HANDLE_VALUE)
{
assert(0);
ExitProcess(GetLastError());
}
}
while (TRUE)
{
std::cout << "Waiting for notification..." << std::endl;
DWORD wait_status = WaitForMultipleObjects(watch.size(), handle, FALSE, INFINITE);
std::cout << "Directory " << watch.at(wait_status) << " changed" << std::endl;
if (FindNextChangeNotification(handle[wait_status]) == FALSE)
{
assert(0);
ExitProcess(GetLastError());
}
std::filesystem::path path = watch.at(wait_status);
send_files_in_path(path);
}
}
Once a change is detected by the function above, then all files in the folder are listed
and read, by these functions
// Reads every ".txt" file that get_files() reports for `ftp_path` and deletes
// each one after it has been read successfully.
//
// A file whose read fails is left in place, so it is picked up again on the
// next pass instead of being deleted unread. Deletion failures are reported
// on std::cout and do not abort the remaining files.
void send_files_in_path(const std::filesystem::path& ftp_path)
{
    const std::vector<std::filesystem::path> list = get_files(ftp_path);
    for (const std::filesystem::path& file : list)
    {
        std::string buf;
        if (read_file(file.string(), buf) != 0)
        {
            continue; // keep the file; it was not read
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
        std::error_code ec;
        std::filesystem::remove(file, ec); // non-throwing overload
        if (ec)
        {
            std::cout << "remove fail: " << file.string() << " (" << ec.message() << ")" << std::endl;
        }
    }
}
/////////////////////////////////////////////////////////////////////////////////////////////////////
//get_files
//Walks base_archive_path recursively and returns every regular file whose
//name ends in ".txt" or ".TXT". An exception raised during the walk is logged
//and whatever was collected up to that point is returned.
/////////////////////////////////////////////////////////////////////////////////////////////////////
std::vector<std::filesystem::path> get_files(const std::filesystem::path& base_archive_path)
{
    std::vector<std::filesystem::path> found;
    try
    {
        for (const auto& item : std::filesystem::recursive_directory_iterator(base_archive_path))
        {
            if (!item.is_regular_file())
            {
                continue;
            }
            const std::string name = item.path().filename().string();
            // Names shorter than the suffix can never match.
            const bool long_enough = name.size() >= 4;
            const bool is_txt = long_enough &&
                (name.compare(name.size() - 4, 4, ".txt") == 0 ||
                 name.compare(name.size() - 4, 4, ".TXT") == 0);
            if (is_txt)
            {
                SPDLOG_INFO("loading: " + item.path().string());
                found.push_back(item.path());
            }
        }
    }
    catch (const std::exception& e)
    {
        SPDLOG_ERROR(e.what());
    }
    return found;
}
The function where the error happens is
// Loads the whole content of `fname` into `buf`.
// Returns 0 on success and -1 when the file cannot be opened; in the failure
// case `buf` is left untouched. Progress and errors are written to std::cout.
int read_file(const std::string& fname, std::string& buf)
{
    std::ifstream input;
    // Make a failed open() throw in addition to setting the stream state.
    input.exceptions(input.exceptions() | std::ios::failbit);
    std::this_thread::sleep_for(std::chrono::milliseconds(0));
    std::cout << "opening : " << fname << std::endl;

    bool opened = false;
    try
    {
        input.open(fname);
        opened = input.is_open();
        if (!opened)
        {
            std::cout << "open fail: " << fname << std::endl;
        }
    }
    catch (const std::exception& err)
    {
        std::cout << err.what() << std::endl;
    }
    if (!opened)
    {
        return -1;
    }

    std::ostringstream contents;
    contents << input.rdbuf();
    input.close();
    buf = contents.str();
    return 0;
}
The try/catch block is, again, occasionally triggered with the error
ios_base::failbit set: iostream stream error
removing the try/catch block, and the open mask (just to try), then
ifs.is_open
fails.
A temporary solution was to detect the cases where the open() failed and repeat it.. which succeeds, because the file does exist.
Calling this with a small delay before the open call has the effect of reducing the open fails
std::this_thread::sleep_for(std::chrono::milliseconds(10));
ifs.open(fname);
But still would like to find out the reason for the occasional failure

state is "downloading", but torrent_file() returns NULL?

I recently tried to write a scraper capable of downloading 3.5 million
torrent files based on their magnet URL. I decided to start by hacking
an example from libtorrent's tutorial webpage, but while it works well
with just one torrent file, it segfaults in create_torrent() when
I try to download more than one file. Here's my code:
#include <thread>
#include <chrono>
#include <fstream>
#include <sstream>
#include <string>
#include <libtorrent/session.hpp>
#include <libtorrent/add_torrent_params.hpp>
#include <libtorrent/create_torrent.hpp>
#include <libtorrent/torrent_handle.hpp>
#include <libtorrent/alert_types.hpp>
#include <libtorrent/bencode.hpp>
#include <libtorrent/torrent_status.hpp>
#include <libtorrent/torrent_info.hpp>
// Shorthand for the libtorrent namespace used throughout this program.
namespace lt = libtorrent;
// Monotonic clock alias (not referenced by the code shown here).
using clk = std::chrono::steady_clock;
// Number of torrents queued by add_torrent_url() that main() has not yet
// written out; main() stops once it drops back to zero.
int torrents_left = 0;
// Writes the bytes in `v` to `filename`, replacing any existing file.
//
// Returns  0 on success (an empty `v` produces an empty file),
//         -1 if the file cannot be opened for writing,
//         -3 if not every byte could be written or flushed to the file.
int save_file(std::string const& filename, std::vector<char> const& v)
{
    FILE* f = std::fopen(filename.c_str(), "wb");
    if (f == nullptr)
        return -1;
    // v.data() is valid for an empty vector, unlike &v[0]. Skip the write
    // entirely when there is nothing to write.
    const size_t written = v.empty() ? 0 : std::fwrite(v.data(), 1, v.size(), f);
    // fclose flushes buffered data, so its failure also means an incomplete file.
    const bool closed = (std::fclose(f) == 0);
    if (written != v.size() || !closed)
        return -3;
    return 0;
}
void add_torrent_url(std::string url, lt::session& ses) {
// std::cerr << "DEBUG: Will download '" << url << "'" << std::endl;
lt::add_torrent_params atp;
atp.url = url;
atp.save_path = "."; // save in current dir
ses.async_add_torrent(atp);
torrents_left++;
}
// Treats every line of standard input as a torrent URL and queues it on `ses`.
void add_torrents_from_stdin(lt::session& ses) {
    std::cerr << "DEBUG: reading stdin." << std::endl;
    for (std::string line; std::getline(std::cin, line); ) {
        add_torrent_url(line, ses);
    }
    std::cerr << "DEBUG: done reading stdin." << std::endl;
}
// Entry point: queues two magnet links, then polls the session's alerts every
// 200 ms. When a status update reports the `downloading` state, the torrent's
// metadata is written to downloaded/<info-hash>.torrent. The loop ends on a
// finished/error alert, on a null torrent_file(), or when torrents_left hits 0.
int main(int argc, char const* argv[])
{
// Ask the session to deliver error, storage and status alerts.
lt::settings_pack pack;
pack.set_int(lt::settings_pack::alert_mask
, lt::alert::error_notification
| lt::alert::storage_notification
| lt::alert::status_notification);
lt::session ses(pack);
// NOTE(review): atp is never used in this function.
lt::add_torrent_params atp;
//add_torrent_url(argv[1]);
add_torrent_url("magnet:?xt=urn:btih:3E37CFE29B1049E03F858758A73EFD85BA170BE8", ses);
add_torrent_url("magnet:?xt=urn:btih:8FCDE178E3F9A24EA40856826C4E8A625A931B73", ses);
//add_torrents_from_stdin(ses);
// Handle taken from the add_torrent_alert. It is overwritten by every such
// alert, so it refers to whichever torrent was reported most recently.
lt::torrent_handle h;
for (;;) {
// Drain all alerts queued since the previous iteration.
std::vector<lt::alert*> alerts;
ses.pop_alerts(&alerts);
for (lt::alert const* a : alerts) {
if (auto at = lt::alert_cast<lt::add_torrent_alert>(a)) {
h = at->handle;
}
// if we receive the finished alert or an error, we're done
if (lt::alert_cast<lt::torrent_finished_alert>(a)) {
std::cout << "torrent finished or error." << std::endl;
goto done;
}
if (lt::alert_cast<lt::torrent_error_alert>(a)) {
std::cout << a->message() << std::endl;
goto done;
}
if (auto st = lt::alert_cast<lt::state_update_alert>(a)) {
if (st->status.empty()) continue;
// Only the first status entry is inspected.
// NOTE(review): two torrents are added above, so status[0] is not
// guaranteed to describe the torrent that `h` refers to.
lt::torrent_status const& s = st->status[0];
if (s.state == lt::torrent_status::downloading)
{
std::cout << "Hi!" << std::endl;
// Metadata is fetched through `h`, not through the handle inside `s`.
std::shared_ptr<const lt::torrent_info> ti = h.torrent_file();
if (ti == 0) {
std::cerr << "ERROR: ti == NULL" << std::endl;
goto done;
}
// `ti` is a shared_ptr, so the metadata stays usable after the
// torrent has been removed from the session.
ses.remove_torrent(h, lt::session::delete_files);
// Re-encode the metadata as a .torrent file named after the info-hash.
lt::create_torrent new_torrent(*ti);
std::vector<char> out;
lt::bencode(std::back_inserter(out), new_torrent.generate());
std::stringstream ss;
ss << "downloaded/" << (*ti).info_hash() << ".torrent";
// The return value of save_file is not checked.
save_file(ss.str(), out);
// NOTE(review): pause() is called on a handle that was just removed.
h.pause();
torrents_left--;
std::cerr << "DEBUG: Done (" << torrents_left << " left): " << (*ti).info_hash() << std::endl;
if (torrents_left == 0)
goto done;
}
}
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
// ask the session to post a state_update_alert, to update our
// state output for the torrent
ses.post_torrent_updates();
}
// Common exit point for all the `goto done` branches above.
done:
{}
}
I suspect it's related to this part:
// we only have a single torrent, so we know which one
// the status is for
lt::torrent_status const& s = st->status[0];
But according to my debugger, when torrent_file() gives NULL, st->status[] only contains one element anyway.
What's happening here? How do I fix it?
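It looks like I made wrong assumptions about what "h" points to in the example. Here's a diff that fixes the code in question (note that the diff was generated with the two files in reverse order, so the lines marked "-" are the fixed code and the lines marked "+" are the old code):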
--- scrape_rasterbar.cpp 2017-01-07 21:00:39.565636805 +0100
+++ scrape_rasterbar_old.cpp 2017-01-07 21:05:53.339718098 +0100
@@ -1,4 +1,3 @@
-#include <iostream>
#include <thread>
#include <chrono>
#include <fstream>
@@ -94,17 +93,18 @@
if (auto st = lt::alert_cast<lt::state_update_alert>(a)) {
if (st->status.empty()) continue;
- for (auto &s : st->status) {
// we only have a single torrent, so we know which one
// the status is for
+ lt::torrent_status const& s = st->status[0];
if (s.state == lt::torrent_status::downloading)
{
- std::shared_ptr<const lt::torrent_info> ti = s.handle.torrent_file();
+ std::cout << "Hi!" << std::endl;
+ std::shared_ptr<const lt::torrent_info> ti = h.torrent_file();
if (ti == 0) {
std::cerr << "ERROR: ti == NULL" << std::endl;
goto done;
}
- ses.remove_torrent(s.handle, lt::session::delete_files);
+ ses.remove_torrent(h, lt::session::delete_files);
lt::create_torrent new_torrent(*ti);
std::vector<char> out;
lt::bencode(std::back_inserter(out), new_torrent.generate());
@@ -112,7 +112,7 @@
std::stringstream ss;
ss << "downloaded/" << (*ti).info_hash() << ".torrent";
save_file(ss.str(), out);
- s.handle.pause();
+ h.pause();
torrents_left--;
std::cerr << "DEBUG: Done (" << torrents_left << " left): " << (*ti).info_hash() << std::endl;
if (torrents_left == 0)
@@ -120,7 +120,6 @@
}
}
}
- }
std::this_thread::sleep_for(std::chrono::milliseconds(200));
// ask the session to post a state_update_alert, to update our

List all text files in directory in C++

I am trying to store the name of all txt files in a directory in a string and print them out. I need to count the number of txt files in the directory and then print the names. The part of counting is working, but I can't seem to get the name working. I have found some examples but they don't work in visual studio which is what I'm using.
Here is my code.
int main() {
bool x = true;
int i = 0;
wchar_t* file = L"../Menu/Circuitos/*.txt";
WIN32_FIND_DATA FindFileData;
HANDLE hFind;
hFind = FindFirstFile(file, &FindFileData);
if (hFind != INVALID_HANDLE_VALUE) {
i++;
while ((x = FindNextFile(hFind, &FindFileData)) == TRUE) {
i++;
}
}
cout << "number of files " << i << endl;
return 0;
}
FindFirstFile already returns the first matching file. If you immediately call FindNextFile, that first result is lost. The file count in your example would be wrong.
Use a do-while loop instead.
Also, the handle obtained from FindFirstFile must be closed with FindClose
// Search handle returned by FindFirstFile; closed below with FindClose.
HANDLE hFind;
hFind = FindFirstFile(file, &FindFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
// FindFileData already holds the first match here, so the body must run
// once before FindNextFile is called: hence do-while rather than while.
do {
wcout << FindFileData.cFileName << "\n";
i++;
} while (FindNextFile(hFind, &FindFileData));
// Release the search handle once enumeration is over.
FindClose(hFind);
}
cout << "number of files " << i << endl;
Use std::vector and std::wstring to store the items
#include <string>
#include <vector>
...
// Collect the name of every match, then print the count and the names.
std::vector<std::wstring> vs;
HANDLE hFind;
hFind = FindFirstFile(file, &FindFileData);
if (hFind != INVALID_HANDLE_VALUE)
{
    // The first match is already in FindFileData, so store it before advancing.
    do {
        vs.push_back(FindFileData.cFileName);
    } while (FindNextFile(hFind, &FindFileData));
    FindClose(hFind);
}
std::cout << "count:" << vs.size() << "\n";
// Iterate by const reference so the strings are not copied just to be printed.
for (const auto& item : vs)
    std::wcout << item << "\n";
For some older compilers auto may not be available, use this instead
// Index-based alternative for compilers without `auto`. The index is size_t
// to match vs.size() and avoid a signed/unsigned comparison.
for (size_t i = 0; i < vs.size(); i++)
    std::wcout << vs[i] << "\n";
Note, Windows API works with c-strings. In many cases you have to use .c_str() to obtain character array. For example:
// Show the first collected name. The Windows API takes a C string, so the
// wstring is handed over through c_str().
if (!vs.empty())
{
    const std::wstring& first = vs.front();
    MessageBox(0, first.c_str(), 0, 0);
}
Here is a portable version using the new ISO Standard Filesystem Library TS (technical specification) for those with compilers that support it:
#include <vector>
#include <iostream>
#include <algorithm>
#include <experimental/filesystem>
// for readability
namespace fs = std::experimental::filesystem;
/**
* Function object to test directory entries
* for a specific file extension.
*/
struct file_extension_is
{
std::string ext;
file_extension_is(std::string const& ext): ext(ext) {}
bool operator()(fs::directory_entry const& entry) const
{
return entry.path().extension() == ext;
}
};
int main(int, char* argv[])
{
try
{
// directory supplied on the command line if present
// else current directory
fs::path dir = argv[1] ? argv[1] : ".";
// place to store the results
std::vector<fs::directory_entry> entries;
// copy directory entries that have file extension ".txt"
// to the results
fs::directory_iterator di(dir);
fs::directory_iterator end;
std::copy_if(di, end, std::back_inserter(entries),
file_extension_is(".txt"));
// print it all out
std::cout << "Number of files: " << entries.size() << '\n';
for(auto const& entry: entries)
std::cout << entry.path().string() << '\n';
}
catch(std::exception const& e)
{
std::cerr << e.what() << '\n';
}
catch(...)
{
std::cerr << "Unknown exception." << '\n';
}
}

reading files in a directory C++

I'm reading files in a directory and passing it to a function, I think I'm doing it in a wrong way, not able to figure out.
Here is my code first it reads files in a folder and send it to a function for further operations.
#include <dirent.h>
#include <stdio.h>
#include <vector>
#include <string>
#include <iostream>
using namespace std;
// Global vector named fileName. The parameter-less `doSomething(fileName)`
// further down picks up this name.
std::vector<std::string> fileName;
// Reads the entry names of the directory "files", prints each one and passes
// it to doSomething().
int main(void)
{
DIR *d;
struct dirent *dir;
vector<string> fileList;
// Counts the entries read; the total is not used afterwards.
int i=0;
d = opendir("files");
if (d)
{
// Collect every entry name that readdir returns.
while ((dir = readdir(d)) != NULL)
{
i++;
fileList.push_back(dir->d_name);
}
// This loop declares its own `i`, shadowing the counter above.
for(int i=0;i<fileList.size();i++) {
cout<<fileList[i]<<endl;
// doSomething has not been declared at this point in the file.
doSomething(fileList[i]);
}
closedir(d);
}
return(0);
}
// Intended as the per-file processing hook.
// NOTE(review): the parameter has no type, so `fileName` names the global
// vector and this line is parsed as the initialization of an int variable
// rather than as a function definition.
int doSomething(fileName) {
//do something
}
Error
main.cpp: In function ‘int main()’:
main.cpp:29:28: error: ‘doSomething’ was not declared in this scope
doSomething(fileList[i]);
^
main.cpp: At global scope:
main.cpp:37:26: error: cannot convert ‘std::vector<std::basic_string<char> >’ to ‘int’ in initialization
int doSomething(fileName) {
^
main.cpp:37:28: error: expected ‘,’ or ‘;’ before ‘{’ token
int doSomething(fileName) {
^
Since your doSomething function is defined after main, it is not visible, that causes the first error. The correct way would be to at least declare the function first:
int doSomething(); // declaration: makes the name visible to main below
int main()
{
    doSomething(); // now the function is declared
}
// definition
int doSomething()
{
    return 0; // flowing off the end of a non-void function is undefined behavior
}
Now, the second and third errors are emitted because you didn't include the fileName parameter's type in your function definition. Based on your code, it should be a string:
// Per-file hook with the parameter type spelled out. Returns 0 so that the
// non-void function never flows off its end.
int doSomething(std::string fileName)
{
    return 0;
}
I also noticed that, while this function returns int, you are not using its return value. Nevertheless, don't forget to return something from doSomething, otherwise it will cause undefined behavior.
Yes, Boost is great, but it's a bit bloated. So, just for completeness, here is a Boost-free version applied to reading images in a directory for OpenCV:
// you need these includes for the function
//#include <windows.h> // for windows systems
#include <dirent.h> // for linux systems
#include <sys/stat.h> // for linux systems
#include <algorithm> // std::sort
#include <opencv2/opencv.hpp>
#include <iostream> //cout
using namespace std;
/*
 * Appends to `filenames` the name (without the directory part) of every entry
 * in `directory` that is not a directory and does not begin with a dot, then
 * sorts the whole vector.
 *
 * Returns the size of `filenames` after the call, or 0 if `directory` cannot
 * be opened (in which case `filenames` is left untouched).
 */
int readFilenames(std::vector<std::string> &filenames, const std::string &directory)
{
#ifdef WINDOWS
    // The narrow-character (A) API is used explicitly because the names are
    // stored in std::string, whatever the UNICODE setting of the build.
    WIN32_FIND_DATAA file_data;
    const HANDLE dir = FindFirstFileA((directory + "/*").c_str(), &file_data);
    if (dir == INVALID_HANDLE_VALUE)
        return 0; /* No files found */
    do {
        const std::string file_name = file_data.cFileName;
        const bool is_directory = (file_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
        if (file_name[0] == '.' || is_directory)
            continue;
        filenames.push_back(file_name); // just the filename, same as the POSIX branch
    } while (FindNextFileA(dir, &file_data));
    FindClose(dir);
#else
    DIR *dir = opendir(directory.c_str());
    if (dir == NULL)
        return 0; /* missing or unreadable directory */
    struct dirent *ent;
    while ((ent = readdir(dir)) != NULL) {
        const std::string file_name = ent->d_name;
        if (file_name[0] == '.')
            continue;
        const std::string full_file_name = directory + "/" + file_name;
        struct stat st;
        if (stat(full_file_name.c_str(), &st) == -1)
            continue;
        // S_ISDIR compares the whole file-type field. A plain bit test against
        // S_IFDIR also matches other types, such as block devices.
        if (S_ISDIR(st.st_mode))
            continue;
        filenames.push_back(file_name); // returns just filename
    }
    closedir(dir);
#endif
    std::sort(filenames.begin(), filenames.end()); //optional, sort the filenames
    return static_cast<int>(filenames.size()); //Return how many we found
} // GetFilesInDirectory
// Prints the usage text, using argv[0] as the program name.
void help(const char **argv) {
    const char *program = argv[0];
    std::cout << "\n\n";
    std::cout << "Call:\n" << program << " <directory path>\n\n";
    std::cout << "Given a directory of images, create a vector of\n";
    std::cout << "their names, read and display them. Filter out\n";
    std::cout << "non-images\n";
    std::cout << std::endl;
}
int main( int argc, const char** argv )
{
if(argc != 2) {
cerr << "\nIncorrect number of parameters: " << argc << ", should be 2\n" << endl;
help(argv);
return -1;
}
string folder = argv[1];
cout << "Reading in directory " << folder << endl;
vector<string> filenames;
int num_files = readFilenames(filenames, folder);
cout << "Number of files = " << num_files << endl;
cv::namedWindow( "image", 1 );
for(size_t i = 0; i < filenames.size(); ++i)
{
cout << folder + filenames[i] << " #" << i << endl;
cv::Mat src = cv::imread(folder + filenames[i]);
if(!src.data) { //Protect against no file
cerr << folder + filenames[i] << ", file #" << i << ", is not an image" << endl;
continue;
}
cv::imshow("image", src);
cv::waitKey(250); //For fun, wait 250ms, or a quarter of a second, but you can put in "0" for no wait or -1 to wait for keypresses
/* do whatever you want with your images here */
}
}

Is there an easier way to pop off a directory from boost::filesystem::path?

I have a relative path (e.g. "foo/bar/baz/quux.xml") and I want to pop a directory off so that I will have the subdirectory + file (e.g. "bar/baz/quux.xml").
You can do this with path iterators, but I was hoping there was something I was missing from the documentation or something more elegant. Below is the code that I used.
#include <boost/filesystem/path.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/convenience.hpp>
#include <boost/filesystem/exception.hpp>
#include <boost/assign.hpp>
// Returns `path` without its first element. A path with fewer than two
// elements is returned unchanged.
boost::filesystem::path pop_directory(const boost::filesystem::path& path)
{
    // Split the path into its elements.
    list<string> elements;
    copy(path.begin(), path.end(), back_inserter(elements));

    // Nothing to strip from an empty or single-element path.
    if (elements.size() < 2)
        return path;

    // Reassemble everything after the first element.
    list<string>::iterator element = elements.begin();
    ++element;
    boost::filesystem::path remainder;
    while (element != elements.end())
    {
        remainder /= *element;
        ++element;
    }
    return remainder;
}
// Runs pop_directory over a few sample paths and prints input and result.
// The second parameter of main must be an array of C strings (char* argv[]);
// a plain char* is not a valid signature.
int main(int argc, char* argv[])
{
    list<string> test = boost::assign::list_of("foo/bar/baz/quux.xml")
        ("quux.xml")("foo/bar.xml")("./foo/bar.xml");
    for (list<string>::iterator i = test.begin(); i != test.end(); ++i)
    {
        boost::filesystem::path p(*i);
        cout << "Input: " << p.native_file_string() << endl;
        boost::filesystem::path p2(pop_directory(p));
        cout << "Subdir Path: " << p2.native_file_string() << endl;
    }
}
The output is:
Input: foo/bar/baz/quux.xml
Subdir Path: bar/baz/quux.xml
Input: quux.xml
Subdir Path: quux.xml
Input: foo/bar.xml
Subdir Path: bar.xml
Input: ./foo/bar.xml
Subdir Path: foo/bar.xml
What I was hoping for was something like:
boost::filesystem::path p1(someString);
// pop() is the wished-for operation, applied to p1 (p2 cannot be built from itself).
boost::filesystem::path p2(p1.pop());
If you look at some test code on codepad.org, I have tried branch_path (returns "foo/bar/baz") and relative_path (returns "foo/bar/baz/quux.xml").
Here is something that a co-worker figured out just using string::find with boost::filesystem::slash. I like that it doesn't require iterating over the entire path and breaking it up, while still using the path's OS-independent definition of the path separator character. Thanks Bodgan!
// Returns everything after the first path separator in `path`, or `path`
// itself when it contains no separator.
boost::filesystem::path pop_front_directory(const boost::filesystem::path& path)
{
    const string whole = path.string();
    const string::size_type cut = whole.find(boost::filesystem::slash<boost::filesystem::path>::value);
    if (cut != string::npos)
    {
        return boost::filesystem::path(whole.substr(cut + 1));
    }
    return path;
}