How can I take a value from thread without future? - c++

I wrote the program to count all words in .log files in the different threads and output the result on the screen. First argument in command line is dir to find all .log files and then count words in this files. Second argument in command line is number of threads (by default = 4)
I used the ThreadPool for this program
ThreadPool.h
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#include <boost/thread/condition_variable.hpp>
#include <boost/thread.hpp>
#include <future> // I don't how to work with boost future
#include <queue>
#include <vector>
#include <functional>
class ThreadPool
{
public:
using Task = std::function<void()>; // Our task
explicit ThreadPool(int num_threads)
{
start(num_threads);
}
~ThreadPool()
{
stop();
}
template<class T>
auto enqueue(T task)->std::future<decltype(task())>
{
// packaged_task wraps any Callable target
auto wrapper = std::make_shared<std::packaged_task<decltype(task()) ()>>(std::move(task));
{
boost::unique_lock<boost::mutex> lock{ mutex_p };
tasks_p.emplace([=] {
(*wrapper)();
});
}
event_p.notify_one();
return wrapper->get_future();
}
//void enqueue(Task task)
//{
// {
// boost::unique_lock<boost::mutex> lock { mutex_p };
// tasks_p.emplace(std::move(task));
// event_p.notify_one();
// }
//}
private:
std::vector<boost::thread> threads_p; // num of threads
std::queue<Task> tasks_p; // Tasks to make
boost::condition_variable event_p;
boost::mutex mutex_p;
bool isStop = false;
void start(int num_threads)
{
for (int i = 0; i < num_threads; ++i)
{
// Add to the end our thread
threads_p.emplace_back([=] {
while (true)
{
// Task to do
Task task;
{
boost::unique_lock<boost::mutex> lock(mutex_p);
event_p.wait(lock, [=] { return isStop || !tasks_p.empty(); });
// If we make all tasks
if (isStop && tasks_p.empty())
break;
// Take new task from queue
task = std::move(tasks_p.front());
tasks_p.pop();
}
// Execute our task
task();
}
});
}
}
void stop() noexcept
{
{
boost::unique_lock<boost::mutex> lock(mutex_p);
isStop = true;
event_p.notify_all();
}
for (auto& thread : threads_p)
{
thread.join();
}
}
};
#endif
main.cpp
#include "ThreadPool.h"
#include <iostream>
#include <iomanip>
#include <Windows.h>
#include <vector>
#include <map>
#include <boost/filesystem.hpp>
#include <boost/thread.hpp>
namespace bfs = boost::filesystem;
int count_words(const std::string& filename)
{
int counter = 0;
std::ifstream file(filename);
std::string buffer;
while (file >> buffer)
{
++counter;
}
return counter;
}
int main(int argc, const char* argv[])
{
bfs::path path = argv[1];
// If this path is exist and if this is dir
if (bfs::exists(path) && bfs::is_directory(path))
{
// Number of threads. Default = 4
int n = (argc == 3 ? atoi(argv[2]) : 4);
ThreadPool pool(n);
// Container to store all filenames and number of words inside them
std::map<bfs::path, int> all_files_and_sums;
// Iterate all files in dir
for (auto& p : bfs::directory_iterator(path)) {
// Takes only .txt files
if (p.path().extension() == ".log") {
// Future for taking value from here
auto fut = pool.enqueue([&p, &all_files_and_sums]() {
// In this lambda function I count all words in file and return this value
int result = count_words(p.path().string());
std::cout << "TID " << GetCurrentThreadId() << "\n";
return result;
});
// "filename = words in this .txt file"
all_files_and_sums[p.path()] = fut.get();
}
}
int result = 0;
for (auto& k : all_files_and_sums)
{
std::cout << k.first << "- " << k.second << "\n";
result += k.second;
}
std::cout << "Result: " << result << "\n";
}
else
std::perror("Dir is not exist");
}
And this solution works correctly. But if in the directory many files this solution works so slow. I think it's because of the futures. How can I take values from different threads without futures.
(P.S)
Sorry for my english

Related

Why thread pool works slow?

I have the program to count all words in all .log files in given directory using N threads.
I wrote something like this.
ThreadPool.h
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#include <boost/thread/condition_variable.hpp>
#include <boost/thread.hpp>
#include <future> // I don't how to work with boost future
#include <queue>
#include <vector>
#include <functional>
class ThreadPool
{
public:
using Task = std::function<void()>; // Our task
explicit ThreadPool(int num_threads)
{
start(num_threads);
}
~ThreadPool()
{
stop();
}
template<class T>
auto enqueue(T task)->std::future<decltype(task())>
{
// packaged_task wraps any Callable target
auto wrapper = std::make_shared<std::packaged_task<decltype(task()) ()>>(std::move(task));
{
boost::unique_lock<boost::mutex> lock{ mutex_p };
tasks_p.emplace([=] {
(*wrapper)();
});
}
event_p.notify_one();
return wrapper->get_future();
}
/*void enqueue(Task task)
{
{
boost::unique_lock<boost::mutex> lock { mutex_p };
tasks_p.emplace(std::move(task));
event_p.notify_one();
}
}*/
private:
std::vector<boost::thread> threads_p; // num of threads
std::queue<Task> tasks_p; // Tasks to make
boost::condition_variable event_p;
boost::mutex mutex_p;
bool isStop = false;
void start(int num_threads)
{
for (int i = 0; i < num_threads; ++i)
{
// Add to the end our thread
threads_p.emplace_back([=] {
while (true)
{
// Task to do
Task task;
{
boost::unique_lock<boost::mutex> lock(mutex_p);
event_p.wait(lock, [=] { return isStop || !tasks_p.empty(); });
// If we make all tasks
if (isStop && tasks_p.empty())
break;
// Take new task from queue
task = std::move(tasks_p.front());
tasks_p.pop();
}
// Execute our task
task();
}
});
}
}
void stop() noexcept
{
{
boost::unique_lock<boost::mutex> lock(mutex_p);
isStop = true;
}
event_p.notify_all();
for (auto& thread : threads_p)
{
thread.join();
}
}
};
#endif
main.cpp
#include "ThreadPool.h"
#include <iostream>
#include <iomanip>
#include <Windows.h>
#include <chrono>
#include <vector>
#include <map>
#include <boost/filesystem.hpp>
#include <boost/thread.hpp>
#include <locale.h>
namespace bfs = boost::filesystem;
//int count_words(boost::filesystem::ifstream& file)
//{
// int counter = 0;
// std::string buffer;
// while (file >> buffer)
// {
// ++counter;
// }
//
// return counter;
//}
//
int count_words(boost::filesystem::path filename)
{
boost::filesystem::ifstream ifs(filename);
return std::distance(std::istream_iterator<std::string>(ifs), std::istream_iterator<std::string>());
}
int main(int argc, const char* argv[])
{
std::cin.tie(0);
std::ios_base::sync_with_stdio(false);
bfs::path path = argv[1];
// If this path is exist and if this is dir
if (bfs::exists(path) && bfs::is_directory(path))
{
// Number of threads. Default = 4
int n = (argc == 3 ? atoi(argv[2]) : 4);
ThreadPool pool(n);
// Container to store all filenames and number of words inside them
//std::map<bfs::path, std::future<int>> all_files_and_sums;
std::vector<std::future<int>> futures;
auto start = std::chrono::high_resolution_clock::now();
// Iterate all files in dir
for (auto& p : bfs::directory_iterator(path)) {
// Takes only .txt files
if (p.path().extension() == ".log") {
// Future for taking value from here
auto fut = pool.enqueue([p]() {
// In this lambda function I count all words in file and return this value
int result = count_words(p.path());
static int count = 0;
++count;
std::ostringstream oss;
oss << count << ". TID, " << GetCurrentThreadId() << "\n";
std::cout << oss.str();
return result;
});
// "filename = words in this .txt file"
futures.emplace_back(std::move(fut));
}
}
int result = 0;
for (auto& f : futures)
{
result += f.get();
}
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);
std::cout << "Result: " << result << "\n";
std::cout << duration.count() << '\n';
}
else
std::perror("Dir is not exist");
}
Variable N is 4(Number of threads). I've 320 .log files in my directory and I need count words in this files. Everything works fine but when variable "count" is 180 - the program stops for a while and then continues but much slower.
What could be the reason?
CPU - Xeon e5430 (I have tested this program on another CPU - the result is the same).
It depends on how you measure "slow" but basically you are using one of the worst models possible:
one task queue shared between all threads.
The problem with this approach is blocking in each thread on the shared queue.
A much better model is something like
task stealing - you can try creating a task queue pro thread and then use try_lock (which doesnt block) with enabling each thread "stealing" work from some other thread's tasks if it has nothing else to do.
This is very nice explained in excellent Sean Parent Talk about Concurrency.

Threads keep running and executing commands

I want to make mutilple threads in C++ that keep running and waiting for main thread commands and execute them accordingly. Here's the code I wrote and I know it causes the spinning problem. So the question is how can I let the CPU stop running the worker thread untill I change the command. I know there are future and promise, but they seem not suitable for this situation.
[edit] I'm a C++ noob and this is so complicated! If anyone can share me some tutorials or libraries to solve this, that would be much appreciated!
#include <iostream>
#include <thread>
#include <mutex>
class empty
{
public:
empty() {}
void work1()
{
std::cout << "work1" << std::endl;
}
void work2()
{
std::cout << "work2" << std::endl;
}
};
enum CMD
{
WAIT,
CMD1,
CMD2,
DONE
};
void worker(CMD &cmd, empty &e)
{
std::mutex mutex;
while (cmd != DONE)
{
switch (cmd) {
case WAIT:
break;
case CMD1:
e.work1(); // excute cmd 1
mutex.lock();
cmd = WAIT; // change cmd to WAIT
mutex.unlock();
break;
case CMD2:
e.work2();
mutex.lock();
cmd = WAIT;
mutex.unlock();
break;
default:
break;
}
}
}
int main(int argc, const char * argv[]) {
empty e1 = empty();
empty e2 = empty();
CMD cmd = WAIT;
// mutilple thread working on mutilple empty object
std::thread wokerThread1 = std::thread(worker, std::ref(cmd), std::ref(e1));
std::thread wokerThread2 = std::thread(worker, std::ref(cmd), std::ref(e2));
... //some other code
cmd = CMD1;
...
cmd = CMD2;
...
cmd = CMD1;
...
cmd = DONE;
wokerThread1.join();
wokerThread2.join();
return 0;
}
One why of doing this is by using a concurrent_bounded_queue. You can use TBB's implementation for this or implement it using std::queue and std::condition_variable.
Implementation using only std;
#include <queue>
#include <chrono>
#include <thread>
#include <mutex>
#include <iostream>
#include <condition_variable>
std::mutex g_m;
std::condition_variable g_cv;
enum CMD
{
CMD1,
CMD2,
DONE
};
void push_cmd(std::queue<CMD>& tasks, CMD cmd) {
const std::lock_guard<std::mutex> lock(g_m);
tasks.push(cmd);
g_cv.notify_one();
}
CMD pop_cmd(std::queue<CMD>& tasks) {
std::unique_lock<std::mutex> lk(g_m);
g_cv.wait(lk, [&tasks]{ return !tasks.empty(); });
CMD cmd = tasks.front();
tasks.pop();
return cmd;
}
void execute_cmd(int cmd) {
std::cout << std::this_thread::get_id() << ": cmd [" << cmd << "]" << std::endl;
std::this_thread::sleep_for(std::chrono::seconds(cmd));
}
void worker(std::queue<CMD>& tasks) {
CMD cmd = pop_cmd(tasks);
while (true)
{
switch (cmd) {
case CMD1:
execute_cmd(1);
break;
case CMD2:
execute_cmd(2);
break;
case DONE:
default:
return;
}
}
}
int main(int argc, const char * argv[]) {
std::queue<CMD> tasks;
std::thread wokerThread1 = std::thread(worker, std::ref(tasks));
std::thread wokerThread2 = std::thread(worker, std::ref(tasks));
push_cmd(tasks, CMD1);
push_cmd(tasks, CMD2);
// push `DONE` for each worker
push_cmd(tasks, DONE);
push_cmd(tasks, DONE);
wokerThread1.join();
wokerThread2.join();
return 0;
}
Implementation using tbb::concurrent_bounded_queue;
#include <tbb/concurrent_queue.h>
void worker(tbb::concurrent_bounded_queue<CMD>& tasks) {
while (true) {
CMD cmd;
tasks.pop(cmd);
switch (cmd) {
case CMD1:
// excute cmd 1
break;
case CMD2:
// excute cmd 2
break;
case DONE:
default:
return;
}
}
}
int main(int argc, const char * argv[]) {
tbb::concurrent_bounded_queue<CMD> tasks;
std::thread wokerThread1 = std::thread(worker, std::ref(tasks));
std::thread wokerThread2 = std::thread(worker, std::ref(tasks));
...
tasks.push(CMD1);
tasks.push(CMD2);
...
}
Note you want to run the same task multiple times you can create a Worker that wraps everything up as follows;
#include <chrono>
#include <queue>
#include <thread>
#include <mutex>
#include <iostream>
#include <condition_variable>
enum CMD
{
CMD1,
CMD2,
DONE
};
void executeCmd(int cmd) {
printf("exec %u: cmd[%d]\n", std::this_thread::get_id(), cmd);
std::this_thread::sleep_for(std::chrono::seconds(cmd));
}
class Worker
{
public:
Worker()
: _thread(std::thread(&Worker::work, this))
{
}
void pushCmd(CMD cmd) {
printf("push %u: cmd[%d]\n", std::this_thread::get_id(), cmd);
const std::lock_guard<std::mutex> lock(_m);
_tasks.push(cmd);
_cv.notify_one();
}
void finish() {
pushCmd(DONE);
_thread.join();
}
private:
std::thread _thread;
std::mutex _m;
std::queue<CMD> _tasks;
std::condition_variable _cv;
CMD popCmd() {
std::unique_lock<std::mutex> lk(_m);
_cv.wait(lk, [&]{ return !_tasks.empty(); });
CMD cmd = _tasks.front();
printf("pop %u: cmd[%d]\n", std::this_thread::get_id(), cmd);
_tasks.pop();
return cmd;
}
void work() {
while (true) {
CMD cmd = popCmd();
switch (cmd) {
case CMD1:
executeCmd(1);
break;
case CMD2:
executeCmd(2);
break;
case DONE:
default:
return;
}
}
}
};
int main(int argc, const char * argv[]) {
Worker w1, w2;
w1.pushCmd(CMD1);
w2.pushCmd(CMD1);
w1.pushCmd(CMD2);
w2.pushCmd(CMD2);
w1.finish();
w2.finish();
return 0;
}
It looks like you could use Observers with async operations here. The idea is that your main control thread updates the CMD for all interested observers and then the observers execute particular operation depending on the CMD. In this example I made "update" operation blocking (previous job must be finished before you start a new one) and returning void. However, you might think about other possibilities, like returning false if previous operation is still ongoing.
#include <chrono>
#include <future>
#include <iostream>
#include <memory>
#include <thread>
#include <vector>
enum CMD
{
WAIT,
CMD1,
CMD2,
DONE
};
class SomeSystem
{
public:
SomeSystem() = default;
void work1()
{
// let's pretend this work takes some time
std::this_thread::sleep_for(std::chrono::milliseconds(1));
std::cout << "work1" << std::endl;
}
void work2()
{
std::this_thread::sleep_for(std::chrono::milliseconds(1));
std::cout << "work2" << std::endl;
}
};
class CmdObserver
{
public:
CmdObserver(std::shared_ptr<SomeSystem> system, int id): system_(system), id_(id)
{
std::cout << "observer[" << id_ << "] CTor" << std::endl;
}
void update(CMD cmd)
{
if (work_.valid() && work_.wait_for(std::chrono::seconds(0)) == std::future_status::timeout)
{
std::cout << "observer[" << id_ << "] blocking until previous work is finished" << std::endl;
}
work_ = std::async(std::launch::async, [this, cmd]() { doTheJob(cmd); });
}
private:
void doTheJob(CMD cmd)
{
std::cout << "observer[" << id_ << "] going to execute cmd " << cmd << std::endl;
switch (cmd)
{
case CMD1: system_->work1(); break;
case CMD2: system_->work2(); break;
default: std::cout << cmd << std::endl;
}
}
std::shared_ptr<SomeSystem> system_;
// id_ is just for demonstration purposes
int id_;
std::future<void> work_;
};
int main()
{
int observerId = 0;
std::vector<std::shared_ptr<SomeSystem> > systems({
std::make_shared<SomeSystem>(),
std::make_shared<SomeSystem>(),
std::make_shared<SomeSystem>(),
std::make_shared<SomeSystem>(),
std::make_shared<SomeSystem>()
});
std::vector<CmdObserver> observers;
for (auto system : systems)
{
observers.push_back(CmdObserver(system, observerId));
observerId++;
}
for (auto& observer : observers)
{
observer.update(CMD1);
}
for (auto& observer : observers)
{
observer.update(CMD2);
}
// let's pretend we do some long operation here
std::this_thread::sleep_for(std::chrono::seconds(1));
for (auto& observer : observers)
{
observer.update(CMD1);
}
}
As you mentioned you are a c++ noob, just make sure you are not making any of these mistakes "https://www.acodersjourney.com/top-20-cplusplus-multithreading-mistakes/"

thread pooling in c++ - how to end the program

I've implemented thread pooling following the answer of Kerrek SB in this question.
I've implemented MPMC queue for the functions and vector threads for the threads.
Everything worked perfectly, except that I don't know how to terminate the program, in the end if I just do thread.join since the thread is still waiting for more tasks to do, it will not join and the main thread will not continue.
Any idea how to end the program correctly?
For completeness, this is my code:
function_pool.h
#pragma once
#include <queue>
#include <functional>
#include <mutex>
#include <condition_variable>
class Function_pool
{
private:
std::queue<std::function<void()>> m_function_queue;
std::mutex m_lock;
std::condition_variable m_data_condition;
public:
Function_pool();
~Function_pool();
void push(std::function<void()> func);
std::function<void()> pop();
};
function_pool.cpp
#include "function_pool.h"
Function_pool::Function_pool() : m_function_queue(), m_lock(), m_data_condition()
{
}
Function_pool::~Function_pool()
{
}
void Function_pool::push(std::function<void()> func)
{
std::unique_lock<std::mutex> lock(m_lock);
m_function_queue.push(func);
// when we send the notification immediately, the consumer will try to
get the lock , so unlock asap
lock.unlock();
m_data_condition.notify_one();
}
std::function<void()> Function_pool::pop()
{
std::unique_lock<std::mutex> lock(m_lock);
m_data_condition.wait(lock, [this]() {return !m_function_queue.empty();
});
auto func = m_function_queue.front();
m_function_queue.pop();
return func;
// Lock will be released
}
main.cpp
#include "function_pool.h"
#include <string>
#include <iostream>
#include <mutex>
#include <functional>
#include <thread>
#include <vector>
Function_pool func_pool;
void example_function()
{
std::cout << "bla" << std::endl;
}
void infinite_loop_func()
{
while (true)
{
std::function<void()> func = func_pool.pop();
func();
}
}
int main()
{
std::cout << "stating operation" << std::endl;
int num_threads = std::thread::hardware_concurrency();
std::cout << "number of threads = " << num_threads << std::endl;
std::vector<std::thread> thread_pool;
for (int i = 0; i < num_threads; i++)
{
thread_pool.push_back(std::thread(infinite_loop_func));
}
//here we should send our functions
func_pool.push(example_function);
for (int i = 0; i < thread_pool.size(); i++)
{
thread_pool.at(i).join();
}
int i;
std::cin >> i;
}
Your problem is located in infinite_loop_func, which is an infinite loop and by result doesn't terminate. I've read the previous answer which suggests throwing an exception, however, I don't like it since exceptions should not be used for the regular control flow.
The best way to solve this is to explicitly deal with the stop condition. For example:
std::atomic<bool> acceptsFunctions;
Adding this to the function pool allows you to clearly have state and to assert that no new functions being added when you destruct.
std::optional<std::function<void()>> Function_pool::pop()
Returning an empty optional (or function in C++14 and before), allows you to deal with an empty queue. You have to, as condition_variable can do spurious wakeups.
With this, m_data_condition.notify_all() can be used to wake all threads.
Finally we have to fix the infinite loop as it doesn't cover overcommitment and at the same time allows you to execute all functions still in the queue:
while (func_pool.acceptsFunctions || func_pool.containsFunctions())
{
auto f = func_pool.pop();
If (!f)
{
func_pool.m_data_condition.wait_for(1s);
continue;
}
auto &function = *f;
function ();
}
I'll leave it up to you to implement containsFunctions() and clean up the code (infinite_loop_func as member function?) Note that with a counter, you could even deal with background task being spawned.
You can always use a specific exception type to signal to infinite_loop_func that it should return...
class quit_worker_exception: public std::exception {};
Then change infinite_loop_func to...
void infinite_loop_func ()
{
while (true) {
std::function<void()> func = func_pool.pop();
try {
func();
}
catch (quit_worker_exception &ex) {
return;
}
}
}
With the above changes you could then use (in main)...
/*
* Enqueue `thread_pool.size()' function objects whose sole job is
* to throw an instance of `quit_worker_exception' when invoked.
*/
for (int i = 0; i < thread_pool.size(); i++)
func_pool.push([](){ throw quit_worker_exception(); });
/*
* Now just wait for each worker to terminate having received its
* quit_worker_exception.
*/
for (int i = 0; i < thread_pool.size(); i++)
thread_pool.at(i).join();
Each instance of infinite_loop_func will dequeue one function object which, when called, throws a quit_worker_exception causing it to return.
Follwoing [JVApen](https://stackoverflow.com/posts/51382714/revisions) suggestion, I copy my code in case anyone will want a working code:
function_pool.h
#pragma once
#include <queue>
#include <functional>
#include <mutex>
#include <condition_variable>
#include <atomic>
#include <cassert>
class Function_pool
{
private:
std::queue<std::function<void()>> m_function_queue;
std::mutex m_lock;
std::condition_variable m_data_condition;
std::atomic<bool> m_accept_functions;
public:
Function_pool();
~Function_pool();
void push(std::function<void()> func);
void done();
void infinite_loop_func();
};
function_pool.cpp
#include "function_pool.h"
Function_pool::Function_pool() : m_function_queue(), m_lock(), m_data_condition(), m_accept_functions(true)
{
}
Function_pool::~Function_pool()
{
}
void Function_pool::push(std::function<void()> func)
{
std::unique_lock<std::mutex> lock(m_lock);
m_function_queue.push(func);
// when we send the notification immediately, the consumer will try to get the lock , so unlock asap
lock.unlock();
m_data_condition.notify_one();
}
void Function_pool::done()
{
std::unique_lock<std::mutex> lock(m_lock);
m_accept_functions = false;
lock.unlock();
// when we send the notification immediately, the consumer will try to get the lock , so unlock asap
m_data_condition.notify_all();
//notify all waiting threads.
}
void Function_pool::infinite_loop_func()
{
std::function<void()> func;
while (true)
{
{
std::unique_lock<std::mutex> lock(m_lock);
m_data_condition.wait(lock, [this]() {return !m_function_queue.empty() || !m_accept_functions; });
if (!m_accept_functions && m_function_queue.empty())
{
//lock will be release automatically.
//finish the thread loop and let it join in the main thread.
return;
}
func = m_function_queue.front();
m_function_queue.pop();
//release the lock
}
func();
}
}
main.cpp
#include "function_pool.h"
#include <string>
#include <iostream>
#include <mutex>
#include <functional>
#include <thread>
#include <vector>
Function_pool func_pool;
class quit_worker_exception : public std::exception {};
void example_function()
{
std::cout << "bla" << std::endl;
}
int main()
{
std::cout << "stating operation" << std::endl;
int num_threads = std::thread::hardware_concurrency();
std::cout << "number of threads = " << num_threads << std::endl;
std::vector<std::thread> thread_pool;
for (int i = 0; i < num_threads; i++)
{
thread_pool.push_back(std::thread(&Function_pool::infinite_loop_func, &func_pool));
}
//here we should send our functions
for (int i = 0; i < 50; i++)
{
func_pool.push(example_function);
}
func_pool.done();
for (unsigned int i = 0; i < thread_pool.size(); i++)
{
thread_pool.at(i).join();
}
}

c++ threading: cv.notify_one() blocks?

I wrote the following structure to implement a simple single producer / multi consumer synchronization. I'm using two integers available_index and consumed_index, access to consumed_index is protected by the condition variable cv. Here's the code:
#include <iostream>
#include <mutex>
#include <condition_variable>
#include <vector>
#include <thread>
struct ParserSync {
std::mutex worker_lock;
std::condition_variable cv;
int consumed_index = -1;
int available_index = -1;
bool exit_flag = false;
int consume_index() {
int ret = -1;
// get worker_lock
std::unique_lock<std::mutex> w_lock(worker_lock);
// wait for exit_flag or new available index
cv.wait(w_lock, [this] { return exit_flag || available_index > consumed_index; });
if (available_index > consumed_index) {
consumed_index++;
ret = consumed_index;
}
// Unlock mutex and notify another thread
w_lock.unlock();
cv.notify_one();
return ret;
}
void publish_index() {
available_index++;
std::cout << "before" << std::endl;
cv.notify_one();
std::cout << "after" << std::endl;
}
void set_exit() {
exit_flag = true;
cv.notify_all();
}
};
I tested my implementation using the following code (just a simple example to show the problem):
void producer(ParserSync &ps){
for (int i=0;i<5000;i++){
ps.publish_index();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
ps.set_exit();
std::cout << "Producer finished!" << std::endl;
}
void consumer(ParserSync &ps){
while (true){
int idx = ps.consume_index();
if (idx == -1)
break;
std::this_thread::sleep_for(std::chrono::milliseconds(4));
}
std::cout << "Consumer finished!" << std::endl;
}
int main() {
ParserSync ps{};
const int num_consumers = 4;
std::vector<std::thread> consumer_threads(num_consumers);
// start consumers
for (int i = 0; i < num_consumers; ++i) {
consumer_threads[i] = std::thread{consumer, std::ref(ps)};
}
// start producer
std::thread producer_thread = std::thread{producer, std::ref(ps)};
for (int i = 0; i < num_consumers; ++i) {
consumer_threads[i].join();
}
producer_thread.join();
std::cout << "Program finished" << std::endl;
return 0;
}
I would expect that producer thread produces 5000 indices and exits afterwards, but unfortunately, it gets stuck at some random iteration. I used print statements to find the code line that blocks and tracked it down to cv.notify_one();. This is the (shortened) console output:
...
before
after
before
after
before
Does anyone know why the call to cv.notify_one(); blocks?
I'm using MinGW (x86_64-6.2.0-posix-seh-rt_v5-rev1) on Windows 10.
Thanks in advance!
EDIT:
When compiling the exact same code with Visual Studio, the program works as expected and doesn't lock itself up. Unfortunately, I need to use MinGW for other reasons.

std::thread to std::async makes HUGE performance gain. How it can be possible?

I`ve made a test code between std::thread and std::async.
#include <iostream>
#include <mutex>
#include <fstream>
#include <string>
#include <memory>
#include <thread>
#include <future>
#include <functional>
#include <boost/noncopyable.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/filesystem.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/asio.hpp>
namespace fs = boost::filesystem;
namespace pt = boost::posix_time;
namespace as = boost::asio;
class Log : private boost::noncopyable
{
public:
void LogPath(const fs::path& filePath) {
boost::system::error_code ec;
if(fs::exists(filePath, ec)) {
fs::remove(filePath);
}
this->ofStreamPtr_.reset(new fs::ofstream(filePath));
};
void WriteLog(std::size_t i) {
assert(*this->ofStreamPtr_);
std::lock_guard<std::mutex> lock(this->logMutex_);
*this->ofStreamPtr_ << "Hello, World! " << i << "\n";
};
private:
std::mutex logMutex_;
std::unique_ptr<fs::ofstream> ofStreamPtr_;
};
int main(int argc, char *argv[]) {
if(argc != 2) {
std::cout << "Wrong argument" << std::endl;
exit(1);
}
std::size_t iter_count = boost::lexical_cast<std::size_t>(argv[1]);
Log log;
log.LogPath("log.txt");
std::function<void(std::size_t)> func = std::bind(&Log::WriteLog, &log, std::placeholders::_1);
auto start_time = pt::microsec_clock::local_time();
////// Version 1: use std::thread //////
// {
// std::vector<std::shared_ptr<std::thread> > threadList;
// threadList.reserve(iter_count);
// for(std::size_t i = 0; i < iter_count; i++) {
// threadList.push_back(
// std::make_shared<std::thread>(func, i));
// }
//
// for(auto it: threadList) {
// it->join();
// }
// }
// pt::time_duration duration = pt::microsec_clock::local_time() - start_time;
// std::cout << "Version 1: " << duration << std::endl;
////// Version 2: use std::async //////
start_time = pt::microsec_clock::local_time();
{
for(std::size_t i = 0; i < iter_count; i++) {
auto result = std::async(func, i);
}
}
duration = pt::microsec_clock::local_time() - start_time;
std::cout << "Version 2: " << duration << std::endl;
////// Version 3: use boost::asio::io_service //////
// start_time = pt::microsec_clock::local_time();
// {
// as::io_service ioService;
// as::io_service::strand strand{ioService};
// {
// for(std::size_t i = 0; i < iter_count; i++) {
// strand.post(std::bind(func, i));
// }
// }
// ioService.run();
// }
// duration = pt::microsec_clock::local_time() - start_time;
// std::cout << "Version 3: " << duration << std::endl;
}
With 4-core CentOS 7 box(gcc 4.8.5), Version 1(using std::thread) is about 100x slower compared to other implementations.
Iteration Version1 Version2 Version3
100 0.0034s 0.000051s 0.000066s
1000 0.038s 0.00029s 0.00058s
10000 0.41s 0.0042s 0.0059s
100000 throw 0.026s 0.061s
Why threaded version is so slow? I thought each thread won't take long time to complete Log::WriteLog function.
The function may never be called. You are not passing an std::launch policy in Version 2, so you are relying on the default behavior of std::async (emphasis mine):
Behaves the same as async(std::launch::async | std::launch::deferred, f, args...). In other words, f may be executed in another thread or it may be run synchronously when the resulting std::future is queried for a value.
Try re-running your benchmark with this minor change:
auto result = std::async(std::launch::async, func, i);
Alternatively, you could call result.wait() on each std::future in a second loop, similar to how you call join() on all of the threads in Version 1. This forces evaluation of the std::future.
Note that there is a major, unrelated, problem with this benchmark. func immediately acquires a lock for the full duration of the function call, which makes parallelism impossible. There is no advantage to using threads here - I suspect that it will be significantly slower (due to thread creation and locking overhead) than a serial implementation.