Thread Pool: Block destruction until all work is done - c++

I have the following thread pool implementation:
template<typename... event_args>
class thread_pool{
public:
using handler_type = std::function<void(event_args...)>;
thread_pool(handler_type&& handler, std::size_t N = 4, bool finish_before_exit = true) : _handler(std::forward<handler_type&&>(handler)),_workers(N),_running(true),_finish_work_before_exit(finish_before_exit)
{
for(auto&& worker: _workers)
{
//worker function
worker = std::thread([this]()
{
while (_running)
{
//wait for work
std::unique_lock<std::mutex> _lk{_wait_mutex};
_cv.wait(_lk, [this]{
return !_events.empty() || !_running;
});
//_lk unlocked
//check to see why we woke up
if (!_events.empty()) {//was it new work
std::unique_lock<std::mutex> _readlk(_queue_mutex);
auto data = _events.front();
_events.pop();
_readlk.unlock();
invoke(std::move(_handler), std::move(data));
_cv.notify_all();
}else if(!_running){//was it a signal to exit
break;
}
//or was it spurious and we should just ignore it
}
});
//end worker function
}
}
~thread_pool()
{
if(_finish_work_before_exit)
{//block destruction until all work is done
std::condition_variable _work_remains;
std::mutex _wr;
std::unique_lock<std::mutex> lk{_wr};
_work_remains.wait(lk,[this](){
return _events.empty();
});
}
_running=false;
//let all workers know to exit
_cv.notify_all();
//attempt to join all workers
for(auto&& _worker: _workers)
{
if(_worker.joinable())
{
_worker.join();
}
}
}
handler_type& handler()
{
return _handler;
}
void propagate(event_args&&... args)
{
//lock before push
std::unique_lock<std::mutex> _lk(_queue_mutex);
{
_events.emplace(std::make_tuple(args...));
}
_lk.unlock();//explicit unlock
_cv.notify_one();//let worker know that data is available
}
private:
bool _finish_work_before_exit;
handler_type _handler;
std::queue<std::tuple<event_args...>> _events;
std::vector<std::thread> _workers;
std::atomic_bool _running;
std::condition_variable _cv;
std::mutex _wait_mutex;
std::mutex _queue_mutex;
//helpers used to unpack tuple into function call
template<typename Func, typename Tuple, std::size_t... I>
auto invoke_(Func&& func, Tuple&& t, std::index_sequence<I...>)
{
return func(std::get<I>(std::forward<Tuple&&>(t))...);
}
template<typename Func, typename Tuple, typename Indicies = std::make_index_sequence<std::tuple_size<Tuple>::value>>
auto invoke(Func&& func, Tuple&& t)
{
return invoke_(std::forward<Func&&>(func), std::forward<Tuple&&>(t), Indicies());
}
};
I recently added this section to the destructor:
if(_finish_work_before_exit)
{//block destruction until all work is done
std::condition_variable _work_remains;
std::mutex _wr;
std::unique_lock<std::mutex> lk{_wr};
_work_remains.wait(lk,[this](){
return _events.empty();
});
}
The intent was to have the destructor block until the work queue was fully consumed.
But it seems to put the program into deadlock. aAll of the work does get completed, but the wait does not seem to end when the work is done.
Consider this example main:
std::mutex writemtx;
thread_pool<int> pool{
[&](int i){
std::unique_lock<std::mutex> lk{writemtx};
std::cout<<i<<" : "<<std::this_thread::get_id()<<std::endl;
},
8//threads
};
for (int i=0; i<8192; ++i) {
pool.propagate(std::move(i));
}
How can I have the destructor wait for the completion of the work without causing deadlock?

The reason your code is deadlocked is that _work_remains is a condition variable which is not "notified" by any part of your code. You would need to make that a class attribute and have it notified by any thread that picks up the last event from the _events.

Related

Where is std::this_thread for jthread?

Can't figure out where is std::this_thread for jthread?
I have a function that theoretically makes a jthread sleep until a cancellation is requested:
template<typename Rep, typename Period>
void sleep_for(const std::chrono::duration<Rep, Period>& d, const std::stop_token& token)
{
std::condition_variable cv;
std::mutex mutex;
std::unique_lock<std::mutex> lock{ mutex };
std::stop_callback stop_wait{ token, [&cv]()
{
cv.notify_one(); }
};
cv.wait_for(lock, d, [&token]()
{
return token.stop_requested();
});
}
How do I call it on jthread?
Theoretically the program below exits within 1 second:
int main()
{
std::jthread t([]()
{
//where do I get `stop_token`?
sleep_for(std::chrono::seconds(5), std::this_jthread::get_stop_token());
});
std::this_thread::sleep_for(std::chrono::seconds(1));
t.request_stop();
return 0;
}
The jthread constructor accepts a function that takes a std::stop_token
as its first argument, which will be passed in by the jthread from its
internal stop_source.
Here is an example:
std::jthread t([](std::stop_token stop_token)
{
while(!stop_token.stop_requested()) {
//Process data...
std::this_thread::sleep_for(std::chrono::seconds(5));
}
});
std::this_thread::sleep_for(std::chrono::seconds(1));
t.request_stop();
live on Godbolt.

MPSC Queue Design Issue (thread cannot join)

I have the following MpscQueue implementation
EDIT: added an is_running atomic, but problem still persists.
template<typename T>
class MpscQueue {
public:
MpscQueue() = default;
MpscQueue(MpscQueue&&) = delete;
bool wait_and_pop(T& val, std::atomic<bool>& is_running) {
std::unique_lock<std::mutex> lock(mutex);
cond_var.wait(lock,
[this, &is_running]{ return queue.size() > 0 || !is_running; });
if (!is_running) return false;
val = std::move(queue.front());
queue.pop();
return true;
}
template<typename U>
void push(U&& val) {
auto const is_empty = [&]{
auto const lock = std::unique_lock(mutex);
auto const res = queue.empty();
queue.push(std::forward<U>(val));
return res;
}();
if (is_empty) cond_var.notify_one();
}
private:
std::queue<T> queue;
std::mutex mutex;
std::condition_variable cond_var;
};
I am attempting to pop a value like this
// At some point earlier
MpscQueue<Message> mailbox;
std::atomic<boo> is_running{true}; // Is set to false at a later time
void run_once() {
Message m;
mailbox.wait_and_pop(m, is_running);
// process_message(std::move(m));
}
The above code run_once is being fed into the thread constructor. My issue is that if I attempt to join the thread that this is on, it gets stuck in the condition variable wait condition. What would be the best way to solve this? I tried passing an atomic by reference as a parameter into wait_and_pop but it did not seem to be updating and also did not seem like a smart implementation decision.

Add a std::packaged_task to an existing thread?

Is there an standard way to add a std::packaged_task to an existing thread? There's a nontrivial amount of overhead that must happen before the task is run, so I want to do that once, then keep the thread running and waiting for tasks to execute. I want to be able to use futures so I can optionally get the result of the task and catch exceptions.
My pre-C++11 implementation requires my tasks to inherit from an abstract base class with a Run() method (a bit of a pain, can't use lambdas), and having a std::deque collection of those that I add to in the main thread and dequeue from in the worker thread. I have to protect that collection from simultaneous access and provide a signal to the worker thread that there's something to do so it isn't spinning or sleeping. Enqueing something returns a "result" object with a synchronization object to wait for the task to complete, and a result value. It all works well but it's time for an upgrade if there's something better.
Here is a toy thread pool:
template<class T>
struct threaded_queue {
using lock = std::unique_lock<std::mutex>;
void push_back( T t ) {
{
lock l(m);
data.push_back(std::move(t));
}
cv.notify_one();
}
boost::optional<T> pop_front() {
lock l(m);
cv.wait(l, [this]{ return abort || !data.empty(); } );
if (abort) return {};
auto r = std::move(data.back());
data.pop_back();
return std::move(r);
}
void terminate() {
{
lock l(m);
abort = true;
data.clear();
}
cv.notify_all();
}
~threaded_queue()
{
terminate();
}
private:
std::mutex m;
std::deque<T> data;
std::condition_variable cv;
bool abort = false;
};
struct thread_pool {
thread_pool( std::size_t n = 1 ) { start_thread(n); }
thread_pool( thread_pool&& ) = delete;
thread_pool& operator=( thread_pool&& ) = delete;
~thread_pool() = default; // or `{ terminate(); }` if you want to abandon some tasks
template<class F, class R=std::result_of_t<F&()>>
std::future<R> queue_task( F task ) {
std::packaged_task<R()> p(std::move(task));
auto r = p.get_future();
tasks.push_back( std::move(p) );
return r;
}
template<class F, class R=std::result_of_t<F&()>>
std::future<R> run_task( F task ) {
if (threads_active() >= total_threads()) {
start_thread();
}
return queue_task( std::move(task) );
}
void terminate() {
tasks.terminate();
}
std::size_t threads_active() const {
return active;
}
std::size_t total_threads() const {
return threads.size();
}
void clear_threads() {
terminate();
threads.clear();
}
void start_thread( std::size_t n = 1 ) {
while(n-->0) {
threads.push_back(
std::async( std::launch::async,
[this]{
while(auto task = tasks.pop_front()) {
++active;
try{
(*task)();
} catch(...) {
--active;
throw;
}
--active;
}
}
)
);
}
}
private:
std::vector<std::future<void>> threads;
threaded_queue<std::packaged_task<void()>> tasks;
std::atomic<std::size_t> active;
};
copied from another answer of mine.
A thread_pool with 1 thread matches your description pretty much.
The above is only a toy, a real thread pool I'd replace the std::packaged_task<void()> with a move_only_function<void()>, which is all I use it for. (A packaged_task<void()> can hold a packaged_task<R()> amusingly, if inefficiencly).
You will have to reason about shutdown and make a plan. The above code locks up if you try to shut it down without first clearing the threads.

Thread pool (presumably) locking issue with condition variable and mutex

I'm working on a thread pool and ran into a weird issue regarding condition variables and mutexes. I suspect there might be a locking problem since it sometimes works, sometimes it doesn't. This is the relevant part of the code (removed non-relevant bits):
class ThreadPool {
private:
std::atomic<bool> running;
std::atomic<size_t> unfinished_tasks;
std::queue<std::function<void(void)>> task_queue;
std::condition_variable cv_work;
std::mutex mtx_queue;
std::vector<std::thread> threads;
public:
ThreadPool(size_t num_threads = std::thread::hardware_concurrency());
~ThreadPool();
template<class T, class Fn>
std::future<T> queueTask(Fn&& fn);
};
ThreadPool::ThreadPool(size_t num_threads) :
running(true), unfinished_tasks(0) {
auto thread_loop = [&] {
while (running.load()) {
std::unique_lock<std::mutex> lock(mtx_queue);
if (!task_queue.empty()) {
auto work = task_queue.front();
task_queue.pop();
lock.unlock();
work();
unfinished_tasks--;
} else {
std::cout << std::this_thread::get_id() << " going to sleep..." << std::endl;
cv_work.wait(lock);
}
}};
threads.reserve(num_threads);
for (size_t i = 0; i < num_threads; i++) {
threads.push_back(std::thread(thread_loop));
}
}
template<class T, class Fn>
inline std::future<T> ThreadPool::queueTask(Fn&& fn) {
// func = lambda containing packaged task with fn
mtx_queue.lock();
task_queue.push(func);
mtx_queue.unlock();
unfinished_tasks++;
cv_work.notify_one();
return future;
}
As soon as I comment out the line containing the debug output, adding lots of small tasks to the thread pool will make it lock up at some point, with the debug output in place, it will finish all tasks properly. I'm not really sure where the issue could be here.
You have a race condition. queueTask can notify cv_work before your thread function is waiting. Don't unlock mtx_queue until after you call cv_work.notify_one().

C++ packaged_task hang in Concurrent Wrapper

I am implementing a concurrent wrapper as introduced by Herb Sutter presented in his talk "C++ and Beyond 2012".
template <typename T>
class ConcurrentWrapper {
private:
std::deque<std::unique_ptr<std::function<void()>>> _tasks;
std::mutex _mutex;
std::condition_variable _cond;
T _object;
std::thread _worker;
std::atomic<bool> _done {false};
public:
template <typename... ArgsT>
ConcurrentWrapper(ArgsT&&... args) :
_object {std::forward<ArgsT>(args)...},
_worker {
[&]() {
typename decltype(_tasks)::value_type task;
while(!_done) {
{
std::unique_lock<std::mutex> lock(_mutex);
while(_tasks.empty()) {
_cond.wait(lock);
}
task = std::move(_tasks.front());
_tasks.pop_front();
}
(*task)();
}
}
} {
}
~ConcurrentWrapper() {
{
std::unique_lock<std::mutex> lock(_mutex);
_tasks.push_back(std::make_unique<std::function<void()>>(
[&](){_done = true;}
));
}
_cond.notify_one();
_worker.join();
}
template <typename F, typename R = std::result_of_t<F(T&)>>
std::future<R> operator()(F&& f) {
std::packaged_task<R(T&)> task(std::forward<F>(f));
auto fu = task.get_future();
{
std::unique_lock<std::mutex> lock(_mutex);
_tasks.push_back(std::make_unique<std::function<void()>>(
[this, task=MoveOnCopy<decltype(task)>(std::move(task))]() {
task.object(this->_object);
}
));
}
_cond.notify_one();
return fu;
}
};
Basically, the idea is to wrap an object and provide thread-safe access in FIFO order using operation (). However, in some runs (not always happen), the following program hanged:
ConcurrentWrapper<std::vector<int>> results;
results(
[&](std::vector<T>& data) {
std::cout << "sorting...\n";
std::sort(data.begin(), data.end());
std::cout << "done ...\n";
EXPECT_EQ(data, golden);
}
).get();
However, the program work correctly without explicitly calling get() method.
results(
[&](std::vector<T>& data) {
std::cout << "sorting...\n";
std::sort(data.begin(), data.end());
std::cout << "done ...\n";
EXPECT_EQ(data, golden);
}
); // Function correctly without calling get
What could the be problem? Did I implement something wrong? I noticed a posted here saying that "a packaged_task needs to be invoked before you call f.get(), otherwise you program will freeze as the future will never become ready." Is this true? If yes, how can I get this problem solved?
I was compiling the code using -std=c++1z -pthread with G++ 6.1