Thread Pool join hangs when oversubscribing threads - c++

I'm having an issue with a thread hanging when joining my threads in a thread pool I have created. The issue only occurs if I loop over the thread pool execution a large number of times.
I have a thread pool class like the following;
#include <queue>
#include <mutex>
#include <condition_variable>
#include <functional>
#include <atomic>
#include <vector>
#include <thread>
#include <iostream>
class ThreadPool
{
public:
ThreadPool()
{
m_shutdown.store(false, std::memory_order_relaxed);
createThreads(1);
}
ThreadPool(std::size_t numThreads)
{
m_shutdown.store(false, std::memory_order_relaxed);
createThreads(numThreads);
}
void add_job(std::function<void()> new_job)
{
{
std::scoped_lock<std::mutex> lock(m_jobMutex);
m_jobQueue.push(new_job);
}
m_notifier.notify_one();
}
void waitFinished()
{
{
std::unique_lock<std::mutex> lock(m_jobMutex);
m_finished.wait(lock, [this] {return m_jobQueue.empty(); }); //&& busy == 0
}
m_shutdown.store(true, std::memory_order_relaxed);
m_notifier.notify_all();
for (std::thread& th : m_threads)
{
th.join();
}
m_threads.clear();
}
private:
using Job = std::function<void()>;
std::vector<std::thread> m_threads;
std::queue<Job> m_jobQueue;
std::condition_variable m_notifier;
std::condition_variable m_finished;
std::mutex m_jobMutex;
std::atomic<bool> m_shutdown;
void createThreads(std::size_t numThreads)
{
// Settup threads
m_threads.reserve(numThreads);
for (int i = 0; i != numThreads; ++i)
{
m_threads.emplace_back(std::thread([this]()
{
// Infinite loop to consume tasks from queue and execute
while (true)
{
Job job;
{
std::unique_lock<std::mutex> lock(m_jobMutex);
m_notifier.wait(lock, [this] {return !m_jobQueue.empty() || m_shutdown.load(std::memory_order_relaxed); });
if (m_shutdown.load(std::memory_order_relaxed) || m_jobQueue.empty())
{
break;
}
job = std::move(m_jobQueue.front());
m_jobQueue.pop();
}
job();
m_finished.notify_one();
}
}));
}
}
};
I run this in a simple manner, like the following;
void threader (int x) {
std::cout<<"In threaded function: "<<x<<std::endl;
}
int main()
{
//outer loop
for (auto i = 0; i < 10000; i++) {
//Thread pool
int num_threads = std::thread::hardware_concurrency();
ThreadPool test_pool(num_threads);
// Assign work
for (int j = 0; j < 48; j++) {
test_pool.add_job(std::bind(threader, j));
}
test_pool.waitFinished();
std::cout<<"Thread Pool Done"<<std::endl;
}
}
After a number of outer loop iterations the join in the waitFinished hangs for a thread. The error only seems to occur after a number of iterations of the outer loop large enough. I have investigated this and can see that the threader function get's called 48 times, so looks like all threads complete. It seems to be the joining of threads in the waitFinished function of the ThreadPool that is causing the hang.
Is there something obvious I'm doing wrong ?
Many thanks!

Related

How I can run two threads parallelly one by one?

In C++ how i can write two parallel threads which will work one by one.For example in below code it need to print 0 t 100 sequentially.In below code the numbers are printing ,but all are not sequential.I need to print like 1,2,3,4,5,6.....99.If any body know , try to add with sample code also.
#pragma once
#include <mutex>
#include <iostream>
#include <vector>
#include <thread>
#include <condition_variable>
using namespace std;
class CuncurrentThread
{
public:
mutex mtx;
condition_variable cv;
static bool ready;
static bool processed;
void procThread1()
{
for (int i = 0; i < 100; i += 2)
{
unique_lock<mutex> lk(mtx);
cv.notify_one();
if(lk.owns_lock())
cv.wait(lk);
cout << "procThread1 : " << i << "\n";
lk.unlock();
cv.notify_one();
}
};
void procThread2()
{
for (int i = 1; i < 100; i += 2)
{
unique_lock<mutex> lk(mtx);
cv.notify_one();
if (lk.owns_lock())
cv.wait(lk);
cout << "procThread2 : " << i << "\n";
lk.unlock();
cv.notify_one();
}
};
static void ThreadDriver(CuncurrentThread* thr)
{
vector<thread> threads;
threads.push_back(thread(&CuncurrentThread::procThread1, thr));
threads.push_back(thread(&CuncurrentThread::procThread2, thr));
for (auto& thread : threads)
thread.join();
};
};
bool CuncurrentThread::ready = false;
int main()
{
CuncurrentThread tr;
CuncurrentThread::ThreadDriver(&tr);
}
Assuming you have a valid use case for using two threads like this, here is an example. I prefer using std::async over std::thread it has better abstraction and information exchange with the main thread.
The example is written for 2 threads but can easily be changed to more threads.
Live demo here : https://onlinegdb.com/eQex9o_nMz
#include <future>
#include <condition_variable>
#include <iostream>
// Setup a helper class that sets up
// the three things needed to correctly
// use a condition variable
// 1) a mutex
// 2) a variable
// 3) a condition_variable (which is more of a signal then a variable)
//
// also give this class some functions
// so the the code becomes more self-explaining
class thread_switcher_t
{
public:
void thread1_wait_for_turn()
{
std::unique_lock<std::mutex> lock{ m_mtx };
m_cv.wait(lock, [&] {return (thread_number==0); });
}
void thread2_wait_for_turn()
{
std::unique_lock<std::mutex> lock{ m_mtx };
m_cv.wait(lock, [&] {return (thread_number==1); });
}
void next_thread()
{
std::unique_lock<std::mutex> lock{ m_mtx };
thread_number = (thread_number + 1) % 2;
m_cv.notify_all();
}
private:
std::size_t thread_number{ 0 };
std::mutex m_mtx;
std::condition_variable m_cv;
};
int main()
{
thread_switcher_t switcher;
auto future1 = std::async(std::launch::async, [&]
{
for(std::size_t n = 0; n <= 100; n+=2)
{
switcher.thread1_wait_for_turn();
std::cout << "thread 1 : " << n << "\n";
switcher.next_thread();
}
});
auto future2 = std::async(std::launch::async, [&]
{
for (std::size_t n = 1; n <= 100; n += 2)
{
switcher.thread2_wait_for_turn();
std::cout << "thread 2 : " << n << "\n";
switcher.next_thread();
}
});
future1.get();
future2.get();
return 0;
}
You can use ready variable as a condition for condition variable.
#include <mutex>
#include <iostream>
#include <vector>
#include <thread>
#include <condition_variable>
using namespace std;
class CuncurrentThread
{
public:
mutex mtx;
condition_variable cv;
static bool ready;
//static bool processed;
void procThread1()
{
for (int i = 0; i < 100; i += 2)
{
unique_lock<mutex> lk(mtx);
// cv.notify_one();
// if(lk.owns_lock())
// wait until this condition is true i.e. until ready is false
cv.wait(lk, [&]() { return !ready; });
cout << "procThread1 : " << i << "\n";
// set ready to true and notify waiting thread
ready = true;
lk.unlock();
cv.notify_one();
}
};
void procThread2()
{
for (int i = 1; i < 100; i += 2)
{
unique_lock<mutex> lk(mtx);
// cv.notify_one();
// if (lk.owns_lock())
// wait until this condition is true i.e. until ready is true
cv.wait(lk, [&]() { return ready; });
cout << "procThread2 : " << i << "\n";
// set ready to false and notify waiting thread
ready = false;
lk.unlock();
cv.notify_one();
}
};
static void ThreadDriver(CuncurrentThread* thr)
{
vector<thread> threads;
threads.push_back(thread(&CuncurrentThread::procThread1, thr));
threads.push_back(thread(&CuncurrentThread::procThread2, thr));
for (auto& thread : threads)
thread.join();
};
};
bool CuncurrentThread::ready = false;
int main()
{
CuncurrentThread tr;
CuncurrentThread::ThreadDriver(&tr);
}
Link where I tested this: https://godbolt.org/z/4jEns16oq

Sync queue between two threads

This is a simple program which has a function start() which waits for user to enter something(using infinite loop) and stores it in queue. start() runs in a separate thread. After user enters some value, the size of queue remains zero in main. How can the queue be synchronized?
code: source.cpp
#include <iostream>
#include "kl.h"
using namespace std;
int main()
{
std::thread t1(start);
while (1)
{
if (q.size() > 0)
{
std::cout << "never gets inside this if\n";
std::string first = q.front();
q.pop();
}
}
t1.join();
}
code: kl.h
#include <queue>
#include <iostream>
#include <string>
void start();
static std::queue<std::string> q;
code: kl.cpp
#include "kl.h"
using namespace std;
void start()
{
char i;
string str;
while (1)
{
for (i = 0; i <= 1000; i++)
{
//other stuff and str input
q.push(str);
}
}
}
Your code contains a race - by me it crashed; both threads are potentially modifying a shared queue. (Also, you're looping with char i for values up to 1000 - not a good idea, probably.)
You should protect your shared queue with a std::mutex, and use a std::condition_variable to notify that there is a reason to check the queue.
Specifically, you should consider the following (which is very common for your case of a producer consumer):
Access the queue only when holding the mutex.
Use the condition variable to notify that you've pushed something into it.
Use the condition variable to specify a condition on when there's a point to continue processing.
Here is a rewrite of your code:
#include <iostream>
#include <queue>
#include <thread>
#include <condition_variable>
#include <mutex>
using namespace std;
std::queue<std::string> q;
std::mutex m;
std::condition_variable cv;
void start()
{
string str;
for (std::size_t i = 0; i <= 1000; i++) {
//other stuff and str input
std::cout << "here" << std::endl;
std::unique_lock<std::mutex> lk(m);
q.push(str);
lk.unlock();
cv.notify_one();
}
}
int main()
{
std::thread t1(start);
for (std::size_t i = 0; i <= 1000; i++)
{
std::unique_lock<std::mutex> lk(m);
cv.wait(lk, []{return !q.empty();});
std::string first = q.front();
q.pop();
}
t1.join();
}
My synced queue class example and its usage:
template<typename T>
class SyncQueue
{
std::queue<T> m_Que;
std::mutex m_Lock;
std::condition_variable m_ConVar;
public:
void enque(T item)
{
std::unique_lock<std::mutex> lock(m_Lock);
m_Que.push(item);
lock.unlock();
m_ConVar.notify_all();
}
T deque()
{
std::unique_lock<std::mutex> lock(m_Lock);
do
{
m_ConVar.wait(lock);
} while(m_Que.size() == 0); // extra check from spontaneous notifications
auto ret = m_Que.front();
m_Que.pop();
return ret;
}
};
int main()
{
using namespace std::chrono_literals;
SyncQueue<int> sq;
std::thread consumer([&sq]()
{
std::cout << "consumer" << std::endl;
for(;;)
{
std::cout << sq.deque() << std::endl;
}
});
std::thread provider([&sq]()
{
std::this_thread::sleep_for(1s);
sq.enque(1);
std::this_thread::sleep_for(3s);
sq.enque(2);
std::this_thread::sleep_for(5s);
sq.enque(3);
});
consumer.join();
return 0;
}
/* Here I have a code snippate with Separate class for
Producing and Consuming along with buffer class */
#include <iostream>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <deque>
#include <vector>
using namespace std;
mutex _mutex_1,_mutex_2;
condition_variable cv;
template <typename T>
class Queue
{
deque<T> _buffer;
const unsigned int max_size = 10;
public:
Queue() = default;
void push(const T& item)
{
while(1)
{
unique_lock<mutex> locker(_mutex_1);
cv.wait(locker,[this](){ return _buffer.size() < max_size; });
_buffer.push_back(item);
locker.unlock();
cv.notify_all();
return;
}
}
T pop()
{
while(1)
{
unique_lock<mutex> locker(_mutex_1);
cv.wait(locker,[this](){ return _buffer.size() > 0; });
int back = _buffer.back();
_buffer.pop_back();
locker.unlock();
cv.notify_all();
return back;
}
}
};
class Producer
{
Queue<int>* _buffer;
public:
Producer(Queue<int>* _buf)
{
this->_buffer = _buf;
}
void run()
{
while(1)
{
auto num = rand()%100;
_buffer->push(num);
_mutex_2.lock();
cout<<"Produced:"<<num<<endl;
this_thread::sleep_for(std::chrono::milliseconds(50));
_mutex_2.unlock();
}
}
};
class Consumer
{
Queue<int>* _buffer;
public:
Consumer(Queue<int>* _buf)
{
this->_buffer = _buf;
}
void run()
{
while(1)
{
auto num = _buffer->pop();
_mutex_2.lock();
cout<<"Consumed:"<<num<<endl;
this_thread::sleep_for(chrono::milliseconds(50));
_mutex_2.unlock();
}
}
};
void client()
{
Queue<int> b;
Producer p(&b);
Consumer c(&b);
thread producer_thread(&Producer::run, &p);
thread consumer_thread(&Consumer::run, &c);
producer_thread.join();
consumer_thread.join();
}
int main()
{
client();
return 0;
}

What is the best way to realize a synchronization barrier between threads

Having several threads running I need to guaranty that every of my threads reached a certain point before proceeding. I need to implement a kind of barrier. Consider a function func which can be run from several threads:
void func()
{
operation1();
// wait till all threads reached this point
operation2();
}
What is best way to realise this barrier using C++ 11 and VS12, considering boost if needed.
You could use boost::barrier
Unfortunately, the thread barrier concept itself is not part of c++11 or visual c++.
In pure c++11 you could use a condition variable and a counter.
#include <iostream>
#include <condition_variable>
#include <thread>
#include <chrono>
class my_barrier
{
public:
my_barrier(int count)
: thread_count(count)
, counter(0)
, waiting(0)
{}
void wait()
{
//fence mechanism
std::unique_lock<std::mutex> lk(m);
++counter;
++waiting;
cv.wait(lk, [&]{return counter >= thread_count;});
cv.notify_one();
--waiting;
if(waiting == 0)
{
//reset barrier
counter = 0;
}
lk.unlock();
}
private:
std::mutex m;
std::condition_variable cv;
int counter;
int waiting;
int thread_count;
};
int thread_waiting = 3;
my_barrier barrier(3);
void func1()
{
std::this_thread::sleep_for(std::chrono::seconds(3));
barrier.wait();
std::cout << "I have awakened" << std::endl;
}
void func2()
{
barrier.wait();
std::cout << "He has awakened!!" << std::endl;
}
int main() {
std::thread t1(func1);
std::thread t2(func2);
std::thread t3(func2);
t1.join();
t2.join();
t3.join();
}
Each thread wait till a predicate is met. The last thread will make the predicate valid, and allow the waiting threads to proceed. If you want to reuse
the barrier (for instance call the function multiple times), you need another
variable to reset the counter.
This current implementation is limited. A calling func();func(); twice may not make threads wait the second time.
An option could be the use of OpenMP framework.
#include <omp.h>
void func()
{
#pragma omp parallel num_threads(number_of_threads)
{
operation1();
#pragma omp barrier
// wait till all threads reached this point
operation2();
}
}
Compile the code with -fopenmp
Solution:
#include <cassert>
#include <condition_variable>
class Barrier
{
public:
Barrier(std::size_t nb_threads)
: m_mutex(),
m_condition(),
m_nb_threads(nb_threads)
{
assert(0u != m_nb_threads);
}
Barrier(const Barrier& barrier) = delete;
Barrier(Barrier&& barrier) = delete;
~Barrier() noexcept
{
assert(0u == m_nb_threads);
}
Barrier& operator=(const Barrier& barrier) = delete;
Barrier& operator=(Barrier&& barrier) = delete;
void Wait()
{
std::unique_lock< std::mutex > lock(m_mutex);
assert(0u != m_nb_threads);
if (0u == --m_nb_threads)
{
m_condition.notify_all();
}
else
{
m_condition.wait(lock, [this]() { return 0u == m_nb_threads; });
}
}
private:
std::mutex m_mutex;
std::condition_variable m_condition;
std::size_t m_nb_threads;
};
Example:
#include <chrono>
#include <iostream>
#include <thread>
Barrier barrier(2u);
void func1()
{
std::this_thread::sleep_for(std::chrono::seconds(3));
barrier.Wait();
std::cout << "t1 awakened" << std::endl;
}
void func2()
{
barrier.Wait();
std::cout << "t2 awakened" << std::endl;
}
int main()
{
std::thread t1(func1);
std::thread t2(func2);
t1.join();
t2.join();
return 0;
}
Try It Online: WandBox

std::condition_variable calling notify_all more than once

First, let me introduce you to my problem.
My code looks like this:
#include <iostream>
#include <thread>
#include <condition_variable>
std::mutex mtx;
std::mutex cvMtx;
std::mutex mtx2;
bool ready{false};
std::condition_variable cv;
int threadsFinishedCurrentLevel{0};
void tfunc() {
for(int i = 0; i < 5; i++) {
//do something
for (int j = 0; j < 10000; j++) {
std::cout << j << std::endl;
}
//this is i-th level
mtx2.lock();
threadsFinishedCurrentLevel++;
if (threadsFinishedCurrentLevel == 2) {
//this is last thread in current level
threadsFinishedCurrentLevel = 0;
cvMtx.unlock();
}
mtx2.unlock();
{
//wait for notify
unique_lock<mutex> lck(mtx);
while (!ready) cv_.wait(lck);
}
}
}
int main() {
cvMtx.lock(); //init
std::thread t1(tfunc);
std::thread t2(tfunc);
for (int i = 0; i < 5; i++) {
cvMtx.lock();
{
unique_lock<mutex> lck(mtx);
ready = true;
cv.notify_all();
}
}
t1.join();
t2.join();
return 0;
}
I have 2 threads. My computation consists of levels(for this example, lets say we have 5 levels). On the same level, computation can be divided to threads. Each thread then calculates part of a problem. When i want to step to the next(higher) level, lower level must be first done. So my idea is something like this. When last thread on the current level is done, it unlocks main thread, so it can notify all of the threads to continue to next level. But this notify has to be called more then once. Because there are plenty of these levels. Can this condition_variable be restarted or something? Or do I need for each level one condition_variable? So for example, when i have 1000 levels, i need to allocate dynamically 1000x condition_variable?
Is it just me or you are trying to block the main thread with a mutex (which is your way of trying to notify it when all threads are done?), I mean that's not the task of a mutex. That's where the condition variable should be used.
// New condition_variable, to nofity main thread when child is done with level
std::condition_variable cv2;
// When a child is done, it will update this counter
int counter = 0; // This is already protected by cvMtx, otherwise it could be atomic.
// This is to sync cout
std::mutex cout_mutex;
void tfunc()
{
for (int i = 0; i < 5; i++)
{
{
std::lock_guard<std::mutex> l(cout_mutex);
std::cout << "Level " << i + 1 << " " << std::this_thread::get_id() << std::endl;
}
{
std::lock_guard<std::mutex> l(cvMtx);
counter++; // update counter &
}
cv2.notify_all(); // notify main thread we are done.
{
//wait for notify
unique_lock<mutex> lck(mtx);
cv.wait(lck);
// Note that I've removed the "ready" flag here
// That's because u would need multiple ready flags to make that work
}
}
}
int main()
{
std::thread t1(tfunc);
std::thread t2(tfunc);
for (int i = 0; i < 5; i++)
{
{
unique_lock<mutex> lck(cvMtx);
// Wait takes a predicate which u can take advantage of
cv2.wait(lck, [] { return (counter == 2); });
counter = 0;
// This thread will get notified multiple times
// But it only will wake up when counter matches 2
// Which equals to how many threads we've created.
}
// Sleeping a bit to know the code is working
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
// Wake up all threds and continue to next level.
unique_lock<mutex> lck(mtx);
cv.notify_all();
}
t1.join();
t2.join();
return 0;
}
The synchronization can be done with a single counter, threads increment the counter under lock and check for the counter to reach a multiple of the number of concurrent threads. This greatly simplifies the logic. I've made this change and also grouped the shared variables into a class, and provided member functions to access them. To avoid false sharing I've ensured that variables that are read-only are separate from those that are read-write by the threads, and also separated read-write variables by usage. The use of global variables is discouraged, see C++ Core Guidelines for this and other good advice.
The simplified code follows, you can see it live in ideone. Note: it looks like there isn't true concurrency in ideone, you'll have to run this on a multi-core environment to actually test hardware concurrency.
//http://stackoverflow.com/questions/35318942/stdcondition-variable-calling-notify-all-more-than-once
#include <iostream>
#include <functional>
#include <thread>
#include <mutex>
#include <vector>
#include <condition_variable>
static constexpr size_t CACHE_LINE_SIZE = 64;
static constexpr size_t NTHREADS = 2;
static constexpr size_t NLEVELS = 5;
static constexpr size_t NITERATIONS = 100;
class Synchronize
{
alignas(CACHE_LINE_SIZE) // read/write while threads are busy working
std::mutex mtx_std_cout;
alignas(CACHE_LINE_SIZE) // read/write while threads are synchronizing at level
std::mutex cvMtx;
std::condition_variable cv;
size_t threadsFinished{0};
alignas(CACHE_LINE_SIZE) // read-only parameters
const size_t n_threads;
const size_t n_levels;
public: // class Synchronize owns unique resources:
// - must be explicitly constructed
// - disallow default ctor,
// - disallow copy/move ctor and
// - disallow copy/move assignment
Synchronize( Synchronize const& ) = delete;
Synchronize & operator=( Synchronize const& ) = delete;
explicit Synchronize( size_t nthreads, size_t nlevels )
: n_threads{nthreads}, n_levels{nlevels}
{}
size_t nlevels() const { return n_levels; }
std::mutex & std_cout_mutex() { return mtx_std_cout; }
void level_done_wait_all( size_t level )
{
std::unique_lock<std::mutex> lk(cvMtx);
threadsFinished++;
cv.wait(lk, [&]{return threadsFinished >= n_threads * (level+1);});
cv.notify_all();
}
};
void tfunc( Synchronize & sync )
{
for(size_t i = 0; i < sync.nlevels(); i++)
{
//do something
for (size_t j = 0; j < NITERATIONS; j++) {
std::unique_lock<std::mutex> lck(sync.std_cout_mutex());
if (j == 0) std::cout << '\n';
std::cout << ' ' << i << ',' << j;
}
sync.level_done_wait_all(i);
}
}
int main() {
Synchronize sync{ NTHREADS, NLEVELS };
std::vector<std::thread*> threads(NTHREADS,nullptr);
for(auto&t:threads) t = new std::thread(tfunc,std::ref(sync));
for(auto t:threads) {
t->join();
delete t;
}
std::cout << std::endl;
return 0;
}

Efficiently waiting for all tasks in a threadpool to finish

I currently have a program with x workers in my threadpool. During the main loop y tasks are assigned to the workers to complete, but after the tasks are sent out I must wait for all tasks for finish before preceding with the program. I believe my current solution is inefficient, there must be a better way to wait for all tasks to finish but I am not sure how to go about this
// called in main after all tasks are enqueued to
// std::deque<std::function<void()>> tasks
void ThreadPool::waitFinished()
{
while(!tasks.empty()) //check if there are any tasks in queue waiting to be picked up
{
//do literally nothing
}
}
More information:
threadpool structure
//worker thread objects
class Worker {
public:
Worker(ThreadPool& s): pool(s) {}
void operator()();
private:
ThreadPool &pool;
};
//thread pool
class ThreadPool {
public:
ThreadPool(size_t);
template<class F>
void enqueue(F f);
void waitFinished();
~ThreadPool();
private:
friend class Worker;
//keeps track of threads so we can join
std::vector< std::thread > workers;
//task queue
std::deque< std::function<void()> > tasks;
//sync
std::mutex queue_mutex;
std::condition_variable condition;
bool stop;
};
or here's a gist of my threadpool.hpp
example of what I want to use waitFinished() for:
while(running)
//....
for all particles alive
push particle position function to threadpool
end for
threadPool.waitFinished();
push new particle position data into openGL buffer
end while
so this way I can send hundrends of thousands of particle position tasks to be done in parallel, wait for them to finish and put the new data inside the openGL position buffers
This is one way to do what you're trying. Using two condition variables on the same mutex is not for the light-hearted unless you know what is going on internally. I didn't need the atomic processed member other than my desire to demonstrate how many items were finished between each run.
The sample workload function in this generates one million random int values, then sorts them (gotta heat my office one way or another). waitFinished will not return until the queue is empty and no threads are busy.
#include <iostream>
#include <deque>
#include <functional>
#include <thread>
#include <condition_variable>
#include <mutex>
#include <random>
//thread pool
class ThreadPool
{
public:
ThreadPool(unsigned int n = std::thread::hardware_concurrency());
template<class F> void enqueue(F&& f);
void waitFinished();
~ThreadPool();
unsigned int getProcessed() const { return processed; }
private:
std::vector< std::thread > workers;
std::deque< std::function<void()> > tasks;
std::mutex queue_mutex;
std::condition_variable cv_task;
std::condition_variable cv_finished;
std::atomic_uint processed;
unsigned int busy;
bool stop;
void thread_proc();
};
ThreadPool::ThreadPool(unsigned int n)
: busy()
, processed()
, stop()
{
for (unsigned int i=0; i<n; ++i)
workers.emplace_back(std::bind(&ThreadPool::thread_proc, this));
}
ThreadPool::~ThreadPool()
{
// set stop-condition
std::unique_lock<std::mutex> latch(queue_mutex);
stop = true;
cv_task.notify_all();
latch.unlock();
// all threads terminate, then we're done.
for (auto& t : workers)
t.join();
}
void ThreadPool::thread_proc()
{
while (true)
{
std::unique_lock<std::mutex> latch(queue_mutex);
cv_task.wait(latch, [this](){ return stop || !tasks.empty(); });
if (!tasks.empty())
{
// got work. set busy.
++busy;
// pull from queue
auto fn = tasks.front();
tasks.pop_front();
// release lock. run async
latch.unlock();
// run function outside context
fn();
++processed;
latch.lock();
--busy;
cv_finished.notify_one();
}
else if (stop)
{
break;
}
}
}
// generic function push
template<class F>
void ThreadPool::enqueue(F&& f)
{
std::unique_lock<std::mutex> lock(queue_mutex);
tasks.emplace_back(std::forward<F>(f));
cv_task.notify_one();
}
// waits until the queue is empty.
void ThreadPool::waitFinished()
{
std::unique_lock<std::mutex> lock(queue_mutex);
cv_finished.wait(lock, [this](){ return tasks.empty() && (busy == 0); });
}
// a cpu-busy task.
void work_proc()
{
std::random_device rd;
std::mt19937 rng(rd());
// build a vector of random numbers
std::vector<int> data;
data.reserve(100000);
std::generate_n(std::back_inserter(data), data.capacity(), [&](){ return rng(); });
std::sort(data.begin(), data.end(), std::greater<int>());
}
int main()
{
ThreadPool tp;
// run five batches of 100 items
for (int x=0; x<5; ++x)
{
// queue 100 work tasks
for (int i=0; i<100; ++i)
tp.enqueue(work_proc);
tp.waitFinished();
std::cout << tp.getProcessed() << '\n';
}
// destructor will close down thread pool
return EXIT_SUCCESS;
}
Output
100
200
300
400
500
Best of luck.