How do I correctly synchronize worker threads with a main thread if a worker thread can generate additional tasks? I've used a std::queue of tasks guarded by a mutex, and an atomic variable to track busy threads. Unfortunately I'm facing deadlocks at the end of the execution.
I've extracted the code from my project and created the following example (you can easily compile it with g++ or MSVC):
#include <iostream>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <stdexcept>
#include <functional>
#include <stack>
#include <atomic>
#include <queue>
template <class T, class Compare>
class USort {
using Task = std::pair<T*, T*>;
private:
size_t m_ThreadsNum;
std::atomic<bool> m_Finished;
std::atomic<size_t> m_Busy;
std::thread* m_Threads;
std::queue<Task> m_Tasks;
size_t m_Size;
T* m_Data;
Compare m_Comparator;
std::condition_variable m_WaitFinished;
std::condition_variable m_WaitSorter;
std::mutex m_TaskQueueMutex;
private:
const size_t THREAD_THRESHOLD = 1024;
const size_t THREAD_POOL_THRESHOLD = 8192;
bool HasTask() {
std::unique_lock<std::mutex> lock(m_TaskQueueMutex);
return m_Tasks.size() > 0;
}
bool PopTask(T** L, T** R) {
std::unique_lock<std::mutex> lock(m_TaskQueueMutex);
if (m_Tasks.size() == 0) {
*L = *R = nullptr;
return false;
}
*L = m_Tasks.front().first;
*R = m_Tasks.front().second;
m_Tasks.pop();
return true;
}
void PushTask(T* L, T* R) {
std::unique_lock<std::mutex> lock(m_TaskQueueMutex);
m_Tasks.emplace(std::pair<T*, T*>(L, R));
m_WaitSorter.notify_one();
}
void SortThread(size_t Id) {
std::mutex sorter_mutex;
for (;;) {
std::unique_lock<std::mutex> lock(sorter_mutex);
///
/// ----------------------------------> some threads wait here
///
m_WaitSorter.wait(lock, [this]() { return m_Finished || HasTask(); });
if (m_Finished) break;
m_Busy++;
T *left, *right;
while (PopTask(&left, &right)) {
Sort(left, right);
}
if (--m_Busy == 0) {
m_WaitFinished.notify_one();
}
}
}
// just simulate work
void Sort(T* Left, T* Right) {
if (Right - Left > 10) {
PushTask(Left, Right-10);
}
}
void WaitForSortingIsFinished() {
std::mutex finished;
std::unique_lock<std::mutex> lock(finished);
m_WaitFinished.wait(lock, [this]() { return m_Busy == 0 && !HasTask(); });
}
void FinishThreads() {
m_Finished = true;
m_WaitSorter.notify_all();
}
void ReleaseThreads() {
if (m_Threads) {
for (size_t i = 0; i < m_ThreadsNum; i++) {
///
/// ----------------------------------> main thread stuck here
///
m_Threads[i].join();
}
delete[] m_Threads;
m_Threads = nullptr;
}
}
public:
USort(size_t NumberOfThreads = 0) : m_Comparator(Compare()) {
if (NumberOfThreads == 0) {
static const unsigned int max_concurrency = std::thread::hardware_concurrency();
NumberOfThreads = max_concurrency;
if (NumberOfThreads == 0) NumberOfThreads = 4;
}
m_Finished = false;
m_ThreadsNum = NumberOfThreads;
m_Threads = nullptr;
}
~USort() {
ReleaseThreads();
}
void Sort(T* Data, size_t Size) {
// build thread pool
m_Threads = new std::thread[m_ThreadsNum];
for (size_t i = 0; i < m_ThreadsNum; i++) {
m_Threads[i] = std::thread(&USort::SortThread, this, i);
}
// process data
PushTask(Data, Data + Size - 1);
WaitForSortingIsFinished();
FinishThreads();
}
};
template <class T, class Compare>
void usort(T* Data, size_t Size, size_t NumberOfThreads = 0) {
USort<T, Compare> mt_sorter(NumberOfThreads);
mt_sorter.Sort(Data, Size);
}
const size_t ARR_SIZE = 0x00010000;
struct comp {
bool operator()(const int& L, const int& R) const {
return L < R;
}
};
int main()
{
int* arr = new int[ARR_SIZE];
for (int i = 0; i < ARR_SIZE; i++) {
arr[i] = rand() % 3200000;
}
usort<int, comp>(arr, ARR_SIZE, 16);
delete[] arr;
return 0;
}
The thing is that in my example the threads aren't always finished. From time to time some thread stays pending in m_WaitSorter.wait(), and therefore the main thread stays pending in m_Threads[i].join(). Where is the flaw in the logic? Why doesn't the call to FinishThreads() finish all the threads?
EDIT:
Basically I'd like to implement multithread sorting algorithm.
The main thread creates the thread pool, pushes the first task (sort the whole array) onto the task queue, and waits for the sorting to be finished
A pool thread takes a task and divides it into smaller tasks (1-3). One of these tasks is immediately processed by the current pool thread; the others are pushed onto the queue
A pool thread mustn't finish until the whole data set is sorted (there are no tasks in the queue and all pool threads are pending)
When the sorting is finished the main thread should be woken
The main thread should then finish the pending threads
So for this, from my perspective, I need two condition_variables: one in the main thread with the predicate "all threads are pending && no task in the queue", and one in the pool threads with "has a task in the queue || finish the thread".
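Roughly like this (a sketch only, reusing the members of the class above; both waits share m_TaskQueueMutex so the predicates are checked under the same mutex that guards the queue):
// pool thread: sleep until there is a task or we're told to finish
{
    std::unique_lock<std::mutex> lock(m_TaskQueueMutex);
    m_WaitSorter.wait(lock, [this]() { return m_Finished || !m_Tasks.empty(); });
}
// main thread: sleep until all pool threads are idle and the queue is empty
{
    std::unique_lock<std::mutex> lock(m_TaskQueueMutex);
    m_WaitFinished.wait(lock, [this]() { return m_Busy == 0 && m_Tasks.empty(); });
}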
OK, I've read the documentation through carefully and found a bug in my code. The wait() call and the updates to the state that its predicate checks have to be guarded by the same mutex; otherwise a notify_one()/notify_all() can fire between checking the predicate and going to sleep, and the wakeup is lost. With that in mind I've updated and slightly simplified my code:
bool WaitAndPopTask(T** L, T** R) {
std::unique_lock<std::mutex> lock(m_TaskQueueMutex);
m_WaitSorter.wait(lock, [this]() { return m_Finished || !m_Tasks.empty(); });
if (m_Finished) return false;
m_Busy++;
*L = m_Tasks.front().first;
*R = m_Tasks.front().second;
m_Tasks.pop();
return true;
}
void SortThread(size_t Id) {
for (;;) {
T *left, *right;
if (!WaitAndPopTask(&left, &right)) break;
Sort(left, right);
std::lock_guard<std::mutex> lk(m_TaskQueueMutex);
if (--m_Busy == 0 && m_Tasks.empty()) {
FinishThreads();
}
}
}
void Sort(T* Data, size_t Size) {
// build thread pool
m_Threads = new std::thread[m_ThreadsNum];
for (size_t i = 0; i < m_ThreadsNum; i++) {
m_Threads[i] = std::thread(&USort::SortThread, this, i);
}
// process data
PushTask(Data, Data + Size - 1);
ReleaseThreads();
}
I'm making a single-producer, multiple-consumer program in C++. I start the consumer threads first and then add elements to an array.
Everything works fine, but in the end the consumer threads don't join, because they're stuck waiting on the condition variable, and the program freezes.
I think the problem is that the threads keep spinning in their loop because currentSize is not protected, and they just can't get out of the condition variable wait, but I don't know how to fix it.
struct Item {
public:
string name;
int time;
double height;
};
struct Monitor {
private:
Item items[12];
int currentSize;
bool finished;
mutex lock;
condition_variable cv;
public:
Monitor() {
finished = false;
currentSize = 0;
}
void put(Item item) {
unique_lock<mutex> guard(lock);
cv.wait(guard, [&] { return (currentSize < 12); });
items[currentSize] = item;
currentSize++;
cv.notify_all();
}
Item get() {
unique_lock<mutex> guard(lock);
cv.wait(guard, [&] { return (currentSize > 0); });
Item item = items[currentSize - 1];
currentSize--;
return item;
}
bool get_finished() {
return finished;
}
void set_finished() {
finished = true;
}
int get_size() {
return currentSize;
}
};
int main() {
vector<Item> items = read_file(file);
Monitor monitor;
vector<thread> threads;
vector<Item> results;
for (int i = 0; i < 4; i++) {
threads.emplace_back([&] {
while (!monitor.get_finished()) {
if (monitor.get_size() > 0) {
Item item = monitor.get();
results.push_back(item);
}
}
});
}
for (int i = 0; i < items.size(); i++) {
monitor.put(items[i]);
}
monitor.set_finished();
for_each(threads.begin(), threads.end(), mem_fn(&thread::join));
return 0;
}
Why do the consumer threads block?
I have tested your code, and it turns out that it's the producer thread that blocks, in the put() method. Why?
Imagine the following scenario: there are 13 items in the vector items.
The main thread (producer) happily loads the first 12 items, and waits on cv for the currentSize to become lower than 12.
The consumer threads are notified, and happily consume the first 12 items, and then wait on cv for currentSize to become greater than 0.
But wait! Now everyone is waiting on something, with no one notifying. Thus, all threads would block. You need to notify the producer when currentSize becomes lower than 12.
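A minimal sketch of that fix, reusing the Monitor members from the question (only get() changes):
Item get() {
    unique_lock<mutex> guard(lock);
    cv.wait(guard, [&] { return (currentSize > 0); });
    Item item = items[currentSize - 1];
    currentSize--;
    cv.notify_all();   // wake the producer blocked in put() waiting for currentSize < 12
    return item;
}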
I noticed a few issues: I made the member variables atomic and added notify_all in the get API. However, there was a logic error as well. Imagine that you have 4 threads currently running and 5 items in the queue. At this point, let's say each thread is able to get one item out of the queue, so now there are 4 threads and only one item left. One of the threads takes the last one out, and now there are 0 items left, yet the other three threads are still waiting on the condition variable. So the solution is: when the last item is taken out, every thread should be notified, and if there is no element left, just return from the API.
#include <iostream>
#include <vector>
#include <condition_variable>
#include <thread>
#include <algorithm>
#include <atomic>
using namespace std;
using Item = int;
struct Monitor {
private:
Item items[12];
std::atomic<int> currentSize;
std::atomic<bool> finished;
mutex lock;
condition_variable cv;
public:
Monitor() {
finished = false;
currentSize = 0;
}
void put(Item item) {
unique_lock<mutex> guard(lock);
cv.wait(guard, [&] { return (currentSize < 12); });
items[currentSize] = item;
currentSize++;
cv.notify_all();
std::cerr << "+ " << currentSize << std::endl ;
}
Item get() {
unique_lock<mutex> guard(lock);
cv.wait(guard, [&] { return (currentSize >= 0 ); });
Item item;
if (currentSize > 0 ){
currentSize--;
item = items[currentSize];
cv.notify_all();
std::cerr << "- " << currentSize << std::endl ;
}
return item;
}
bool get_finished() {
return finished;
}
void set_finished() {
finished = true;
}
int get_size() {
return currentSize;
}
};
int main() {
vector<Item> items(200);
std::fill ( items.begin() , items.end(), 100);
Monitor monitor;
vector<thread> threads;
vector<Item> results;
for (int i = 0; i < 10; i++) {
threads.emplace_back([&] {
while ( !monitor.get_finished() ) {
if (monitor.get_size() > 0) {
Item item = monitor.get();
results.push_back(item);
}
}
});
}
for (int i = 0; i < items.size(); i++) {
monitor.put(items[i]);
}
monitor.set_finished();
for_each(threads.begin(), threads.end(), mem_fn(&thread::join));
return 0;
}
I want to implement a semaphore class, and a user on Stack Overflow has noted that my implementation is not working correctly.
At first I did it like this:
class sem_t {
int count;
public:
sem_t(int _count = 0) : count(_count) {};
void up() {
this->count++;
}
void down() {
while (this->count == 0)
std::this_thread::yield();
this->count--;
}
};
Then a user on Stack Overflow noted that this implementation is faulty because I read and write the variable count outside of any synchronization primitive, so at some point the value can become incorrect, and with compiler optimizations the compiler can assume that count cannot be modified by another thread. So I tried to add a mutex to this construction, like this:
class sem_t {
int count;
std::mutex mutualExclusion;
public:
sem_t(int _count = 0) : count(_count) {};
void up() {
this->mutualExclusion.lock();
this->count++;
this->mutualExclusion.unlock();
}
void down() {
this->mutualExclusion.lock();
while (this->count == 0)
std::this_thread::yield();
this->count--;
this->mutualExclusion.unlock();
}
};
But with this approach, when I try to detach a thread I get an error saying that a mutex was destroyed while busy, because a thread can hold the mutex and then yield, after which the thread is detached and the error occurs (is this solution OK?).
Then I tried to modify this code and settled on the following construction:
class sem_t {
int count;
std::mutex mutualExclusion;
public:
sem_t(int _count = 0) : count(_count) {};
void up() {
this->mutualExclusion.lock();
this->count++;
this->mutualExclusion.unlock();
}
void down() {
while (this->count == 0)
std::this_thread::yield();
this->mutualExclusion.lock();
this->count--;
this->mutualExclusion.unlock();
}
};
But I think this solution is faulty too, because it can lead to the same problem as the first one.
So, what's the correct implementation?
I want to note that I've tried an implementation with a condition variable, but I am trying to implement the semaphore without one; if you do suggest a solution with a condition variable, please describe how the wait method of the condition variable works.
[Edit]
My full code, using the self-implemented semaphore:
#include "pch.h"
#include <iostream>
#include <vector>
#include <mutex>
#include <thread>
#include <chrono>
class sem_t {
int count;
std::mutex mutualExc;
public:
sem_t(int _count = 0) : count(_count) {};
void up() {
mutualExc.lock();
this->count++;
mutualExc.unlock();
}
void down() {
mutualExc.lock();
while (this->count == 0) {
mutualExc.unlock();
std::this_thread::yield();
mutualExc.lock();
}
this->count--;
mutualExc.unlock();
}
};
#define N 5
#define THINKING 0
#define HUNGRY 1
#define EATING 2
std::mutex mx;
std::mutex coutMX;
char philosopherState[N] = { THINKING };
sem_t philosopherSemaphores[N] = { 0 };
void testSetState(short i) {
if (philosopherState[i] == HUNGRY && philosopherState[(i + 1) % N] != EATING && philosopherState[(i + N - 1) % N] != EATING) {
philosopherState[i] = EATING;
philosopherSemaphores[i].up();
}
}
void take_forks(short i) {
::mx.lock();
philosopherState[i] = HUNGRY;
testSetState(i);
::mx.unlock();
philosopherSemaphores[i].down();
}
void put_forks(short i) {
::mx.lock();
philosopherState[i] = THINKING;
testSetState((i + 1) % N);
testSetState((i + N - 1) % N);
::mx.unlock();
}
void think(short p) {
for (short i = 0; i < 5; i++) {
coutMX.lock();
std::cout << "Philosopher N" << p << " is thinking!" << std::endl;
coutMX.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
void eat(short p) {
for (short i = 0; i < 5; i++) {
coutMX.lock();
std::cout << "Philosopher N" << p << " is eating!" << std::endl;
coutMX.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
void philosopher(short i) {
while (1) {
think(i);
take_forks(i);
eat(i);
put_forks(i);
}
}
int main()
{
std::vector<std::thread*> threadsVector;
for (int i = 0; i < N; i++) {
threadsVector.push_back(new std::thread([i]() { philosopher(i); }));
}
std::this_thread::sleep_for(std::chrono::milliseconds(15000));
for (int i = 0; i < N; i++) {
threadsVector[i]->detach();
}
return 0;
}
The error that occurs (only when running the program in release or debug mode in Visual Studio) is the "mutex destroyed while busy" abort mentioned above.
The last attempt is indeed not correct, because it may happen that several threads call down at the same time and all successfully pass the
while (this->count == 0)
std::this_thread::yield();
lines, and then they will all decrement the counter, possibly to a negative value:
this->mutualExclusion.lock();
this->count--;
this->mutualExclusion.unlock();
So, the counter value check and update must be performed atomically.
If you want to keep the busy loop, the easiest way would be to call unlock before yield and lock after it, so the comparison and the decrement are performed under the same lock:
void down() {
this->mutualExclusion.lock();
while (this->count == 0) {
this->mutualExclusion.unlock();
std::this_thread::yield();
this->mutualExclusion.lock();
}
this->count--;
this->mutualExclusion.unlock();
}
Also, you can use a std::unique_lock guard, which locks the provided mutex in its constructor and unlocks it in its destructor, so the mutex will not accidentally be left in a locked state:
void down() {
std::unique_lock<std::mutex> lock(this->mutualExclusion);
while (this->count == 0) {
lock.unlock();
std::this_thread::yield();
lock.lock();
}
this->count--;
}
To deal with the "mutex destroyed while busy" error, you either need a flag to stop the background threads and then wait for them to complete with join instead of detach:
#include <atomic>
...
std::atomic<bool> stopped{ false };
void philosopher(short i) {
while (!stopped) {
...
}
}
...
int main()
{
...
stopped = true;
for (int i = 0; i < N; i++) {
threadsVector[i]->join();
}
return 0;
}
or, if you really don't want to bother with releasing static resources, you can call std::quick_exit instead of detach and return:
int main()
{
...
std::this_thread::sleep_for(std::chrono::milliseconds(15000));
std::quick_exit(0);
}
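For completeness, here is a sketch of the condition-variable variant mentioned in the question. std::condition_variable::wait(lock, pred) atomically releases the lock and blocks; when the thread is notified it reacquires the lock before re-checking the predicate, so the count check and the decrement happen under the same mutex without busy-waiting:
#include <mutex>
#include <condition_variable>
class cv_sem_t {
    int count;
    std::mutex mtx;
    std::condition_variable cv;
public:
    explicit cv_sem_t(int initial = 0) : count(initial) {}
    void up() {
        std::lock_guard<std::mutex> lk(mtx);
        ++count;
        cv.notify_one();                            // wake one waiter, if any
    }
    void down() {
        std::unique_lock<std::mutex> lk(mtx);
        cv.wait(lk, [this] { return count > 0; });  // sleeps instead of yielding
        --count;
    }
};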
I wrote the following code to implement a concurrent queue.
template <typename T>
class ConcurrentQueue
{
// Internal storage for a queue element
struct Element
{
T m_elem;
std::mutex m_mtx;
std::condition_variable m_cv;
bool m_hasElement = false;
};
public:
// The number of enqueued elements cannot go beyond p_capacity.
ConcurrentQueue(size_t p_capacity) :
m_elements(p_capacity),
m_approxCount(0),
m_actualCount(0),
m_front(0),
m_back(0)
{}
// Enqueues an element to the queue. Returns true on success and false
// if the enqueue failed due to the capacity being reached.
bool Enqueue(T p_element)
{
if (++m_approxCount > m_elements.size())
{
--m_approxCount;
return false;
}
++m_actualCount;
size_t slot = m_back.fetch_add(1) % m_elements.size();
auto& element = m_elements[slot];
std::lock_guard<std::mutex> lk(element.m_mtx);
element.m_elem = std::move(p_element);
element.m_hasElement = true;
element.m_cv.notify_one();
return true;
}
// Dequeues an element from the queue. Returns true on success and false
// if the dequeue failed due to the queue being empty.
bool Dequeue(T& p_element)
{
size_t count = m_actualCount.load();
if (count == 0)
{
return false;
}
while (!m_actualCount.compare_exchange_strong(count, count - 1))
{
if (count == 0)
{
return false;
}
}
size_t slot = m_front.fetch_add(1) % m_elements.size();
auto& element = m_elements[slot];
std::unique_lock<std::mutex> lk(element.m_mtx);
element.m_cv.wait(lk, [&element] { return element.m_hasElement; });
p_element = std::move(element.m_elem);
element.m_hasElement = false;
--m_approxCount;
return true;
}
private:
// Fixed size vector that stores the elements
std::vector<Element> m_elements;
// Approx count of number of elements in the queue.
std::atomic<size_t> m_approxCount;
// Actual count of the number of elements in the queue
std::atomic<size_t> m_actualCount;
// Index to the front of the queue
std::atomic<size_t> m_front;
// Index to the back of the queue
std::atomic<size_t> m_back;
};
and the following test to verify its functionality
int main()
{
int numElements = 1000;
int numThreads = 10;
ConcurrentQueue<int> q(numElements * numThreads / 2);
std::vector<std::thread> enqueueThreads;
for (int i = 0; i < numThreads; ++i)
{
enqueueThreads.emplace_back([&q, i, numElements]
{
for (int j = 0; j < numElements; ++j)
{
while (!q.Enqueue(i * numElements + j));
}
});
}
std::atomic<int> toDequeue = numElements * numThreads;
std::vector<std::thread> dequeueThreads;
for (int i = 0; i < numThreads; ++i)
{
dequeueThreads.emplace_back([&q, &toDequeue]
{
while (toDequeue > 0)
{
int element;
if (q.Dequeue(element))
{
--toDequeue;
}
}
});
}
for (auto& t : enqueueThreads)
{
t.join();
}
for (auto& t : dequeueThreads)
{
t.join();
}
}
In the debug build (VS2017), the test runs fine, but in the retail build, the main function doesn't return (the Dequeue threads don't complete) indicating a bug in the ConcurrentQueue implementation. What is the bug in the Enqueue or Dequeue method?
The Enqueue method needs to wait for the slot to be free if the dequeuer hasn't freed it up yet: because slots are claimed with fetch_add modulo the capacity, an enqueuer can wrap around to a slot whose previous element has not been consumed and overwrite it, after which a dequeuer assigned to that slot waits forever for an element that never arrives.
The following code fixed the problem.
template <typename T>
bool ConcurrentQueue<T>::Enqueue(T p_element)
{
if (++m_approxCount > m_elements.size())
{
--m_approxCount;
return false;
}
size_t slot = m_back.fetch_add(1) % m_elements.size();
auto& element = m_elements[slot];
{
std::unique_lock<std::mutex> lk(element.m_mtx);
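// block until any element left over from a previous wrap-around has been consumed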
element.m_cv.wait(lk, [&element] { return !element.m_hasElement; });
element.m_elem = std::move(p_element);
element.m_hasElement = true;
element.m_cv.notify_all();
}
++m_actualCount;
return true;
}
template <typename T>
bool ConcurrentQueue<T>::Dequeue(T& p_element)
{
size_t count = UINT64_MAX;
while (!m_actualCount.compare_exchange_strong(count, count - 1))
{
if (count == 0)
{
return false;
}
}
size_t slot = m_front.fetch_add(1) % m_elements.size();
auto& element = m_elements[slot];
{
std::unique_lock<std::mutex> lk(element.m_mtx);
element.m_cv.wait(lk, [&element] { return element.m_hasElement; });
p_element = std::move(element.m_elem);
element.m_hasElement = false;
element.m_cv.notify_all();
}
--m_approxCount;
return true;
}
I have used std::thread for background image loading. I create a background job like this:
if (bgThreads.size() > MAX_THREADS_COUNT){
fclose(file);
return;
}
if (bgThreads.find(id) != bgThreads.end()){
fclose(file);
return;
}
std::shared_ptr<BackgroundPNGLoader> bg = std::make_shared<BackgroundPNGLoader>(file, id);
bgThreads[id] = bg;
bg->thread = std::thread(&BackgroundPNGLoader::Run, bg);
In BackgroundPNGLoader, I have:
struct BackgroundPNGLoader {
std::atomic<bool> finished;
FILE* f;
int id;
std::thread thread;
BackgroundPNGLoader(FILE* file, int fileId) : finished(false), f(file), id(fileId) {}
void Run() {
///.... load data
finished.store(true);
}
};
In my main app, I have an Update-Render loop running on the main thread. In Update, I have:
std::list<int> finished;
for (auto & it : bgThreads)
{
if (it.second->finished)
{
if (it.second->thread.joinable())
{
it.second->thread.join();
}
finished.push_back(it.first);
//fill image data to texture or whatever need to be done on main thread
fclose(it.second->f);
}
}
for (auto & it : finished)
{
bgThreads.erase(it);
}
Is this considered safe? I am a little worried about spawning new threads every time I need a new file opened; there is no max limit.
First, avoid fopen/fclose and use C++ file I/O instead, to avoid potential resource leaks when exceptions are thrown. Further, using a raw std::thread isn't necessary in most cases. For asynchronous tasks, std::async combined with futures is the thing to go with.
std::async returns a potential result within a std::future, which can be retrieved later:
#include <future>
#include <thread>
#include <chrono>
#include <array>
#include <iostream>
#include <random>
size_t doHeavyWork(size_t arg)
{
std::mt19937 rng;
rng.seed(std::random_device()());
std::uniform_int_distribution<unsigned int> rnd(333, 777);
//simulate heavy work...
std::this_thread::sleep_for(std::chrono::milliseconds(rnd(rng)));
return arg * 33;
}
//wrapper function for checking whether a task has finished and
//the result can be retrieved by a std::future
template<typename R>
bool isReady(const std::future<R> &f)
{
return f.wait_for(std::chrono::seconds(0)) == std::future_status::ready;
}
int main()
{
constexpr size_t numTasks = 5;
std::array<std::future<size_t>, numTasks> futures;
size_t index = 1;
for(auto &f : futures)
{
f = std::async(std::launch::async, doHeavyWork, index);
index++;
}
std::array<bool, numTasks> finishedTasks;
for(auto &i : finishedTasks)
i = false;
size_t numFinishedTasks = 0;
do
{
for(size_t i = 0; i < numTasks; ++i)
{
if(!finishedTasks[i] && isReady(futures[i]))
{
finishedTasks[i] = true;
numFinishedTasks++;
std::cout << "task " << i << " ended with result " << futures[i].get() << '\n';
}
}
}
while(numFinishedTasks < numTasks);
std::cin.get();
}
std::async may run the tasks on separate threads; some implementations take those threads from a thread pool.
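Applied to the loader from the question, a rough sketch could look like this (ImageData and LoadPNG are placeholders for the asker's actual decoding code, not a real API):
#include <future>
#include <chrono>
#include <cstdio>
#include <unordered_map>
struct ImageData { /* pixels, width, height, ... */ };
// stand-in for the real PNG decoding; runs on a worker thread
ImageData LoadPNG(FILE* f) { /* ...decode from f... */ (void)f; return ImageData{}; }
std::unordered_map<int, std::future<ImageData>> pending;
void StartLoad(int id, FILE* file) {
    pending[id] = std::async(std::launch::async, LoadPNG, file);
}
void Update() {
    for (auto it = pending.begin(); it != pending.end(); ) {
        // poll without blocking the main thread
        if (it->second.wait_for(std::chrono::seconds(0)) == std::future_status::ready) {
            ImageData img = it->second.get();
            // upload img to a texture here, on the main thread, then close the file
            it = pending.erase(it);
        } else {
            ++it;
        }
    }
}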
First, you'd better not spawn more threads than you have cores.
Here is a simple example using detach and letting the threads inform you about their status:
#include <thread>
#include <atomic>
#include <vector>
struct task_data
{
};
void doHeavyWork(task_data to_do, std::atomic<bool>& done)
{
//... do work
//---
//--
done = true;
}
int main()
{
unsigned int available_cores = std::thread::hardware_concurrency();//for real parallelism you should not spawn more threads than cores
std::vector<std::atomic<bool>> thread_statuses(available_cores);
for (auto& b : thread_statuses)//initialize with all cores/threads are free to use
b = true;
const unsigned int nb_tasks = 100;//how many?
std::vector<task_data> tasks_to_realize(nb_tasks);
for (auto t : tasks_to_realize)//loop on tasks to spawn a thread for each
{
bool found = false;
unsigned int status_id = 0;
while (!found) //loop until you find a core/thread to use
{
for (unsigned int i = 0; i < thread_statuses.size(); i++)
{
if (thread_statuses[i])
{
found = true;
status_id = i;
thread_statuses[i] = false;
break;
}
}
}
//spawn thread for this task
std::thread task_thread(doHeavyWork, std::move(t), std::ref(thread_statuses[status_id]));
task_thread.detach();//detach it --> you will know it is done when it sets done to true
}
//wait till all are done
bool any_thread_running = true;
while (any_thread_running)//keep going until all are done
{
for (unsigned int i = 0; i < thread_statuses.size(); i++)
{
if (false == thread_statuses[i])
{
any_thread_running = true;
break;
}
any_thread_running = false;
}
}
return 0;
}
Until now, when using threads, I've always started them right away in my program and then made them wait on notification from a main control thread.
std::vector<std::thread> threads;
for(int i = 0; i != thread_count; ++i) {
threads.push_back(std::thread(&MyClass::myfunction, this));
}
/* some time later in the code */
for(auto& t : threads) {
t.join();
}
Now I want to start threads on demand from a function run by my control thread, but I'm unsure how to handle the thread objects and their joining.
The following would push a new thread object onto the vector for each call, which strikes me as not ideal:
std::vector<std::thread> threads;
while(accumulating_data) {
if(buffer_full) {
threads.push_back(std::thread(&MyClass::myfunction, this));
}
}
Having the vector hold no more than the maximum number of concurrently running threads seems preferable. I also don't know how to join the threads here without blocking the control thread.
If I do something like this instead:
// dummy code, in my real code I have a queue of idle IDs
std::vector<std::thread> threads(thread_count);
while(accumulating_data) {
if(buffer_full) {
threads[thread_id] = std::thread(&MyClass::myfunction, this);
if(++thread_id == thread_count) { thread_id = 0; }
}
}
...I quickly crash, probably because I haven't joined or am reassigning to a vector element that already contains a std::thread object.
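Indeed, move-assigning onto a std::thread that is still joinable calls std::terminate(); a minimal illustration of the crash:
#include <thread>
int main() {
    std::thread t([] {});
    // t = std::thread([] {});   // t is still joinable here -> std::terminate()
    t.join();
    t = std::thread([] {});      // fine: t is no longer joinable after join()
    t.join();
}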
Any hints on how I can accomplish my goal of starting threads on demand, instead of having them wait?
Update:
I managed to get the code to run without crashing by introducing a std::thread.joinable() check. I'm still open to opinions on how to handle this more elegantly, so I won't make it the answer to my own question:
std::vector<std::thread> threads(thread_count);
while(accumulating_data) {
if(buffer_full) {
if(threads[thread_id].joinable()) {
threads[thread_id].join();
}
threads[thread_id] = std::thread(&MyClass::myfunction, this);
if(++thread_id == thread_count) { thread_id = 0; }
}
}
I managed to get the code to run without crashing by introducing a std::thread.joinable() check. I'm still open to opinions on how to handle this more elegantly :)
std::vector<std::thread> threads(thread_count);
while(accumulating_data) {
if(buffer_full) {
/* the following check returns false prior to an assignment */
if(threads[thread_id].joinable()) {
threads[thread_id].join();
}
threads[thread_id] = std::thread(&MyClass::myfunction, this);
if(++thread_id == thread_count) { thread_id = 0; }
}
}
Not sure if this is what you want..
#include <thread>
#include <tuple>
#include <vector>
#include <cstdint>
#include <stdexcept>
class ThreadGroup
{
private:
std::uint32_t max_threads;
std::vector<std::tuple<std::thread::native_handle_type, std::thread, bool*>> data;
public:
ThreadGroup() : max_threads(std::thread::hardware_concurrency()), data(max_threads) {}
ThreadGroup(std::uint32_t max_threads) : max_threads(max_threads), data(max_threads) {}
~ThreadGroup();
template<class Function, class... Args>
std::thread::native_handle_type insert(bool &terminate, Function&& f, Args&&... args);
bool remove(std::thread::native_handle_type id);
};
ThreadGroup::~ThreadGroup()
{
for (auto &it : data)
{
if (std::get<0>(it))
{
if (!*std::get<2>(it))
{
std::get<1>(it).detach();
continue;
}
std::get<1>(it).join();
}
}
}
template<class Function, class... Args>
std::thread::native_handle_type ThreadGroup::insert(bool &terminate, Function&& f, Args&&... args)
{
int i = 0;
for (auto &it : data)
{
if (std::get<0>(it) == 0)
{
auto &&t = std::thread(std::forward<Function>(f), std::forward<Args>(args)...);
auto &&tup = std::make_tuple(t.native_handle(), std::forward<std::thread>(t), &terminate);
data[i] = std::move(tup);
return std::get<0>(data[i]);
}
++i;
}
throw std::length_error("Maximum thread limit reached.");
}
bool ThreadGroup::remove(std::thread::native_handle_type id)
{
for (auto it = data.begin(); it != data.end(); ++it)
{
if (std::get<0>(*it) == id)
{
if (std::get<1>(*it).joinable() && *std::get<2>(*it))
{
std::get<1>(*it).join();
std::get<0>(*it) = 0;
std::get<2>(*it) = nullptr;
//data.erase(it);
return true;
}
std::get<1>(*it).detach();
std::get<0>(*it) = 0;
std::get<2>(*it) = nullptr;
//data.erase(it);
return false;
}
}
return false;
}
Then I used it like:
#include <chrono>
#include <iostream>
#include <thread>
bool terminate1 = false, terminate2 = false, terminate3 = false, terminate4 = false, terminate5 = false;
void func1()
{
while(!terminate1)
{
std::cout<<"T1 ";
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
void func2()
{
while(!terminate2)
{
std::cout<<"T2 ";
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
void func3()
{
while(!terminate3)
{
std::cout<<"T3 ";
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
void func4()
{
while(!terminate4)
{
std::cout<<"T4 ";
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
void func5()
{
while(!terminate5)
{
std::cout<<"T5 ";
std::this_thread::sleep_for(std::chrono::seconds(1));
}
}
int main()
{
ThreadGroup group;
auto id1 = group.insert(terminate1, func1);
auto id2 = group.insert(terminate2, func2);
auto id3 = group.insert(terminate3, func3);
auto id4 = group.insert(terminate4, func4);
try
{
auto id5 = group.insert(terminate5, func5); //limit in my case is 4. inserting 5 will throw max limit exception..
}
catch(std::exception &e)
{
std::cout<<"\n\n"<<e.what()<<"\n\n";
}
std::this_thread::sleep_for(std::chrono::seconds(3));
terminate1 = true; //allow the thread to join..
group.remove(id1); //joins if the thread is finished..
std::this_thread::sleep_for(std::chrono::seconds(3));
group.remove(id2); //remove another thread (detaches if the thread isn't finished)..
auto id5 = group.insert(terminate5, func5); //insert a new thread in any of the old slots..
std::this_thread::sleep_for(std::chrono::seconds(3));
}