Concurrent program does not terminate - C++

I have the following concurrent program:
template <typename T>
class CircularBuffers{
public:
CircularBuffers(const T &size):_buffer(size),_start(0),_end(0){
assert((size > 1 && "size must be greater than 1"));
}
CircularBuffers(std::initializer_list<T> l):_buffer(l),_start(0),_end(l.size()-1){}
void addValue(const T &value)
{
std::unique_lock<std::mutex> uLock(_mutex);
_end = (_end+1) % _buffer.size();
if (_end==_start){_start = (_start+1) % _buffer.size();}
_buffer[_start] = value;
}
T poll()
{
std::unique_lock<std::mutex> uLock(_mutex);
_read_cond.wait(uLock,[&](){return !isEmpty();});
T element = _buffer[_start];
_start = (_start + 1) % _buffer.size();
return element;
}
bool isFull(){
return _start == (_end+1) % _buffer.size();
}
bool isEmpty()
{
return _start == _end;
}
private:
std::vector<T> _buffer;
int _start;
int _end;
std::mutex _mutex;
std::condition_variable _read_cond;
};
int main()
{
std::shared_ptr<CircularBuffers<int>> circ = std::make_shared<CircularBuffers<int>>(7);
std::vector<std::future<void>> futures_add;
std::vector<std::future<int>> futures_poll;
for (int i=0;i<10;i++)
{
futures_add.emplace_back(std::async(&CircularBuffers<int>::addValue,circ,i));
}
for (int i=0;i<10;i++)
{
futures_poll.emplace_back(std::async(&CircularBuffers<int>::poll,circ));
}
std::for_each(futures_add.begin(),futures_add.end(),[](std::future<void> &ftr){ftr.wait();});
std::for_each(futures_poll.begin(),futures_poll.end(),[](std::future<int> &ftr){ftr.wait();});
}
It never ends, and it is because of the concurrency, but I cannot manage to see why.
My questions are:
Why does it not end? It gets past the first set of waits, but it gets stuck on the waits from poll.
How could I adapt this code to make it work forever, in the sense that an infinite loop of writing and polling is created, but every time a thread delivers a result it gets removed from the respective futures vector to leave space for a new thread, so that I do not end up creating an unbounded number of threads?
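For reference, a minimal sketch of the two changes that usually resolve this kind of hang (my annotation, not part of the original question): addValue() stores the value in the slot _start points at instead of _end, and it never notifies _read_cond, so poll() threads that find the buffer empty sleep forever. Note also that 10 poll() calls can outnumber the values a 7-slot overwriting buffer actually retains, so producers and consumers still have to be matched.
void addValue(const T &value)
{
    std::unique_lock<std::mutex> uLock(_mutex);
    _buffer[_end] = value;                        // write into the slot _end points at
    _end = (_end + 1) % _buffer.size();
    if (_end == _start)                           // buffer was full: drop the oldest value
        _start = (_start + 1) % _buffer.size();
    _read_cond.notify_one();                      // wake one thread blocked in poll()
}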


Clearing a grid with multithreading

I am trying to clear a (game) grid, but whenever I multithread it, the time it takes to clear the grid increases by 3 seconds.
By my logic this should not be the case, since each Y value of the array holds a lot of X values (the X values store a class), each of which should iterate through its so-called objects property and perform objects.clear() on it, which also iterates through every element.
My code:
const int NUM_OF_AVAIL_THREADS = std::thread::hardware_concurrency() * 2;
ThreadPool* pool = new ThreadPool(NUM_OF_AVAIL_THREADS);
vector<future<void>> threads;
void Terrain::clear_grid()
{
for (int y = 0; y < tiles.size(); y++)
{
threads.push_back(pool->enqueue([&]()
{
array<TerrainTile, terrain_width>& h = tiles.at(y);
for (int x = 0; x < h.size(); x++)
{
h.at(x).objects.clear();
}
}));
}
pool->wait_and_clear_threads(threads);
}
TerrainTile looks like this:
class TerrainTile
{
public:
//TerrainTile *up, *down, *left, *right;
vector<TerrainTile*> exits;
bool visited = false;
size_t position_x;
size_t position_y;
TileType tile_type;
vector<TerrainTile*> neighbors;
vector<MovingAsset*> objects;
vector<Tank*> tanks;
vector<MovingAsset*> beams;
vector<MovingAsset*> get_collidable_assets();
void add_collidable_assets(MovingAsset* asset);
void add_neighbor(TerrainTile* neighbor);
};
How the tiles array looks like:
static constexpr size_t terrain_width = 80;
static constexpr size_t terrain_height = 45;
std::array<std::array<TerrainTile, terrain_width>, terrain_height> tiles;
Am I missing out on something crucial here, or does the cost of creating a thread simply outweigh the time it takes to iterate through the arrays?
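One common mitigation, sketched here as an assumption rather than an answer from the thread: batch several rows per task, so each enqueue amortizes its scheduling and wake-up overhead over more work. (Note the bounds are captured by value; the [&] capture of y in the original races with the loop counter.)
// Hypothetical variant, not from the thread; needs <algorithm> for std::min.
void Terrain::clear_grid_batched()
{
    const size_t rows_per_task = 8;                    // assumed tuning value
    for (size_t y0 = 0; y0 < tiles.size(); y0 += rows_per_task)
    {
        const size_t y1 = std::min(y0 + rows_per_task, tiles.size());
        threads.push_back(pool->enqueue([&, y0, y1]()  // capture bounds by value
        {
            for (size_t y = y0; y < y1; y++)
                for (TerrainTile& tile : tiles.at(y))
                    tile.objects.clear();
        }));
    }
    pool->wait_and_clear_threads(threads);
}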
EDIT: THIS IS THE THREADPOOL
#pragma once
namespace Tmpl8
{
class ThreadPool; //Forward declare
class Worker;
class Worker
{
public:
//Instantiate the worker class by passing and storing the threadpool as a reference
Worker(ThreadPool& s) : pool(s) {}
inline void operator()();
private:
ThreadPool& pool;
};
class ThreadPool
{
public:
ThreadPool(size_t numThreads) : stop(false)
{
for (size_t i = 0; i < numThreads; ++i)
workers.push_back(std::thread(Worker(*this)));
}
~ThreadPool()
{
stop = true; // stop all threads
condition.notify_all();
for (auto& thread : workers)
thread.join();
}
void wait_and_clear_threads(vector<future<void>>& threads)
{
for (future<void>& t : threads)
{
t.wait();
}
threads.clear();
}
template <class T>
auto enqueue(T task) -> std::future<decltype(task())>
{
//Wrap the function in a packaged_task so we can return a future object
auto wrapper = std::make_shared<std::packaged_task<decltype(task())()>>(std::move(task));
//Scope to restrict critical section
{
//lock our queue and add the given task to it
std::unique_lock<std::mutex> lock(queue_mutex);
tasks.push_back([=]
{
(*wrapper)();
});
}
//Wake up a thread to start this task
condition.notify_one();
return wrapper->get_future();
}
private:
friend class Worker; //Gives access to the private variables of this class
std::vector<std::thread> workers;
std::deque<std::function<void()>> tasks;
std::condition_variable condition; //Wakes up a thread when work is available
std::mutex queue_mutex; //Lock for our queue
bool stop = false;
};
inline void Worker::operator()()
{
std::function<void()> task;
while (true)
{
//Scope to restrict critical section
//This is important because we don't want to hold the lock while executing the task,
//because that would make it so only one task can run at a time (i.e. sequentially)
{
std::unique_lock<std::mutex> locker(pool.queue_mutex);
//Wait until some work is ready or we are stopping the threadpool
//Because of spurious wakeups we need to check if there is actually a task available or we are stopping
pool.condition.wait(locker, [=] { return pool.stop || !pool.tasks.empty(); });
if (pool.stop) break;
task = pool.tasks.front();
pool.tasks.pop_front();
}
task();
}
}
} // namespace Tmpl8

C++ STL Producer multiple consumer where producer waits for free consumer before producing next value

My little consumer-producer problem had me stumped for some time. I didn't want an implementation where one producer pushes some data round-robin to the consumers, filling up their queues of data respectively.
I wanted to have one producer and x consumers, but the producer waits to produce new data until a consumer is free again. In my example there are 3 consumers, so the producer creates a maximum of 3 objects of data at any given time. Since I don't like polling, the consumers were supposed to notify the producer when they are done. Sounds simple, but the solution I found doesn't please me. First, the code.
#include "stdafx.h"
#include <mutex>
#include <iostream>
#include <future>
#include <map>
#include <atomic>
std::atomic_int totalconsumed;
class producer {
using runningmap_t = std::map<int, std::pair<std::future<void>, bool>>;
// Secure the map of futures.
std::mutex mutex_;
runningmap_t running_;
// Used for finished notification
std::mutex waitermutex_;
std::condition_variable waiter_;
// The magic number to limit the producer.
std::atomic<int> count_;
bool can_run();
void clean();
// Fake a source, e.g. filesystem scan.
int fakeiter;
int next();
bool has_next() const;
public:
producer() : fakeiter(50) {}
void run();
void notify(int value);
void wait();
};
class consumer {
producer& producer_;
public:
consumer(producer& producer) : producer_(producer) {}
void run(int value) {
std::this_thread::sleep_for(std::chrono::milliseconds(42));
std::cout << "Consumed " << value << " on (" << std::this_thread::get_id() << ")" << std::endl;
totalconsumed++;
producer_.notify(value);
}
};
// Only if less than three threads are active, another gets to run.
bool producer::can_run() { return count_.load() < 3; }
// Verify if there's something to consume
bool producer::has_next() const { return 0 != fakeiter; }
// Produce the next value for consumption.
int producer::next() { return --fakeiter; }
// Remove the futures that have reported to be finished.
void producer::clean()
{
for (auto it = running_.begin(); it != running_.end(); ) {
if (it->second.second) {
it = running_.erase(it);
}
else {
++it;
}
}
}
// Runs the producer. Creates a new consumer for every produced value. Max 3 at a time.
void producer::run()
{
while (has_next()) {
if (can_run()) {
auto c = next();
count_++;
auto future = std::async(&consumer::run, consumer(*this), c);
std::unique_lock<std::mutex> lock(mutex_);
running_[c] = std::make_pair(std::move(future), false);
clean();
}
else {
std::unique_lock<std::mutex> lock(waitermutex_);
waiter_.wait(lock);
}
}
}
// Consumers diligently tell the producer that they are finished.
void producer::notify(int value)
{
count_--;
mutex_.lock();
running_[value].second = true;
mutex_.unlock();
std::unique_lock<std::mutex> waiterlock(waitermutex_);
waiter_.notify_all();
}
// Wait for all consumers to finish.
void producer::wait()
{
while (!running_.empty()) {
mutex_.lock();
clean();
mutex_.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
}
// Looks like the application entry point.
int main()
{
producer p;
std::thread pthread(&producer::run, &p);
pthread.join();
p.wait();
std::cout << std::endl << std::endl << "Total consumed " << totalconsumed.load() << std::endl;
return 0;
}
The part I don't like is the list of values mapped to the futures, called running_. I need to keep the future around until the consumer is actually done. I can't remove the future from the map in the notify method or else I'll kill the thread that is currently calling notify.
Am I missing something that could simplify this construct?
template<class T>
struct slotted_data {
std::size_t I;
T t;
};
template<class T>
using sink = std::function<void(T)>;
template<class T, std::size_t N>
struct async_slots {
bool produce( slotted_data<T> data ) {
if (terminate || data.I>=N) return false;
{
auto l = lock();
if (slots[data.I]) return false;
slots[data.I] = std::move(data.t);
}
cv.notify_one();
return true;
}
// rare use of non-lambda cv.wait in the wild!
bool consume(sink<slotted_data<T>> f) {
auto l = lock();
while(!terminate) {
for (auto& slot:slots) {
if (slot) {
auto r = std::move(*slot);
slot = std::nullopt;
f({std::size_t(&slot-slots.data()), std::move(r)}); // invoke in lock
return true;
}
}
cv.wait(l);
}
return false;
}
// easier and safer version:
std::optional<slotted_data<T>> consume() {
std::optional<slotted_data<T>> r;
bool worked = consume([&](auto&& data) { r = std::move(data); });
if (!worked) return {};
return r;
}
void finish() {
{
auto l = lock();
terminate = true;
}
cv.notify_all();
}
private:
auto lock() { return std::unique_lock<std::mutex>(m); }
std::mutex m;
std::condition_variable cv;
std::array< std::optional<T>, N > slots;
bool terminate = false;
};
async_slots provides a fixed number of slots and an awaitable consume. If you try to produce two things in the same slot, the produce function returns false and ignores you.
consume invokes the sink of the data inside the mutex in a continuation passing style. This permits atomic consumption.
We want to invert producer and consumer:
template<class T, std::size_t N>
struct slotted_consumer {
bool consume( std::size_t I, sink<T> sink ) {
std::optional<T> data;
std::condition_variable cv;
std::mutex m;
bool worked = slots.produce(
{
I,
[&](auto&& t){
{
std::unique_lock<std::mutex> l(m);
data.emplace(std::move(t));
}
cv.notify_one();
}
}
);
if (!worked) return false;
std::unique_lock<std::mutex> l(m);
cv.wait(l, [&]()->bool{
return (bool)data;
});
sink( std::move(*data) );
return true;
}
bool produce( T t ) {
return slots.consume(
[&](auto&& f) {
f.t( std::move(t) );
}
);
}
void finish() {
slots.finish();
}
private:
async_slots< sink<T>, N > slots;
};
We have to take some care to execute the sink in a context where we are not holding the mutex of async_slots, which is why consume above is so strange.
You share a slotted_consumer< int, 3 > slots. The producing thread repeatedly calls slots.produce(42);. It blocks until a new consumer lines up.
Consumer #2 calls slots.consume( 2, [&](int x){ /* code to consume x */ } ), and #1 and #0 pass their slot numbers as well.
All 3 consumers can be waiting for the next production. The above system defaults to feeding #0 first if it is waiting for more work; we could make it "fair" at a cost of keeping a bit more state.
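Putting the pieces together, a minimal usage sketch (my wiring of the interface described above, not code from the answer):
slotted_consumer<int, 3> slots;      // shared between producer and consumers

// producing thread: blocks until some consumer has lined up
slots.produce(42);

// consumer #2's thread: claims slot 2 and waits for a value
slots.consume(2, [&](int x) {
    /* code to consume x */
});

// when everything is done, release any still-waiting threads
slots.finish();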

Boost thread_group: maximal number of parallel threads

I want to apply boost::thread_group in my program with a maximal number of threads. For example:
int maxNumberOfThreads;
boost::thread_group group;
for (int i = 0; i < N; ++i)
//create a new thread only if group.size() is smaller than the maximal number of threads
group.create_thread(Worker);
group.join_all();
Does someone have an idea how I can realize this? Starting N threads at once will be very inefficient.
Thank you for your help.
What you seem to want is a thread pool.
You can use boost::thread::hardware_concurrency() to determine the number of (logical) cores available on your particular system.
Here's one I rolled for an answer last week:
#include <boost/thread.hpp>
#include <boost/phoenix.hpp>
#include <boost/optional.hpp>
using namespace boost;
using namespace boost::phoenix::arg_names;
boost::atomic_size_t counter(0ul);
class thread_pool
{
private:
mutex mx;
condition_variable cv;
typedef function<void()> job_t;
std::deque<job_t> _queue;
thread_group pool;
boost::atomic_bool shutdown;
static void worker_thread(thread_pool& q)
{
while (optional<job_t> job = q.dequeue())
(*job)();
}
public:
thread_pool() : shutdown(false) {
for (unsigned i = 0; i < boost::thread::hardware_concurrency(); ++i)
pool.create_thread(bind(worker_thread, ref(*this)));
}
void enqueue(job_t job)
{
lock_guard<mutex> lk(mx);
_queue.push_back(job);
cv.notify_one();
}
optional<job_t> dequeue()
{
unique_lock<mutex> lk(mx);
namespace phx = boost::phoenix;
cv.wait(lk, phx::ref(shutdown) || !phx::empty(phx::ref(_queue)));
if (_queue.empty())
return none;
job_t job = _queue.front();
_queue.pop_front();
return job;
}
~thread_pool()
{
shutdown = true;
{
lock_guard<mutex> lk(mx);
cv.notify_all();
}
pool.join_all();
}
};
A typical way to use that is also in that answer:
static const size_t bignumber = 1 << 20;
class myClass
{
thread_pool pool; // uses 1 thread per core
public:
void launch_jobs()
{
std::cout << "enqueuing jobs... " << std::flush;
for(size_t i=0; i<bignumber; ++i)
{
for(int j=0; j<2; ++j) {
pool.enqueue(bind(&myClass::myFunction, this, j, i));
}
}
std::cout << "done\n";
}
private:
void myFunction(int i, int j)
{
boost::this_thread::sleep_for(boost::chrono::milliseconds(1));
counter += 1;
}
};
int main()
{
myClass instance;
instance.launch_jobs();
size_t last = 0;
while (counter < (2*bignumber))
{
boost::this_thread::sleep_for(boost::chrono::milliseconds(100));
if ((counter >> 4u) > last)
{
std::cout << "Progress: " << counter << "/" << (bignumber*2) << "\n";
last = counter >> 4u;
}
}
}
For bonus, at that question, in the comments to another answer, I also posted an equivalent solution based on a lock-free job queue implementation:
boost thread throwing exception "thread_resource_error: resource temporarily unavailable"
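For the flavour of that lock-free variant, a rough sketch under stated assumptions: boost::lockfree::queue requires trivially-copyable elements, so plain function pointers stand in for the job type here, and the capacity and yield strategy are mine.
#include <boost/lockfree/queue.hpp>
#include <boost/atomic.hpp>
#include <boost/thread.hpp>

typedef void (*raw_job_t)();                   // trivially-copyable job type
boost::lockfree::queue<raw_job_t> jobs(1024);  // fixed capacity, assumed value

// producer side: jobs.push(&some_function);
void lockfree_worker(boost::atomic_bool& shutdown)
{
    raw_job_t job;
    while (!shutdown) {
        while (jobs.pop(job))                  // drain whatever work is available
            job();
        boost::this_thread::yield();           // lock-free queues have no blocking wait
    }
}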
This is my (imperfect) implementation:
/**
* \author Christophe Dumeunier
* \brief Extension of boost::thread_group managing a maximum number of threads running in parallel
*/
class thread_group_max : public boost::thread_group
{
public:
/**
* \brief Instantiate a group for threads
* \param max_running_threads Maximum number of threads running in parallel, if 0 use the number of cores
* \param max_sleeping_time Maximum sleeping time (seconds) between two checks for finished threads (must be > sleeping_time_start)
* \param sleeping_time_grow Coefficient increasing sleeping time while waiting for finished threads (must be > 1)
* \param sleeping_time_start Initial sleeping time (must be > 0)
*/
explicit thread_group_max(std::size_t max_running_threads = 0, float max_sleeping_time = 1.0f,
float sleeping_time_grow = 1.1f, float sleeping_time_start = 0.001f);
/**
* \brief Destroy the group
* \note Doesn't join the unterminated threads
*/
~thread_group_max();
/** \brief Wait for an available slot and then create a new thread and launch it */
template<typename F>
boost::thread* create_thread(F f);
private:
std::size_t maxRunningThreads; //!< Maximum number of running threads
float maxSleepingTime; //!< Maximum sleeping time between two checks for finished threads
float sleepingTimeStart; //!< Initial sleeping time
float sleepingTimeGrow; //!< Coefficient increasing sleeping time while waiting for finished threads
std::set<boost::thread*> runningThreads; //!< Pointers to running or finished-but-not-removed-yet threads
};
thread_group_max::thread_group_max(std::size_t max_running_threads, float max_sleeping_time, float sleeping_time_grow, float sleeping_time_start) :
boost::thread_group(),
maxRunningThreads(max_running_threads == 0 ? std::max(boost::thread::hardware_concurrency(), 1u) : max_running_threads),
maxSleepingTime(max_sleeping_time),
sleepingTimeStart(sleeping_time_start),
sleepingTimeGrow(sleeping_time_grow),
runningThreads()
{
assert(this->maxRunningThreads > 0);
assert(this->maxSleepingTime >= this->sleepingTimeStart);
assert(this->sleepingTimeStart > 0.0f);
assert(this->sleepingTimeGrow > 1.0f);
}
thread_group_max::~thread_group_max()
{}
template<typename F>
boost::thread* thread_group_max::create_thread(F f)
{
// First, try to clean already finished threads
if(this->runningThreads.size() >= this->maxRunningThreads)
{
for(std::set<boost::thread*>::iterator it = this->runningThreads.begin(); it != this->runningThreads.end();)
{
const std::set<boost::thread*>::iterator jt = it++;
if((*jt)->timed_join(boost::posix_time::milliseconds(0))) /// #todo timed_join is deprecated
this->runningThreads.erase(jt);
}
}
// If no finished thread found, wait for it
if(this->runningThreads.size() >= this->maxRunningThreads)
{
float sleeping_time = this->sleepingTimeStart;
do
{
boost::this_thread::sleep(boost::posix_time::milliseconds((long int)(1000.0f * sleeping_time)));
for(std::set<boost::thread*>::iterator it = this->runningThreads.begin(); it != this->runningThreads.end();)
{
const std::set<boost::thread*>::iterator jt = it++;
if((*jt)->timed_join(boost::posix_time::milliseconds(0))) /// #todo timed_join is deprecated
this->runningThreads.erase(jt);
}
if(sleeping_time < this->maxSleepingTime)
{
sleeping_time *= this->sleepingTimeGrow;
if(sleeping_time > this->maxSleepingTime)
sleeping_time = this->maxSleepingTime;
}
} while(this->runningThreads.size() >= this->maxRunningThreads);
}
// Now, at least 1 slot is available, use it
return *this->runningThreads.insert(this->boost::thread_group::create_thread(f)).first;
}
Example of use:
thread_group_max group(num_threads);
for(std::size_t i = 0; i < jobs.size(); ++i)
group.create_thread(boost::bind(&my_run_job_function, boost::ref(jobs[i])));
group.join_all();

C++0x has no semaphores? How to synchronize threads?

Is it true that C++0x will come without semaphores? There are already some questions on Stack Overflow regarding the use of semaphores. I use them (posix semaphores) all the time to let a thread wait for some event in another thread:
void thread0(...)
{
doSomething0();
event1.wait();
...
}
void thread1(...)
{
doSomething1();
event1.post();
...
}
If I would do that with a mutex:
void thread0(...)
{
doSomething0();
event1.lock(); event1.unlock();
...
}
void thread1(...)
{
event1.lock();
doSomething1();
event1.unlock();
...
}
Problem: It's ugly and it's not guaranteed that thread1 locks the mutex first (given that the same thread should lock and unlock a mutex, you also can't lock event1 before thread0 and thread1 have started).
So since boost doesn't have semaphores either, what is the simplest way to achieve the above?
You can easily build one from a mutex and a condition variable:
#include <mutex>
#include <condition_variable>
class semaphore {
std::mutex mutex_;
std::condition_variable condition_;
unsigned long count_ = 0; // Initialized as locked.
public:
void release() {
std::lock_guard<decltype(mutex_)> lock(mutex_);
++count_;
condition_.notify_one();
}
void acquire() {
std::unique_lock<decltype(mutex_)> lock(mutex_);
while(!count_) // Handle spurious wake-ups.
condition_.wait(lock);
--count_;
}
bool try_acquire() {
std::lock_guard<decltype(mutex_)> lock(mutex_);
if(count_) {
--count_;
return true;
}
return false;
}
};
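For illustration, the class above maps directly onto the question's event pattern (a usage sketch, not part of the answer):
semaphore event1;   // count starts at 0, i.e. "locked"

void thread0() { /* doSomething0(); */ event1.acquire(); /* blocks until posted */ }
void thread1() { /* doSomething1(); */ event1.release(); /* wakes thread0 */ }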
Based on Maxim Yegorushkin's answer, I tried to make the example in C++11 style.
#include <mutex>
#include <condition_variable>
class Semaphore {
public:
Semaphore (int count_ = 0)
: count(count_) {}
inline void notify()
{
std::unique_lock<std::mutex> lock(mtx);
count++;
cv.notify_one();
}
inline void wait()
{
std::unique_lock<std::mutex> lock(mtx);
while(count == 0){
cv.wait(lock);
}
count--;
}
private:
std::mutex mtx;
std::condition_variable cv;
int count;
};
I decided to write the most robust/generic C++11 semaphore I could, in the style of the standard as much as possible (note the using semaphore = ...; you would normally just use the name semaphore, similar to normally using string rather than basic_string):
template <typename Mutex, typename CondVar>
class basic_semaphore {
public:
using native_handle_type = typename CondVar::native_handle_type;
explicit basic_semaphore(size_t count = 0);
basic_semaphore(const basic_semaphore&) = delete;
basic_semaphore(basic_semaphore&&) = delete;
basic_semaphore& operator=(const basic_semaphore&) = delete;
basic_semaphore& operator=(basic_semaphore&&) = delete;
void notify();
void wait();
bool try_wait();
template<class Rep, class Period>
bool wait_for(const std::chrono::duration<Rep, Period>& d);
template<class Clock, class Duration>
bool wait_until(const std::chrono::time_point<Clock, Duration>& t);
native_handle_type native_handle();
private:
Mutex mMutex;
CondVar mCv;
size_t mCount;
};
using semaphore = basic_semaphore<std::mutex, std::condition_variable>;
template <typename Mutex, typename CondVar>
basic_semaphore<Mutex, CondVar>::basic_semaphore(size_t count)
: mCount{count}
{}
template <typename Mutex, typename CondVar>
void basic_semaphore<Mutex, CondVar>::notify() {
std::lock_guard<Mutex> lock{mMutex};
++mCount;
mCv.notify_one();
}
template <typename Mutex, typename CondVar>
void basic_semaphore<Mutex, CondVar>::wait() {
std::unique_lock<Mutex> lock{mMutex};
mCv.wait(lock, [&]{ return mCount > 0; });
--mCount;
}
template <typename Mutex, typename CondVar>
bool basic_semaphore<Mutex, CondVar>::try_wait() {
std::lock_guard<Mutex> lock{mMutex};
if (mCount > 0) {
--mCount;
return true;
}
return false;
}
template <typename Mutex, typename CondVar>
template<class Rep, class Period>
bool basic_semaphore<Mutex, CondVar>::wait_for(const std::chrono::duration<Rep, Period>& d) {
std::unique_lock<Mutex> lock{mMutex};
auto finished = mCv.wait_for(lock, d, [&]{ return mCount > 0; });
if (finished)
--mCount;
return finished;
}
template <typename Mutex, typename CondVar>
template<class Clock, class Duration>
bool basic_semaphore<Mutex, CondVar>::wait_until(const std::chrono::time_point<Clock, Duration>& t) {
std::unique_lock<Mutex> lock{mMutex};
auto finished = mCv.wait_until(lock, t, [&]{ return mCount > 0; });
if (finished)
--mCount;
return finished;
}
template <typename Mutex, typename CondVar>
typename basic_semaphore<Mutex, CondVar>::native_handle_type basic_semaphore<Mutex, CondVar>::native_handle() {
return mCv.native_handle();
}
In accordance with POSIX semaphores, I would add:
class semaphore
{
...
bool trywait()
{
boost::mutex::scoped_lock lock(mutex_);
if(count_)
{
--count_;
return true;
}
else
{
return false;
}
}
};
And I much prefer using a synchronisation mechanism at a convenient level of abstraction, rather than always copy-pasting a stitched-together version built from more basic primitives.
C++20 finally has semaphores - std::counting_semaphore<max_count>.
These have (at least) the following methods:
acquire() (blocking)
try_acquire() (non-blocking, returns immediately)
try_acquire_for() (non-blocking, takes a duration)
try_acquire_until() (non-blocking, takes a time at which to stop trying)
release()
You can read these CppCon 2019 presentation slides, or watch the video. There's also the official proposal P0514R4, but it may not be up-to-date with actual C++20.
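For completeness, a minimal C++20 usage sketch (standard <semaphore> header; the bound of 3 is an arbitrary example):
#include <semaphore>
#include <thread>

std::counting_semaphore<3> slots(3);   // at most 3 holders at a time

void worker()
{
    slots.acquire();                   // blocks while all 3 slots are taken
    // ... critical work ...
    slots.release();                   // free the slot for the next thread
}

int main()
{
    std::thread t1(worker), t2(worker), t3(worker), t4(worker);
    t1.join(); t2.join(); t3.join(); t4.join();
}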
You can also check out cpp11-on-multicore - it has a portable and optimal semaphore implementation.
The repository also contains other threading goodies that complement C++11 threading.
You can work with mutex and condition variables. You gain exclusive access with the mutex, check whether you want to continue or need to wait for the other end. If you need to wait, you wait in a condition. When the other thread determines that you can continue, it signals the condition.
There is a short example in the boost::thread library that you can most probably just copy (the C++0x and boost thread libs are very similar).
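A minimal sketch of that pattern, shaped like the question's event (my code, assuming a one-shot signal):
#include <mutex>
#include <condition_variable>

struct event
{
    std::mutex m;
    std::condition_variable cv;
    bool signaled = false;

    void wait()
    {
        std::unique_lock<std::mutex> lock(m);
        cv.wait(lock, [&] { return signaled; });  // re-checks after spurious wake-ups
    }
    void post()
    {
        { std::lock_guard<std::mutex> lock(m); signaled = true; }
        cv.notify_all();
    }
};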
An RAII semaphore wrapper can also be useful in threaded code:
class ScopedSemaphore
{
public:
explicit ScopedSemaphore(Semaphore& sem) : m_Semaphore(sem) { m_Semaphore.Wait(); }
ScopedSemaphore(const ScopedSemaphore&) = delete;
~ScopedSemaphore() { m_Semaphore.Notify(); }
ScopedSemaphore& operator=(const ScopedSemaphore&) = delete;
private:
Semaphore& m_Semaphore;
};
Usage example in multithread app:
boost::ptr_vector<std::thread> threads;
Semaphore semaphore;
for (...)
{
...
auto t = new std::thread([..., &semaphore]
{
ScopedSemaphore scopedSemaphore(semaphore);
...
}
);
threads.push_back(t);
}
for (auto& t : threads)
t.join();
I found that shared_ptr and weak_ptr, along with a list, did the job I needed. My issue was that I had several clients wanting to interact with a host's internal data. Typically, the host updates the data on its own; however, if a client requests it, the host needs to stop updating until no clients are accessing the host data. At the same time, a client could ask for exclusive access, so that no other clients, nor the host, could modify that host data.
How I did this was, I created a struct:
struct UpdateLock
{
typedef std::shared_ptr< UpdateLock > ptr;
};
Each client would have a member of such:
UpdateLock::ptr m_myLock;
Then the host would have a weak_ptr member for exclusivity, and a list of weak_ptrs for non-exclusive locks:
std::weak_ptr< UpdateLock > m_exclusiveLock;
std::list< std::weak_ptr< UpdateLock > > m_locks;
There is a function to enable locking, and another function to check if the host is locked:
UpdateLock::ptr LockUpdate( bool exclusive );
bool IsUpdateLocked( bool exclusive ) const;
I test for locks in LockUpdate, IsUpdateLocked, and periodically in the host's Update routine. Testing for a lock is as simple as checking whether the weak_ptrs have expired and removing any expired ones from the m_locks list (I only do this during the host update); then I can check if the list is empty. At the same time, I get automatic unlocking when a client resets the shared_ptr it is hanging onto, which also happens when a client is destroyed.
The overall effect is that, since clients rarely need exclusivity (typically reserved for additions and deletions only), most of the time a request to LockUpdate(false), that is to say non-exclusive, succeeds as long as (!m_exclusiveLock). And a LockUpdate(true), a request for exclusivity, succeeds only when both (!m_exclusiveLock) and (m_locks.empty()).
A queue could be added to mitigate between exclusive and non-exclusive locks, however, I have had no collisions thus far, so I intend to wait until that happens to add the solution (mostly so I have a real-world test condition).
So far this is working well for my needs. I can imagine the need to expand this, and some issues that might arise over expanded use; however, this was quick to implement and required very little custom code.
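To make the scheme concrete, here is a hypothetical sketch of LockUpdate and IsUpdateLocked (the answer does not show these bodies; the Host class name and exact shapes are my assumptions):
UpdateLock::ptr Host::LockUpdate(bool exclusive)
{
    if (IsUpdateLocked(exclusive))
        return nullptr;                       // request refused for now
    auto lock = std::make_shared<UpdateLock>();
    if (exclusive)
        m_exclusiveLock = lock;               // weak_ptr expires when the client drops it
    else
        m_locks.push_back(lock);
    return lock;
}

bool Host::IsUpdateLocked(bool exclusive) const
{
    if (!m_exclusiveLock.expired())
        return true;                          // someone holds exclusive access
    if (!exclusive)
        return false;                         // shared access only blocked by exclusivity
    for (const auto& w : m_locks)
        if (!w.expired())
            return true;                      // a live shared lock blocks exclusivity
    return false;
}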
This is an old question, but I would like to offer another solution.
It seems you need not a semaphore but an event, like Windows events.
Very effective events can be implemented as follows:
#ifdef _MSC_VER
#include <concrt.h>
#else
// pthread implementation
#include <cstddef>
#include <cstdint>
#include <shared_mutex>
namespace Concurrency
{
const unsigned int COOPERATIVE_TIMEOUT_INFINITE = (unsigned int)-1;
const size_t COOPERATIVE_WAIT_TIMEOUT = SIZE_MAX;
class event
{
public:
event();
~event();
size_t wait(unsigned int timeout = COOPERATIVE_TIMEOUT_INFINITE);
void set();
void reset();
static size_t wait_for_multiple(event** _PPEvents, size_t _Count, bool _FWaitAll, unsigned int _Timeout = COOPERATIVE_TIMEOUT_INFINITE);
static const unsigned int timeout_infinite = COOPERATIVE_TIMEOUT_INFINITE;
private:
int d;
std::shared_mutex guard;
};
};
namespace concurrency = Concurrency;
#include <unistd.h>
#include <errno.h>
#include <sys/eventfd.h>
#include <sys/epoll.h>
#include <chrono>
#include "../HandleHolder.h"
typedef CommonHolder<int, close> fd_holder;
namespace Concurrency
{
int watch(int ep_fd, int fd)
{
epoll_event ep_event;
ep_event.events = EPOLLIN;
ep_event.data.fd = fd;
return epoll_ctl(ep_fd, EPOLL_CTL_ADD, fd, &ep_event);
}
event::event()
: d(eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK))
{
}
event::~event()
{
std::unique_lock<std::shared_mutex> lock(guard);
close(d);
d = -1;
}
size_t event::wait(unsigned int timeout)
{
fd_holder ep_fd(epoll_create1(EPOLL_CLOEXEC));
{
std::shared_lock<std::shared_mutex> lock(guard);
if (d == -1 || watch(ep_fd.GetHandle(), d) < 0)
return COOPERATIVE_WAIT_TIMEOUT;
}
epoll_event ep_event;
return epoll_wait(ep_fd.GetHandle(), &ep_event, 1, timeout) == 1 && (ep_event.events & EPOLLIN) ? 0 : COOPERATIVE_WAIT_TIMEOUT;
}
void event::set()
{
uint64_t count = 1;
write(d, &count, sizeof(count));
}
void event::reset()
{
uint64_t count;
read(d, &count, sizeof(count));
}
size_t event::wait_for_multiple(event** _PPEvents, size_t _Count, bool _FWaitAll, unsigned int _Timeout)
{
if (_FWaitAll) // not implemented
std::abort();
const auto deadline = _Timeout != COOPERATIVE_TIMEOUT_INFINITE ? std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now().time_since_epoch()).count() + _Timeout : COOPERATIVE_TIMEOUT_INFINITE;
fd_holder ep_fd(epoll_create1(EPOLL_CLOEXEC));
int fds[_Count];
for (int i = 0; i < _Count; ++i)
{
std::shared_lock<std::shared_mutex> lock(_PPEvents[i]->guard);
fds[i] = _PPEvents[i]->d;
if (fds[i] != -1 && watch(ep_fd.GetHandle(), fds[i]) < 0)
fds[i] = -1;
}
epoll_event ep_events[_Count];
// The epoll_wait call can be interrupted by a signal. Wait out the whole timeout, just as on Windows.
int res = 0;
while (true)
{
res = epoll_wait(ep_fd.GetHandle(), &ep_events[0], _Count, _Timeout);
if (res == -1 && errno == EINTR && std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now().time_since_epoch()).count() < deadline)
continue;
break;
}
for (int i = 0; i < _Count; ++i)
{
if (fds[i] == -1)
continue;
for (int j = 0; j < res; ++j)
if (ep_events[j].data.fd == fds[i] && (ep_events[j].events & EPOLLIN))
return i;
}
return COOPERATIVE_WAIT_TIMEOUT;
}
};
#endif
And then just use concurrency::event
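A minimal usage sketch of the class above (my addition; per the implementation, wait() returns 0 on success and COOPERATIVE_WAIT_TIMEOUT on timeout):
#include <thread>

concurrency::event done;

int main()
{
    std::thread worker([&] {
        // ... do some work ...
        done.set();                                           // signal completion
    });
    if (done.wait(5000) != concurrency::COOPERATIVE_WAIT_TIMEOUT) {
        // event was signaled within 5 seconds
    }
    worker.join();
}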
Different from other answers, I propose a new version which:
Unblocks all waiting threads before being deleted. In this case, deleting the semaphore will wake up all waiting threads, and only after everybody wakes up will the semaphore destructor exit.
Has a parameter to the wait() call, to automatically unlock the calling thread after the timeout in milliseconds has passed.
Has an option on the constructor to limit the available resource count to the count the semaphore was initialized with. This way, calling notify() too many times will not increase how many resources the semaphore has.
#include <stdio.h>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <iostream>
std::recursive_mutex g_sync_mutex;
#define sync(x) do { \
std::unique_lock<std::recursive_mutex> lock(g_sync_mutex); \
x; \
} while (false);
class Semaphore {
int _count;
bool _limit;
int _all_resources;
int _wakedup;
std::mutex _mutex;
std::condition_variable_any _condition_variable;
public:
/**
* count - how many resources this semaphore holds
* limit - limit notify() calls only up to the count value (available resources)
*/
Semaphore (int count, bool limit)
: _count(count),
_limit(limit),
_all_resources(count),
_wakedup(count)
{
}
/**
* Unlock all waiting threads before destructing the semaphore (to avoid their segfault later)
*/
virtual ~Semaphore () {
std::unique_lock<std::mutex> lock(_mutex);
_wakeup(lock);
}
void _wakeup(std::unique_lock<std::mutex>& lock) {
int lastwakeup = 0;
while( _wakedup < _all_resources ) {
lock.unlock();
notify();
lock.lock();
// avoids 100% CPU usage if someone is not waking up properly
if (lastwakeup == _wakedup) {
std::this_thread::sleep_for( std::chrono::milliseconds(10) );
}
lastwakeup = _wakedup;
}
}
// Mutex and condition variables are not movable and there is no need for smart pointers yet
Semaphore(const Semaphore&) = delete;
Semaphore& operator =(const Semaphore&) = delete;
Semaphore(const Semaphore&&) = delete;
Semaphore& operator =(const Semaphore&&) = delete;
/**
* Release one acquired resource.
*/
void notify()
{
std::unique_lock<std::mutex> lock(_mutex);
// sync(std::cerr << getTime() << "Calling notify(" << _count << ", " << _limit << ", " << _all_resources << ")" << std::endl);
_count++;
if (_limit && _count > _all_resources) {
_count = _all_resources;
}
_condition_variable.notify_one();
}
/**
* This function never blocks!
* Return false if it would block when acquiring the lock. Otherwise acquires the lock and return true.
*/
bool try_acquire() {
std::unique_lock<std::mutex> lock(_mutex);
// sync(std::cerr << getTime() << "Calling try_acquire(" << _count << ", " << _limit << ", " << _all_resources << ")" << std::endl);
if(_count <= 0) {
return false;
}
_count--;
return true;
}
/**
* Return true if the timeout expired, otherwise return false.
* timeout - how many milliseconds to wait before automatically unlocking the wait() call.
*/
bool wait(int timeout = 0) {
std::unique_lock<std::mutex> lock(_mutex);
// sync(std::cerr << getTime() << "Calling wait(" << _count << ", " << _limit << ", " << _all_resources << ")" << std::endl);
_count--;
_wakedup--;
try {
std::chrono::time_point<std::chrono::system_clock> timenow = std::chrono::system_clock::now();
while(_count < 0) {
if (timeout < 1) {
_condition_variable.wait(lock);
}
else {
std::cv_status status = _condition_variable.wait_until(lock, timenow + std::chrono::milliseconds(timeout));
if ( std::cv_status::timeout == status) {
_count++;
_wakedup++;
return true;
}
}
}
}
catch (...) {
_count++;
_wakedup++;
throw;
}
_wakedup++;
return false;
}
/**
* Return true if calling wait() will block the calling thread
*/
bool locked() {
std::unique_lock<std::mutex> lock(_mutex);
return _count <= 0;
}
/**
* Return true if the semaphore has at least all resources available (since when it was created)
*/
bool freed() {
std::unique_lock<std::mutex> lock(_mutex);
return _count >= _all_resources;
}
/**
* Return how many resources are available:
* - 0 means no free resources, and calling wait() will block the calling thread
* - a negative value means there are several threads being blocked
* - a positive value means there are no threads waiting
*/
int count() {
std::unique_lock<std::mutex> lock(_mutex);
return _count;
}
/**
* Wake everybody who is waiting and reset the semaphore to its initial value.
*/
void reset() {
std::unique_lock<std::mutex> lock(_mutex);
if(_count < 0) {
_wakeup(lock);
}
_count = _all_resources;
}
};
Utility to print the current timestamp:
std::string getTime() {
char buffer[20];
#if defined( WIN32 )
SYSTEMTIME wlocaltime;
GetLocalTime(&wlocaltime);
::snprintf(buffer, sizeof buffer, "%02d:%02d:%02d.%03d ", wlocaltime.wHour, wlocaltime.wMinute, wlocaltime.wSecond, wlocaltime.wMilliseconds);
#else
std::chrono::time_point< std::chrono::system_clock > now = std::chrono::system_clock::now();
auto duration = now.time_since_epoch();
auto hours = std::chrono::duration_cast< std::chrono::hours >( duration );
duration -= hours;
auto minutes = std::chrono::duration_cast< std::chrono::minutes >( duration );
duration -= minutes;
auto seconds = std::chrono::duration_cast< std::chrono::seconds >( duration );
duration -= seconds;
auto milliseconds = std::chrono::duration_cast< std::chrono::milliseconds >( duration );
duration -= milliseconds;
time_t theTime = time( NULL );
struct tm* aTime = localtime( &theTime );
::snprintf(buffer, sizeof buffer, "%02d:%02d:%02d.%03ld ", aTime->tm_hour, aTime->tm_min, aTime->tm_sec, milliseconds.count());
#endif
return buffer;
}
Example program using this semaphore:
// g++ -o test -Wall -Wextra -ggdb -g3 -pthread test.cpp && gdb --args ./test
// valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose ./test
// procdump -accepteula -ma -e -f "" -x c:\ myexe.exe
int main(int argc, char* argv[]) {
std::cerr << getTime() << "Creating Semaphore" << std::endl;
Semaphore* semaphore = new Semaphore(1, false);
semaphore->wait(1000);
semaphore->wait(1000);
std::cerr << getTime() << "Auto Unlocking Semaphore wait" << std::endl;
std::this_thread::sleep_for( std::chrono::milliseconds(5000) );
delete semaphore;
std::cerr << getTime() << "Exiting after 10 seconds..." << std::endl;
return 0;
}
Example output:
11:03:01.012 Creating Semaphore
11:03:02.012 Auto Unlocking Semaphore wait
11:03:07.012 Exiting after 10 seconds...
An extra function which uses an EventLoop to unlock the semaphores after some time:
std::shared_ptr<std::atomic<bool>> autowait(Semaphore* semaphore, int timeout, EventLoop<std::function<void()>>& eventloop, const char* source) {
std::shared_ptr<std::atomic<bool>> waiting(std::make_shared<std::atomic<bool>>(true));
sync(std::cerr << getTime() << "autowait '" << source << "'..." << std::endl);
if (semaphore->try_acquire()) {
eventloop.enqueue( timeout, [waiting, source, semaphore]{
if ( (*waiting).load() ) {
sync(std::cerr << getTime() << "Timeout '" << source << "'..." << std::endl);
semaphore->notify();
}
} );
}
else {
semaphore->wait(timeout);
}
return waiting;
}
Semaphore semaphore(1, false);
EventLoop<std::function<void()>>* eventloop = new EventLoop<std::function<void()>>(true);
std::shared_ptr<std::atomic<bool>> waiting_something = autowait(&semaphore, 45000, eventloop, "waiting_something");
In case someone is interested in an atomic version, here is the implementation. The performance is expected to be better than the mutex & condition variable version.
class semaphore_atomic
{
public:
void notify() {
count_.fetch_add(1, std::memory_order_release);
}
void wait() {
while (true) {
int count = count_.load(std::memory_order_relaxed);
if (count > 0) {
if (count_.compare_exchange_weak(count, count-1, std::memory_order_acq_rel, std::memory_order_relaxed)) {
break;
}
}
}
}
bool try_wait() {
int count = count_.load(std::memory_order_relaxed);
if (count > 0) {
if (count_.compare_exchange_strong(count, count-1, std::memory_order_acq_rel, std::memory_order_relaxed)) {
return true;
}
}
return false;
}
private:
std::atomic_int count_{0};
};

How to make boost::thread_group execute a fixed number of parallel threads

This is the code to create a thread_group and execute all threads in parallel:
boost::thread_group group;
for (int i = 0; i < 15; ++i)
group.create_thread(aFunctionToExecute);
group.join_all();
This code will execute all threads at once. What I want to do is to execute them all, but 4 maximum in parallel. When one terminates, another one is executed until there are no more to execute.
Another, more efficient solution would be to have each thread call back to the primary thread when it is finished, and the handler on the primary thread could launch a new thread each time. This prevents the repetitive calls to timed_join, as the primary thread won't do anything until the callback is triggered.
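One possible shape of that callback idea, as a sketch rather than tested code (the job container and boost::function signature are my assumptions):
#include <boost/thread.hpp>
#include <boost/function.hpp>
#include <vector>

boost::mutex mx;
boost::condition_variable cv;
unsigned running = 0;

void run_throttled(const std::vector<boost::function<void()> >& jobs, unsigned max_parallel)
{
    for (std::size_t i = 0; i < jobs.size(); ++i)
    {
        boost::unique_lock<boost::mutex> lock(mx);
        cv.wait(lock, [&] { return running < max_parallel; });  // wait for a free slot
        ++running;
        boost::function<void()> job = jobs[i];
        boost::thread([job] {
            job();
            { boost::lock_guard<boost::mutex> lk(mx); --running; }
            cv.notify_one();                                    // "callback" to the primary thread
        }).detach();
    }
    boost::unique_lock<boost::mutex> lock(mx);
    cv.wait(lock, [&] { return running == 0; });                // let the last jobs finish
}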
I have something like this:
boost::mutex mutex_;
boost::condition_variable condition_;
const size_t throttle_;
size_t size_;
bool wait_;
template <typename Env, class F>
void eval_(const Env &env, const F &f) {
{
boost::unique_lock<boost::mutex> lock(mutex_);
size_ = std::min(size_+1, throttle_);
while (throttle_ <= size_) condition_.wait(lock);
}
f.eval(env);
{
boost::lock_guard<boost::mutex> lock(mutex_);
--size_;
}
condition_.notify_one();
}
I think you are looking for a thread_pool implementation, which is available here.
Additionally, I have noticed that if you create a vector of std::future, store the futures of many std::async tasks in it, and do not have any blocking code in the function passed to the thread, VS2013 (at least from what I can confirm) will launch exactly the appropriate number of threads your machine can handle. It reuses the threads once created.
I created my own simplified interface of boost::thread_group to do this job:
class ThreadGroup : public boost::noncopyable
{
private:
boost::thread_group group;
std::size_t maxSize;
float sleepStart;
float sleepCoef;
float sleepMax;
std::set<boost::thread*> running;
public:
ThreadGroup(std::size_t max_size = 0,
float max_sleeping_time = 1.0f,
float sleeping_time_coef = 1.5f,
float sleeping_time_start = 0.001f) :
boost::noncopyable(),
group(),
maxSize(max_size),
sleepStart(sleeping_time_start),
sleepCoef(sleeping_time_coef),
sleepMax(max_sleeping_time),
running()
{
if(max_size == 0)
this->maxSize = (std::size_t)std::max(boost::thread::hardware_concurrency(), 1u);
assert(max_sleeping_time >= sleeping_time_start);
assert(sleeping_time_start > 0.0f);
assert(sleeping_time_coef > 1.0f);
}
~ThreadGroup()
{
this->joinAll();
}
template<typename F> boost::thread* createThread(F f)
{
float sleeping_time = this->sleepStart;
while(this->running.size() >= this->maxSize)
{
for(std::set<boost::thread*>::iterator it = running.begin(); it != running.end();)
{
const std::set<boost::thread*>::iterator jt = it++;
if((*jt)->timed_join(boost::posix_time::milliseconds((long int)(1000.0f * sleeping_time))))
running.erase(jt);
}
if(sleeping_time < this->sleepMax)
{
sleeping_time *= this->sleepCoef;
if(sleeping_time > this->sleepMax)
sleeping_time = this->sleepMax;
}
}
return *this->running.insert(this->group.create_thread(f)).first;
}
void joinAll()
{
this->group.join_all();
}
void interruptAll()
{
#ifdef BOOST_THREAD_PROVIDES_INTERRUPTIONS
this->group.interrupt_all();
#endif
}
std::size_t size() const
{
return this->group.size();
}
};
Here is an example of use, very similar to boost::thread_group, with the main difference that the creation of a thread is a waiting point:
{
ThreadGroup group(4);
for(int i = 0; i < 15; ++i)
group.createThread(aFunctionToExecute);
} // join all at destruction