I'm developing a service. Currently I need to get local hour for every request, since it involves system call, it costs too much.
In my case, some deviation like 200ms is OK for me.
So what's the best way to maintain a variable storing local_hour, and update it every 200ms?
static int32_t GetLocalHour() {
time_t t = std::time(nullptr);
if (t == -1) { return -1; }
struct tm *time_info_ptr = localtime(&t);
return (nullptr != time_info_ptr) ? time_info_ptr->tm_hour : -1;
}
If you want your main thread to spend as little time as possible on getting the current hour you can start a background thread to do all the heavy lifting.
For all things time use std::chrono types.
Here is the example, which uses quite a few (very useful) multithreading building blocks from C++.
#include <chrono>
#include <future>
#include <condition_variable>
#include <mutex>
#include <atomic>
#include <iostream>
// building blocks
// std::future/std::async, to start a loop/function on a seperate thread
// std::atomic, to be able to read/write threadsafely from a variable
// std::chrono, for all things time
// std::condition_variable, for communicating between threads. Basicall a signal that only signals that something has changed that might be interesting
// lambda functions : anonymous functions that are useful in this case for starting the asynchronous calls and to setup predicates (functions returning a bool)
// std::mutex : threadsafe access to a bit of code
// std::unique_lock : to automatically unlock a mutex when code goes out of scope (also needed for condition_variable)
// helper to convert time to start of day
using days_t = std::chrono::duration<int, std::ratio_multiply<std::chrono::hours::period, std::ratio<24> >::type>;
// class that has an asynchronously running loop that updates two variables (threadsafe)
// m_hours and m_seconds (m_seconds so output is a bit more interesting)
class time_keeper_t
{
public:
time_keeper_t() :
m_delay{ std::chrono::milliseconds(200) }, // update loop period
m_future{ std::async(std::launch::async,[this] {update_time_loop(); }) } // start update loop
{
// wait until asynchronous loop has started
std::unique_lock<std::mutex> lock{ m_mtx };
// wait until the asynchronous loop has started.
// this can take a bit of time since OS needs to schedule a thread for that
m_cv.wait(lock, [this] {return m_started; });
}
~time_keeper_t()
{
// threadsafe stopping of the mainloop
// to avoid problems that the thread is still running but the object
// with members is deleted.
{
std::unique_lock<std::mutex> lock{ m_mtx };
m_stop = true;
m_cv.notify_all(); // this will wakeup the loop and stop
}
// future.get will wait until the loop also has finished
// this ensures no member variables will be accessed
// by the loop thread and it is safe to fully destroy this instance
m_future.get();
}
// inline to avoid extra calls
inline int hours() const
{
return m_hours;
}
// inline to avoid extra calls
inline int seconds() const
{
return m_seconds;
}
private:
void update_time()
{
m_now = std::chrono::steady_clock::now();
std::chrono::steady_clock::duration tp = m_now.time_since_epoch();
// calculate back till start of day
days_t days = duration_cast<days_t>(tp);
tp -= days;
// calculate hours since start of day
auto hours = std::chrono::duration_cast<std::chrono::hours>(tp);
tp -= hours;
m_hours = hours.count();
// seconds since start of last hour
auto seconds = std::chrono::duration_cast<std::chrono::seconds>(tp);
m_seconds = seconds.count() % 60;
}
void update_time_loop()
{
std::unique_lock<std::mutex> lock{ m_mtx };
update_time();
// loop has started and has initialized all things time with values
m_started = true;
m_cv.notify_all();
// stop condition for the main loop, put in a predicate lambda
auto stop_condition = [this]()
{
return m_stop;
};
while (!m_stop)
{
// wait until m_cv is signaled or m_delay timed out
// a condition variable allows instant response and thus
// is better then just having a sleep here.
// (imagine a delay of seconds, that would also mean stopping could
// take seconds, this is faster)
m_cv.wait_for(lock, m_delay, stop_condition);
if (!m_stop) update_time();
}
}
std::atomic<int> m_hours;
std::atomic<int> m_seconds;
std::mutex m_mtx;
std::condition_variable m_cv;
bool m_started{ false };
bool m_stop{ false };
std::chrono::steady_clock::time_point m_now;
std::chrono::steady_clock::duration m_delay;
std::future<void> m_future;
};
int main()
{
time_keeper_t time_keeper;
// the mainloop now just can ask the time_keeper for seconds
// or in your case hours. The only time needed is the time
// to return an int (atomic) instead of having to make a full
// api call to get the time.
for (std::size_t n = 0; n < 30; ++n)
{
std::cout << "seconds now = " << time_keeper.seconds() << "\n";
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
return 0;
}
You don't need to query local time for every request because hour doesn't change every 200ms. Just update the local hour variable every hour
The most correct solution would be registering to a timer event like scheduled task on Windows or cronjobs on Linux that runs at the start of every hour. Alternatively create a timer that runs every hour and update the variable
The timer creation depends on the platform, for example on Windows use SetTimer, on Linux use timer_create. Here's a very simple solution using boost::asio which assumes that you run on the exact hour. You'll need to make some modification to allow it to run at any time, for example by creating a one-shot timer or by sleeping until the next hour
#include <chrono>
using namespace std::chrono_literals;
int32_t get_local_hour()
{
time_t t = std::time(nullptr);
if (t == -1) { return -1; }
struct tm *time_info_ptr = localtime(&t);
return (nullptr != time_info_ptr) ? time_info_ptr->tm_hour : -1;
}
static int32_t local_hour = get_local_hour();
bool running = true;
// Timer callback body, called every hour
void update_local_hour(const boost::system::error_code& /*e*/,
boost::asio::deadline_timer* t)
{
while (running)
{
t->expires_at(t->expires_at() + boost::posix_time::hour(1));
t->async_wait(boost::bind(print,
boost::asio::placeholders::error, t, count));
local_hour = get_local_hour();
}
}
int main()
{
boost::asio::io_service io;
// Timer that runs every hour and update the local_hour variable
boost::asio::deadline_timer t(io, boost::posix_time::hour(1));
t.async_wait(boost::bind(update_local_hour,
boost::asio::placeholders::error, &t));
running = true;
io.run();
std::this_thread::sleep_for(3h);
running = false; // stop the timer
}
Now just use local_hour directly instead of GetLocalHour()
Related
I'm working on timer queue using concurrent_priority_queue right now..
I implemented basic logic of executing most urgent event in this queue.
Here's my code.
TimerEvent ev{};
while (timer.mLoop)
{
while (timer.mQueue.empty() == false)
{
if (timer.mQueue.try_pop(ev) == false)
continue;
if (ev.Type == EVENT_TYPE::PHYSICS) // Physics event is around 15 ~ 17ms
{
auto now = Clock::now();
std::this_thread::sleep_for(ev.StartTime - now);
timer.mGameServerPtr->PostPhysicsOperation(ev.WorldID);
}
else if (ev.Type == EVENT_TYPE::INVINCIBLE) // This event is 3sec long.
{
auto now = Clock::now();
std::this_thread::sleep_for(ev.StartTime - now); // This is wrong!!
timer.mGameServerPtr->ReleaseInvincibleMode(ev.WorldID);
}
}
std::this_thread::sleep_for(10ms);
}
The problem would be easily solved if there is like front/top method in concurrent_priority_queue.
But there is no such method in class because it isn't thread-safe.
So, I just popped event out of the queue and waited until start time of the event.
In this way, I shouldn't have to insert event into queue again.
But problem is that if I have another type of event like EVENT_TYPE::INVINCIBLE, then I shouldn't just use sleep_for because this event is almost 3 second long. While waiting for 3 second, the PHYSICS event will not executed in time.
I can use sleep_for method for PHYSIC event since it is most shortest one to wait.
But I have to re-insert INVINCIBLE event into queue.
How can I optimize this timer without re-insert event into queue again?
How can I optimize this timer without re-insert event into queue again?
By the looks of it, that'll be hard when using the implementation of concurrent_priority_queue you are currently using. It wouldn't be hard if you just used the standard std::priority_queue and added some locking where needed though.
Example:
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <functional>
#include <iostream>
#include <mutex>
#include <queue>
using Clock = std::chrono::steady_clock;
using time_point = std::chrono::time_point<Clock>;
struct TimerEvent {
void operator()() { m_event(); }
bool operator<(const TimerEvent& rhs) const {
return rhs.StartTime < StartTime;
}
time_point StartTime;
std::function<void()> m_event; // what to execute when the timer is due
};
class TimerQueue {
public:
~TimerQueue() { shutdown(); }
void shutdown() {
m_shutdown = true;
m_cv.notify_all();
}
// add a new TimerEvent to the queue
template<class... Args>
void emplace(Args&&... args) {
std::scoped_lock lock(m_mutex);
m_queue.emplace(TimerEvent{std::forward<Args>(args)...});
m_cv.notify_all();
}
// Wait until it's time to fire the event that is first in the queue
// which may change while we are waiting, but that'll work too.
bool wait_pop(TimerEvent& ev) {
std::unique_lock lock(m_mutex);
while(!m_shutdown &&
(m_queue.empty() || Clock::now() < m_queue.top().StartTime))
{
if(m_queue.empty()) { // wait "forever"
m_cv.wait(lock);
} else { // wait until first StartTime
auto st = m_queue.top().StartTime;
m_cv.wait_until(lock, st);
}
}
if(m_shutdown) return false; // time to quit
ev = std::move(m_queue.top()); // extract event
m_queue.pop();
return true;
}
private:
std::priority_queue<TimerEvent> m_queue;
mutable std::mutex m_mutex;
std::condition_variable m_cv;
std::atomic<bool> m_shutdown{};
};
If an event that is due before the event we're currently waiting for in wait_pop comes in, the m_cv.wait/m_cv.wait_until will unblock (because of the m_cv.notify_all() in emplace()) and that new element will be the first in queue.
The event loop could simply be:
void event_loop(TimerQueue& tq) {
TimerEvent te;
while(tq.wait_pop(te)) {
te(); // execute event
}
// the queue was shutdown, exit thread
}
And you could put any kind of invocable with the time point when you'd like it to fire in that queue.
#include <thread>
int main() {
TimerQueue tq;
// create a thread to run the event loop
auto ev_th = std::thread(event_loop, std::ref(tq));
// wait a second
std::this_thread::sleep_for(std::chrono::seconds(1));
// add an event in 5 seconds
tq.emplace(Clock::now() + std::chrono::seconds(5), [] {
std::cout << "second\n";
});
// wait a second
std::this_thread::sleep_for(std::chrono::seconds(1));
// add an event in 2 seconds
tq.emplace(Clock::now() + std::chrono::seconds(2), [] {
std::cout << "first\n";
});
// sleep some time
std::this_thread::sleep_for(std::chrono::seconds(3));
// shutdown, only the event printing "first" will have fired
tq.shutdown();
ev_th.join();
}
Demo with logging
So in the compilable code below, I'm sending a Query message to be handled by another thread and I want to wait for a response or timeout if it hits a certain timeout. I don't know why the wait_until is missing the signal and hitting the timeout period when it should not be doing that. It only happens if the handler is returning a response REALLY fast. How do you propose I fix the code below?
#include <mutex>
#include <memory>
#include <condition_variable>
#include <atomic>
#include <thread>
#include <iostream>
#include <queue>
#include <zconf.h>
class Question
{
};
class Answer
{
public:
bool isAnswered = false;
};
class Query
{
std::condition_variable _cv;
std::mutex _mutex;
std::atomic_bool _questionAnswered;
std::atomic_bool _questionSet;
std::shared_ptr<Question> _question;
std::shared_ptr<Answer> _answer;
public:
void setQuestion(std::shared_ptr<Question> & question)
{
if(!_questionSet)
{
_question = question;
_questionSet = true;
}
};
void setAnswer(std::shared_ptr<Answer> answer)
{
std::unique_lock<std::mutex> lock(_mutex);
if(!_questionAnswered)
{
// Set the answer and notify the getAnswerWithTimeout() to unlock if holding
_answer = answer;
_questionAnswered = true;
lock.unlock();
_cv.notify_all();
}
};
std::shared_ptr<Answer> getAnswerWithTimeout(uint64_t micros)
{
std::unique_lock<std::mutex> lock(_mutex);
if(!_questionAnswered)
{
auto now = std::chrono::system_clock::now();
// When timeout occurs, lock down this class, set the answer as null, and set error to timeout
if (!_cv.wait_until(lock, now + std::chrono::microseconds(micros), [&]() { return (bool)_questionAnswered; }) )
{
_answer = nullptr;
_questionAnswered = true;
}
}
return _answer;
};
};
void function_to_run(std::shared_ptr<Query> query)
{
// Respond to query and set the answer
auto answer = std::make_shared<Answer>();
answer->isAnswered = true;
// Set the response answer
query->setAnswer(answer);
}
std::queue<std::shared_ptr<Query>> queryHandler;
bool keepRunning = true;
std::mutex queryHandlerMutex;
std::condition_variable queryHandlerCv;
void handleQueryHandler()
{
while (true)
{
std::shared_ptr<Query> query;
{
std::unique_lock<std::mutex> lock(queryHandlerMutex);
queryHandlerCv.wait(lock, [&] { return !keepRunning || !queryHandler.empty(); });
if (!keepRunning) {
return;
}
// Pop off item from queue
query = queryHandler.front();
queryHandler.pop();
}
// Process query with function
function_to_run(query);
}
}
void insertIntoQueryHandler(std::shared_ptr<Query> & query)
{
{
std::unique_lock<std::mutex> lock(queryHandlerMutex);
// Insert into Query Handler
queryHandler.emplace(query);
}
// Notify query handler to start if locked on empty
queryHandlerCv.notify_one();
}
std::shared_ptr<Answer>
ask(std::shared_ptr<Query> query, uint64_t timeoutMicros=0)
{
std::shared_ptr<Answer> answer = nullptr;
// Send Query to be handled by external thread
insertIntoQueryHandler(query);
// Hold for the answer to be returned with timeout period
answer = query->getAnswerWithTimeout(timeoutMicros);
return answer;
}
int main()
{
// Start Up Query Handler thread to handle Queries
std::thread queryHandlerThread(handleQueryHandler);
// Create queries in infinite loop and process
for(int i = 0; i < 1000000; i++)
{
auto question = std::make_shared<Question>();
auto query = std::make_shared<Query>();
query->setQuestion(question);
auto answer = ask(query, 1000);
if(!answer)
{
std::cout << "Query Timed out after 1000us" << std::endl;
}
}
// Stop the thread
{
std::unique_lock<std::mutex> lock(queryHandlerMutex);
keepRunning = false;
}
queryHandlerCv.notify_one();
queryHandlerThread.join();
return 0;
}
As discussed in the comments, the main issue here is the timeout period you're using (1ms), in this interval:
auto now = std::chrono::system_clock::now();
.... another thread may sneak in here ....
if (!_cv.wait_until(lock, now + std::chrono::microseconds(micros), [&]() { return (bool)_questionAnswered; }) )
{
another thread can sneak in and consume a timeslice (e.g. 10ms) and the wait_until would timeout immediately. Furthermore there are reports of unexpected behaviour with wait_until as described here:
std::condition_variable wait_until surprising behaviour
Increasing the timeout to something in the order of several timeslices will fix this. You can also adjust thread priorities.
Personally I advocate polling a condition variable with wait_for which is efficient and also bails in a timely fashion (as opposed to polling a flag and sleeping).
Time slices in non-RTOS systems tend to be in the order of 10ms, so I would not expect such short timeouts to work accurately and predictably in general-purpose systems. See this for an introduction to pre-emptive multitasking:
https://www.geeksforgeeks.org/time-slicing-in-cpu-scheduling/
as well as this:
http://dev.ti.com/tirex/explore/node?node=AL.iEm6ATaD6muScZufjlQ__pTTHBmu__LATEST
As jtbandes points out, it's worth using tools such as Clang's thread sanitiser to check for potential logic races: https://clang.llvm.org/docs/ThreadSanitizer.html
I'd like to create a very efficient task scheduler system in C++.
The basic idea is this:
class Task {
public:
virtual void run() = 0;
};
class Scheduler {
public:
void add(Task &task, double delayToRun);
};
Behind Scheduler, there should be a fixed-size thread pool, which run the tasks (I don't want to create a thread for each task). delayToRun means that the task doesn't get executed immediately, but delayToRun seconds later (measuring from the point it was added into the Scheduler).
(delayToRun means an "at-least" value, of course. If the system is loaded, or if we ask the impossible from the Scheduler, it won't be able to handle our request. But it should do the best it can)
And here's my problem. How to implement delayToRun functionality efficiently? I'm trying to solve this problem with the use of mutexes and condition variables.
I see two ways:
With manager thread
Scheduler contains two queues: allTasksQueue, and tasksReadyToRunQueue. A task gets added into allTasksQueue at Scheduler::add. There is a manager thread, which waits the smallest amount of time so it can put a task from allTasksQueue to tasksReadyToRunQueue. Worker threads wait for a task available in tasksReadyToRunQueue.
If Scheduler::add adds a task in front of allTasksQueue (a task, which has a value of delayToRun so it should go before the current soonest-to-run task), then the manager task need to be woken up, so it can update the time of wait.
This method can be considered inefficient, because it needs two queues, and it needs two condvar.signals to make a task run (one for allTasksQueue->tasksReadyToRunQueue, and one for signalling a worker thread to actually run the task)
Without manager thread
There is one queue in the scheduler. A task gets added into this queue at Scheduler::add. A worker thread checks the queue. If it is empty, it waits without a time constraint. If it is not empty, it waits for the soonest task.
If there is only one condition variable for which the working threads waiting for: this method can be considered inefficient, because if a task added in front of the queue (front means, if there are N worker threads, then the task index < N) then all the worker threads need to be woken up to update the time which they are waiting for.
If there is a separate condition variable for each thread, then we can control which thread to wake up, so in this case we don't need to wake up all threads (we only need to wake up the thread which has the largest waiting time, so we need to manage this value). I'm currently thinking about implementing this, but working out the exact details are complex. Are there any recommendations/thoughts/document on this method?
Is there any better solution for this problem? I'm trying to use standard C++ features, but I'm willing to use platform dependent (my main platform is linux) tools too (like pthreads), or even linux specific tools (like futexes), if they provide a better solution.
You can avoid both having a separate "manager" thread, and having to wake up a large number of tasks when the next-to-run task changes, by using a design where a single pool thread waits for the "next to run" task (if there is one) on one condition variable, and the remaining pool threads wait indefinitely on a second condition variable.
The pool threads would execute pseudocode along these lines:
pthread_mutex_lock(&queue_lock);
while (running)
{
if (head task is ready to run)
{
dequeue head task;
if (task_thread == 1)
pthread_cond_signal(&task_cv);
else
pthread_cond_signal(&queue_cv);
pthread_mutex_unlock(&queue_lock);
run dequeued task;
pthread_mutex_lock(&queue_lock);
}
else if (!queue_empty && task_thread == 0)
{
task_thread = 1;
pthread_cond_timedwait(&task_cv, &queue_lock, time head task is ready to run);
task_thread = 0;
}
else
{
pthread_cond_wait(&queue_cv, &queue_lock);
}
}
pthread_mutex_unlock(&queue_lock);
If you change the next task to run, then you execute:
if (task_thread == 1)
pthread_cond_signal(&task_cv);
else
pthread_cond_signal(&queue_cv);
with the queue_lock held.
Under this scheme, all wakeups are directly at only a single thread, there's only one priority queue of tasks, and there's no manager thread required.
Your specification is a bit too strong:
delayToRun means that the task doesn't get executed immediately, but delayToRun seconds later
You forgot to add "at least" :
The task don't get executed now, but at least delayToRun seconds later
The point is that if ten thousand tasks are all scheduled with a 0.1 delayToRun, they surely won't practically be able to run at the same time.
With such correction, you just maintain some queue (or agenda) of (scheduled-start-time, closure to run), you keep that queue sorted, and you start N (some fixed number) of threads which atomically pop the first element of the agenda and run it.
then all the worker threads need to be woken up to update the time which they are waiting for.
No, some worker threads would be woken up.
Read about condition variables and broadcast.
You might also user POSIX timers, see timer_create(2), or Linux specific fd timer, see timerfd_create(2)
You probably would avoid running blocking system calls in your threads, and have some central thread managing them using some event loop (see poll(2)...); otherwise, if you have a hundred tasks running sleep(100) and one task scheduled to run in half a second it won't run before a hundred seconds.
You may want to read about continuation-passing style programming (it -CPS- is highly relevant). Read the paper about Continuation Passing C by Juliusz Chroboczek.
Look also into Qt threads.
You could also consider coding in Go (with its Goroutines).
This is a sample implementation for the interface you provided that comes closest to your 'With manager thread' description.
It uses a single thread (timer_thread) to manage a queue (allTasksQueue) that is sorted based on the actual time when a task must be started (std::chrono::time_point).
The 'queue' is a std::priority_queue (which keeps its time_point key elements sorted).
timer_thread is normally suspended until the next task is started or when a new task is added.
When a task is about to be run, it is placed in tasksReadyToRunQueue, one of the worker threads is signaled, wakes up, removes it from the queue and starts processing the task..
Note that the thread pool has a compile-time upper limit for the number of threads (40). If you are scheduling more tasks than can be dispatched to workers,
new task will block until threads are available again.
You said this approach is not efficient, but overall, it seems reasonably efficient to me. It's all event driven and you are not wasting CPU cycles by unnecessary spinning.
Of course, it's just an example, optimizations are possible (note: std::multimap has been replaced with std::priority_queue).
The implementation is C++11 compliant
#include <iostream>
#include <chrono>
#include <queue>
#include <unistd.h>
#include <vector>
#include <thread>
#include <condition_variable>
#include <mutex>
#include <memory>
class Task {
public:
virtual void run() = 0;
virtual ~Task() { }
};
class Scheduler {
public:
Scheduler();
~Scheduler();
void add(Task &task, double delayToRun);
private:
using timepoint = std::chrono::time_point<std::chrono::steady_clock>;
struct key {
timepoint tp;
Task *taskp;
};
struct TScomp {
bool operator()(const key &a, const key &b) const
{
return a.tp > b.tp;
}
};
const int ThreadPoolSize = 40;
std::vector<std::thread> ThreadPool;
std::vector<Task *> tasksReadyToRunQueue;
std::priority_queue<key, std::vector<key>, TScomp> allTasksQueue;
std::thread TimerThr;
std::mutex TimerMtx, WorkerMtx;
std::condition_variable TimerCV, WorkerCV;
bool WorkerIsRunning = true;
bool TimerIsRunning = true;
void worker_thread();
void timer_thread();
};
Scheduler::Scheduler()
{
for (int i = 0; i <ThreadPoolSize; ++i)
ThreadPool.push_back(std::thread(&Scheduler::worker_thread, this));
TimerThr = std::thread(&Scheduler::timer_thread, this);
}
Scheduler::~Scheduler()
{
{
std::lock_guard<std::mutex> lck{TimerMtx};
TimerIsRunning = false;
TimerCV.notify_one();
}
TimerThr.join();
{
std::lock_guard<std::mutex> lck{WorkerMtx};
WorkerIsRunning = false;
WorkerCV.notify_all();
}
for (auto &t : ThreadPool)
t.join();
}
void Scheduler::add(Task &task, double delayToRun)
{
auto now = std::chrono::steady_clock::now();
long delay_ms = delayToRun * 1000;
std::chrono::milliseconds duration (delay_ms);
timepoint tp = now + duration;
if (now >= tp)
{
/*
* This is a short-cut
* When time is due, the task is directly dispatched to the workers
*/
std::lock_guard<std::mutex> lck{WorkerMtx};
tasksReadyToRunQueue.push_back(&task);
WorkerCV.notify_one();
} else
{
std::lock_guard<std::mutex> lck{TimerMtx};
allTasksQueue.push({tp, &task});
TimerCV.notify_one();
}
}
void Scheduler::worker_thread()
{
for (;;)
{
std::unique_lock<std::mutex> lck{WorkerMtx};
WorkerCV.wait(lck, [this] { return tasksReadyToRunQueue.size() != 0 ||
!WorkerIsRunning; } );
if (!WorkerIsRunning)
break;
Task *p = tasksReadyToRunQueue.back();
tasksReadyToRunQueue.pop_back();
lck.unlock();
p->run();
delete p; // delete Task
}
}
void Scheduler::timer_thread()
{
for (;;)
{
std::unique_lock<std::mutex> lck{TimerMtx};
if (!TimerIsRunning)
break;
auto duration = std::chrono::nanoseconds(1000000000);
if (allTasksQueue.size() != 0)
{
auto now = std::chrono::steady_clock::now();
auto head = allTasksQueue.top();
Task *p = head.taskp;
duration = head.tp - now;
if (now >= head.tp)
{
/*
* A Task is due, pass to worker threads
*/
std::unique_lock<std::mutex> ulck{WorkerMtx};
tasksReadyToRunQueue.push_back(p);
WorkerCV.notify_one();
ulck.unlock();
allTasksQueue.pop();
}
}
TimerCV.wait_for(lck, duration);
}
}
/*
* End sample implementation
*/
class DemoTask : public Task {
int n;
public:
DemoTask(int n=0) : n{n} { }
void run() override
{
std::cout << "Start task " << n << std::endl;;
std::this_thread::sleep_for(std::chrono::seconds(2));
std::cout << " Stop task " << n << std::endl;;
}
};
int main()
{
Scheduler sched;
Task *t0 = new DemoTask{0};
Task *t1 = new DemoTask{1};
Task *t2 = new DemoTask{2};
Task *t3 = new DemoTask{3};
Task *t4 = new DemoTask{4};
Task *t5 = new DemoTask{5};
sched.add(*t0, 7.313);
sched.add(*t1, 2.213);
sched.add(*t2, 0.713);
sched.add(*t3, 1.243);
sched.add(*t4, 0.913);
sched.add(*t5, 3.313);
std::this_thread::sleep_for(std::chrono::seconds(10));
}
It means that you want to run all tasks continuously using some order.
You can create some type of sorted by a delay stack (or even linked list) of tasks. When a new task is coming you should insert it in the position depending of a delay time (just efficiently calculate that position and efficiently insert the new task).
Run all tasks starting with the head of the task stack (or list).
Core code for C++11:
#include <thread>
#include <queue>
#include <chrono>
#include <mutex>
#include <atomic>
using namespace std::chrono;
using namespace std;
class Task {
public:
virtual void run() = 0;
};
template<typename T, typename = enable_if<std::is_base_of<Task, T>::value>>
class SchedulerItem {
public:
T task;
time_point<steady_clock> startTime;
int delay;
SchedulerItem(T t, time_point<steady_clock> s, int d) : task(t), startTime(s), delay(d){}
};
template<typename T, typename = enable_if<std::is_base_of<Task, T>::value>>
class Scheduler {
public:
queue<SchedulerItem<T>> pool;
mutex mtx;
atomic<bool> running;
Scheduler() : running(false){}
void add(T task, double delayMsToRun) {
lock_guard<mutex> lock(mtx);
pool.push(SchedulerItem<T>(task, high_resolution_clock::now(), delayMsToRun));
if (running == false) runNext();
}
void runNext(void) {
running = true;
auto th = [this]() {
mtx.lock();
auto item = pool.front();
pool.pop();
mtx.unlock();
auto remaining = (item.startTime + milliseconds(item.delay)) - high_resolution_clock::now();
if(remaining.count() > 0) this_thread::sleep_for(remaining);
item.task.run();
if(pool.size() > 0)
runNext();
else
running = false;
};
thread t(th);
t.detach();
}
};
Test code:
class MyTask : Task {
public:
virtual void run() override {
printf("mytask \n");
};
};
int main()
{
Scheduler<MyTask> s;
s.add(MyTask(), 0);
s.add(MyTask(), 2000);
s.add(MyTask(), 2500);
s.add(MyTask(), 6000);
std::this_thread::sleep_for(std::chrono::seconds(10));
}
First of all I did look at the other topics on this website and found they don't relate to my problem as those mostly deal with people using I/O operations or thread creation overheads. My problem is that my threadpool or worker-task structure implementation is (in this case) a lot slower than single threading. I'm really confused by this and not sure if it's the ThreadPool, the task itself, how I test it, the nature of threads or something out of my control.
// Sorry for the long code
#include <vector>
#include <queue>
#include <thread>
#include <mutex>
#include <future>
#include "task.hpp"
class ThreadPool
{
public:
ThreadPool()
{
for (unsigned i = 0; i < std::thread::hardware_concurrency() - 1; i++)
m_workers.emplace_back(this, i);
m_running = true;
for (auto&& worker : m_workers)
worker.start();
}
~ThreadPool()
{
m_running = false;
m_task_signal.notify_all();
for (auto&& worker : m_workers)
worker.terminate();
}
void add_task(Task* task)
{
{
std::unique_lock<std::mutex> lock(m_in_mutex);
m_in.push(task);
}
m_task_signal.notify_one();
}
private:
class Worker
{
public:
Worker(ThreadPool* parent, unsigned id) : m_parent(parent), m_id(id)
{}
~Worker()
{
terminate();
}
void start()
{
m_thread = new std::thread(&Worker::work, this);
}
void terminate()
{
if (m_thread)
{
if (m_thread->joinable())
{
m_thread->join();
delete m_thread;
m_thread = nullptr;
m_parent = nullptr;
}
}
}
private:
void work()
{
while (m_parent->m_running)
{
std::unique_lock<std::mutex> lock(m_parent->m_in_mutex);
m_parent->m_task_signal.wait(lock, [&]()
{
return !m_parent->m_in.empty() || !m_parent->m_running;
});
if (!m_parent->m_running) break;
Task* task = m_parent->m_in.front();
m_parent->m_in.pop();
// Fixed the mutex being locked while the task is executed
lock.unlock();
task->execute();
}
}
private:
ThreadPool* m_parent = nullptr;
unsigned m_id = 0;
std::thread* m_thread = nullptr;
};
private:
std::vector<Worker> m_workers;
std::mutex m_in_mutex;
std::condition_variable m_task_signal;
std::queue<Task*> m_in;
bool m_running = false;
};
class TestTask : public Task
{
public:
TestTask() {}
TestTask(unsigned number) : m_number(number) {}
inline void Set(unsigned number) { m_number = number; }
void execute() override
{
if (m_number <= 3)
{
m_is_prime = m_number > 1;
return;
}
else if (m_number % 2 == 0 || m_number % 3 == 0)
{
m_is_prime = false;
return;
}
else
{
for (unsigned i = 5; i * i <= m_number; i += 6)
{
if (m_number % i == 0 || m_number % (i + 2) == 0)
{
m_is_prime = false;
return;
}
}
m_is_prime = true;
return;
}
}
public:
unsigned m_number = 0;
bool m_is_prime = false;
};
int main()
{
ThreadPool pool;
unsigned num_tasks = 1000000;
std::vector<TestTask> tasks(num_tasks);
for (auto&& task : tasks)
task.Set(randint(0, 1000000000));
auto s = std::chrono::high_resolution_clock::now();
#if MT
for (auto&& task : tasks)
pool.add_task(&task);
#else
for (auto&& task : tasks)
task.execute();
#endif
auto e = std::chrono::high_resolution_clock::now();
double seconds = std::chrono::duration_cast<std::chrono::nanoseconds>(e - s).count() / 1000000000.0;
}
Benchmarks with VS2013 Profiler:
10,000,000 tasks:
MT:
13 seconds of wall clock time
93.36% is spent in msvcp120.dll
3.45% is spent in Task::execute() // Not good here
ST:
0.5 seconds of wall clock time
97.31% is spent with Task::execute()
Usual disclaimer in such answers: the only way to tell for sure is to measure it with a profiler tool.
But I will try to explain your results without it. First of all, you have one mutex across all your threads. So only one thread at a time can execute some task. It kills all your gains you might have. In spite of your threads your code is perfectly serial. So at the very least make your task execution out of the mutex. You need to lock the mutex only to get a task out of the queue — you don't need to hold it when the task gets executed.
Next, your tasks are so simple that single thread will execute them in no time. You just can't measure any gains with such tasks. Create some heavy tasks which could produce some more interesting results(some tasks which are closer to the real world, not such contrived).
And the 3rd point: threads are not without their cost — context switching, mutex contention etc. To have real gains, as the previous 2 points say, you need to have tasks which take more time than the overheads threads introduce and the code should be truly parallel instead of waiting on some resource making it serial.
UPD: I looked at the wrong part of the code. The task is complex enough provided you create tasks with sufficiently large numbers.
UPD2: I've played with your code and found a good prime number to show how the MT code is better. Use the following prime number: 1019048297. It will give enough computation complexity to show the difference.
But why your code doesn't produce good results? It is hard to tell without seeing the implementation of randint() but I take it is pretty simple and in a half of the cases it returns even numbers and other cases produce not much of big prime numbers either. So the tasks are so simple that context switching and other things around your particular implementation and threads in general consume more time than the computation itself. Using the prime number I gave you give the tasks no choice but spend time computing — no easy answer since the number is big and actually prime. That's why the big number will give you the answer you seek — better time for the MT code.
You should not hold the mutex while the task is getting executed, otherwise other threads will not be able to get a task:
void work() {
while (m_parent->m_running) {
Task* currentTask = nullptr;
std::unique_lock<std::mutex> lock(m_parent->m_in_mutex);
m_parent->m_task_signal.wait(lock, [&]() {
return !m_parent->m_in.empty() || !m_parent->m_running;
});
if (!m_parent->m_running) continue;
currentTask = m_parent->m_in.front();
m_parent->m_in.pop();
lock.unlock(); //<- Release the lock so that other threads can get tasks
currentTask->execute();
currentTask = nullptr;
}
}
For MT, how much time is spent in each phase of the "overhead": std::unique_lock, m_task_signal.wait, front, pop, unlock?
Based on your results of only 3% useful work, this means the above consumes 97%. I'd get numbers for each part of the above (e.g. add timestamps between each call).
It seems to me, that the code you use to [merely] dequeue the next task pointer is quite heavy. I'd do a much simpler queue [possibly lockless] mechanism. Or, perhaps, use atomics to bump an index into the queue instead of the five step process above. For example:
void
work()
{
while (m_parent->m_running) {
// NOTE: this is just an example, not necessarily the real function
int curindex = atomic_increment(&global_index);
if (curindex >= max_index)
break;
Task *task = m_parent->m_in[curindex];
task->execute();
}
}
Also, maybe you should pop [say] ten at a time instead of just one.
You might also be memory bound and/or "task switch" bound. (e.g.) For threads that access an array, more than four threads usually saturates the memory bus. You could also have heavy contention for the lock, such that the threads get starved because one thread is monopolizing the lock [indirectly, even with the new unlock call]
Interthread locking usually involves a "serialization" operation where other cores must synchronize their out-of-order execution pipelines.
Here's a "lockless" implementation:
void
work()
{
// assume m_id is 0,1,2,...
int curindex = m_id;
while (m_parent->m_running) {
if (curindex >= max_index)
break;
Task *task = m_parent->m_in[curindex];
task->execute();
curindex += NUMBER_OF_WORKERS;
}
}
I'm building a simulator to test student code for a very simple robot. I need to run two functions(to update robot sensors and robot position) on separate threads at regular time intervals. My current implementation is highly processor inefficient because it has a thread dedicated to simply incrementing numbers to keep track of the position in the code. My recent theory is that I may be able to use sleep to give the time delay between updating value of the sensor and robot position. My first question is: is this efficient? Second: Is there any way to do a simple thing but measure clock cycles instead of seconds?
Putting a thread to sleep by waiting on a mutex-like object is generally efficient. A common pattern involves waiting on a mutex with a timeout. When the timeout is reached, the interval is up. When the mutex is releaed, it is the signal for the thread to terminate.
Pseudocode:
void threadMethod() {
for(;;) {
bool signalled = this->mutex.wait(1000);
if(signalled) {
break; // Signalled, owners wants us to terminate
}
// Timeout, meaning our wait time is up
doPeriodicAction();
}
}
void start() {
this->mutex.enter();
this->thread.start(threadMethod);
}
void stop() {
this->mutex.leave();
this->thread.join();
}
On Windows systems, timeouts are generally specified in milliseconds and are accurate to roughly within 16 milliseconds (timeBeginPeriod() may be able to improve this). I do not know of a CPU cycle-triggered synchronization primitive. There are lightweight mutexes called "critical sections" that spin the CPU for a few thousand cycles before delegating to the OS thread scheduler. Within this time they are fairly accurate.
On Linux systems the accuracy may be a bit higher (high frequency timer or tickless kernel) and in addition to mutexes, there are "futexes" (fast mutex) which are similar to Windows' critical sections.
I'm not sure I grasped what you're trying to achieve, but if you want to test student code, you might want to use a virtual clock and control the passing of time yourself. For example by calling a processInputs() and a decideMovements() method that the students have to provide. After each call, 1 time slot is up.
This C++11 code uses std::chrono::high_resolution_clock to measure subsecond timing, and std::thread to run three threads. The std::this_thread::sleep_for() function is used to sleep for a specified time.
#include <iostream>
#include <thread>
#include <vector>
#include <chrono>
void seconds()
{
using namespace std::chrono;
high_resolution_clock::time_point t1, t2;
for (unsigned i=0; i<10; ++i) {
std::cout << i << "\n";
t1 = high_resolution_clock::now();
std::this_thread::sleep_for(std::chrono::seconds(1));
t2 = high_resolution_clock::now();
duration<double> elapsed = duration_cast<duration<double> >(t2-t1);
std::cout << "\t( " << elapsed.count() << " seconds )\n";
}
}
int main()
{
std::vector<std::thread> t;
t.push_back(std::thread{[](){
std::this_thread::sleep_for(std::chrono::seconds(3));
std::cout << "awoke after 3\n"; }});
t.push_back(std::thread{[](){
std::this_thread::sleep_for(std::chrono::seconds(7));
std::cout << "awoke after 7\n"; }});
t.push_back(std::thread{seconds});
for (auto &thr : t)
thr.join();
}
It's hard to know whether this meets your needs because there are a lot of details missing from the question. Under Linux, compile with:
g++ -Wall -Wextra -pedantic -std=c++11 timers.cpp -o timers -lpthread
Output on my machine:
0
( 1.00014 seconds)
1
( 1.00014 seconds)
2
awoke after 3
( 1.00009 seconds)
3
( 1.00015 seconds)
4
( 1.00011 seconds)
5
( 1.00013 seconds)
6
awoke after 7
( 1.0001 seconds)
7
( 1.00015 seconds)
8
( 1.00014 seconds)
9
( 1.00013 seconds)
Other C++11 standard features that may be of interest include timed_mutex and promise/future.
Yes your theory is correct. You can use sleep to put some delay between execution of a function by thread. Efficiency depends on how wide you can choose that delay to get desired result. You have to explain details of your implementation. For e.g we don't know whether two threads are dependent ( in that case you have to take care of synchronization which would blow up some cycles ).
Here's the one way to do it. I'm using C++11, thread, atomics and high precision clock. The scheduler will callback a function that takes dt seconds which is time elapsed since last call. The loop can be stopped by calling stop() method of if callback function returns false.
Scheduler code
#include <thread>
#include <chrono>
#include <functional>
#include <atomic>
#include <system_error>
class ScheduledExecutor {
public:
ScheduledExecutor()
{}
ScheduledExecutor(const std::function<bool(double)>& callback, double period)
{
initialize(callback, period);
}
void initialize(const std::function<bool(double)>& callback, double period)
{
callback_ = callback;
period_ = period;
keep_running_ = false;
}
void start()
{
keep_running_ = true;
sleep_time_sum_ = 0;
period_count_ = 0;
th_ = std::thread(&ScheduledExecutor::executorLoop, this);
}
void stop()
{
keep_running_ = false;
try {
th_.join();
}
catch(const std::system_error& /* e */)
{ }
}
double getSleepTimeAvg()
{
//TODO: make this function thread safe by using atomic types
//right now this is not implemented for performance and that
//return of this function is purely informational/debugging purposes
return sleep_time_sum_ / period_count_;
}
unsigned long getPeriodCount()
{
return period_count_;
}
private:
typedef std::chrono::high_resolution_clock clock;
template <typename T>
using duration = std::chrono::duration<T>;
void executorLoop()
{
clock::time_point call_end = clock::now();
while (keep_running_) {
clock::time_point call_start = clock::now();
duration<double> since_last_call = call_start - call_end;
if (period_count_ > 0 && !callback_(since_last_call.count()))
break;
call_end = clock::now();
duration<double> call_duration = call_end - call_start;
double sleep_for = period_ - call_duration.count();
sleep_time_sum_ += sleep_for;
++period_count_;
if (sleep_for > MinSleepTime)
std::this_thread::sleep_for(std::chrono::duration<double>(sleep_for));
}
}
private:
double period_;
std::thread th_;
std::function<bool(double)> callback_;
std::atomic_bool keep_running_;
static constexpr double MinSleepTime = 1E-9;
double sleep_time_sum_;
unsigned long period_count_;
};
Example usage
bool worldUpdator(World& w, double dt)
{
w.update(dt);
return true;
}
void main() {
//create world for your simulator
World w(...);
//start scheduler loop for every 2ms calls
ScheduledExecutor exec;
exec.initialize(
std::bind(worldUpdator, std::ref(w), std::placeholders::_1),
2E-3);
exec.start();
//main thread just checks on the results every now and then
while (true) {
if (exec.getPeriodCount() % 10000 == 0) {
std::cout << exec.getSleepTimeAvg() << std::endl;
}
}
}
There are also other, related questions on SO.