I am trying to implement thread pool in C++ using pthread. I want to encapsulate logic related to threads management in one object which is taking ownership of these threads. That means whenever this object is destroyed, threads must be stopped and cleaned up.
I've been testing my code and it turns out that I get segmentation fault when I destroy WorkerThreadManager object while there is boost::function called. See the code and backtrace from GDB. I don't really understand why it happens, as far as I know boost::function is copyable, so once I get a copy of it from the queue, I can pop() it and even destroy whole queue (I prooved that in some small test) and then call the function's copy.
WorkerThreadManager.h:
#include "WorkerThreadManagerInterface.h"
#include "utils/mutex.h"
#include <queue>
#include <semaphore.h>
#include <iostream>
class WorkerThreadManager : public WorkerThreadManagerInterface
{
public:
WorkerThreadManager(unsigned threadsNumber = 5);
virtual ~WorkerThreadManager();
virtual void PushTask(thread_function_t A_threadFun, result_function_t A_resultFun);
void SignalResults();
private:
static void* WorkerThread(void* A_data);
void PushResult(int A_result, result_function_t A_resultFun);
typedef boost::function<void ()> signal_function_t;
struct worker_thread_data_t
{
worker_thread_data_t(thread_function_t A_threadFun, result_function_t A_resultFun) :
threadFun(A_threadFun), resultFun(A_resultFun) {}
worker_thread_data_t() {}
thread_function_t threadFun;
result_function_t resultFun;
};
const unsigned m_threadsNumber;
pthread_t* m_pthreads;
utils::Mutex m_tasksMutex;
sem_t m_tasksSem;
std::queue<worker_thread_data_t> m_tasks;
utils::Mutex m_resultsMutex;
std::queue<signal_function_t> m_results;
};
WorkerThreadManager.cpp:
#include "WorkerThreadManager.h"
#include "gateway_log.h"
#include <pthread.h>
/**
* #brief Creates semaphore and starts threads.
*/
WorkerThreadManager::WorkerThreadManager(unsigned threadsNumber) : m_threadsNumber(threadsNumber)
{
if ( sem_init(&m_tasksSem, 0, 0) )
{
std::stringstream ss;
ss << "Semaphore could not be initialized: " << errno << " - " << strerror(errno);
LOG_FATAL(ss);
throw std::runtime_error(ss.str());
}
m_pthreads = new pthread_t[m_threadsNumber];
for (unsigned i = 0; i < m_threadsNumber; ++i)
{
int rc = pthread_create(&m_pthreads[i], NULL, WorkerThreadManager::WorkerThread, (void*) this );
if(rc)
{
std::stringstream ss;
ss << "Pthread could not be started: " << errno << " - " << strerror(errno);
LOG_FATAL(ss.str());
if ( sem_destroy(&m_tasksSem) )
LOG_ERROR("Semaphore could not be destroyed: " << errno << " - " << strerror(errno));
delete [] m_pthreads;
throw std::runtime_error(ss.str());
}
else
{
LOG_DEBUG("Worker thread started " << m_pthreads[i]);
if(pthread_detach(m_pthreads[i]))
LOG_WARN("Failed to detach worker thread");
}
}
}
/**
* #brief Cancels all threads, destroys semaphore
*/
WorkerThreadManager::~WorkerThreadManager()
{
LOG_DEBUG("~WorkerThreadManager()");
for(unsigned i = 0; i < m_threadsNumber; ++i)
{
if ( pthread_cancel(m_pthreads[i]) )
LOG_ERROR("Worker thread cancellation failed");
}
if ( sem_destroy(&m_tasksSem) )
LOG_ERROR("Semaphore could not be destroyed: " << errno << " - " << strerror(errno));
delete [] m_pthreads;
}
/**
* #brief Adds new task to queue, so worker threads can
* #param A_threadFun function which will be executed by thread
* #param A_resultFun function which will be enqueued for calling with return value of A_threadFun as parameter
* after worker thread executes A_threadFun.
*/
void WorkerThreadManager::PushTask(thread_function_t A_threadFun, result_function_t A_resultFun)
{
utils::ScopedLock mutex(m_tasksMutex);
worker_thread_data_t data(A_threadFun, A_resultFun);
m_tasks.push( data );
sem_post(&m_tasksSem);
LOG_DEBUG("Task for worker threads has been added to queue");
}
/**
* #brief Executes result functions (if there are any) to give feedback
* to classes which requested task execution in worker thread.
*/
void WorkerThreadManager::SignalResults()
{
while(true)
{
signal_function_t signal;
{
utils::ScopedLock mutex(m_resultsMutex);
if(m_results.size())
{
signal = m_results.front();
m_results.pop();
}
else
return;
}
signal();
}
}
/**
* #brief Enqueues result of function executed in worker thread.
* #param A_result return value of function executed in worker thread
* #param A_resultFun function which will be enqueued for calling with A_result as a parameter.
*/
void WorkerThreadManager::PushResult(int A_result, result_function_t A_resultFun)
{
utils::ScopedLock mutex(m_resultsMutex);
signal_function_t signal = boost::bind(A_resultFun, A_result);
m_results.push( signal );
}
/**
* #brief worker thread body
* #param A_data pointer to WorkerThreadManager instance
*/
void* WorkerThreadManager::WorkerThread(void* A_data)
{
WorkerThreadManager* manager = reinterpret_cast<WorkerThreadManager*>(A_data);
LOG_DEBUG("Starting worker thread loop");
while (1)
{
if ( -1 == sem_wait(&manager->m_tasksSem) && errno == EINTR )
{
LOG_DEBUG("sem_wait interrupted with signal");
continue;
}
LOG_DEBUG("WorkerThread:::::: about to call lock mutex");
worker_thread_data_t data;
{
utils::ScopedLock mutex(manager->m_tasksMutex);
data = manager->m_tasks.front();
manager->m_results.pop();
}
LOG_DEBUG("WorkerThread:::::: about to call resultFun");
int result = data.threadFun();
LOG_DEBUG("WorkerThread:::::: after call resultFun");
pthread_testcancel();
manager->PushResult(result, data.resultFun);
}
return NULL;
}
main.cpp:
#include "gateway_log.h"
#include "WorkerThreadManager.h"
#include <memory>
class A {
public:
int Fun() { LOG_DEBUG("Fun before sleep"); sleep(8); LOG_DEBUG("Fun after sleep");return 0; }
void Result(int a) { LOG_DEBUG("Result: " << a); }
};
int main()
{
sd::auto_ptr<WorkerThreadManager> workerThreadManager = new WorkerThreadManager;
A a;
workerThreadManager->PushTask(boost::bind(&A::Fun, &a), boost::bind(&A::Result, &a, _1));
sleep(3);
LOG_DEBUG("deleting workerThreadManager");
workerThreadManager.reset(); // <<<--- CRASH
LOG_DEBUG("deleted workerThreadManager");
sleep(10);
LOG_DEBUG("after sleep");
return 0;
}
GDB:
(gdb) bt
#0 0xb7ad33a0 in ?? () from /lib/i386-linux-gnu/libc.so.6
#1 0x0807d3a7 in boost::function0<void>::clear (this=0x858db48) at /home/marcin/intel_build/boost_1_42_0/boost/function/function_template.hpp:856
#2 0x0807d17b in boost::function0<void>::~function0 (this=0x858db48, __in_chrg=<optimized out>) at /home/marcin/intel_build/boost_1_42_0/boost/function/function_template.hpp:752
#3 0x0807cec5 in boost::function<void()>::~function(void) (this=0x858db48, __in_chrg=<optimized out>) at /home/marcin/intel_build/boost_1_42_0/boost/function/function_template.hpp:1043
#4 0x0807ced8 in std::_Destroy<boost::function<void ()> >(boost::function<void ()>*) (__pointer=0x858db48) at /usr/include/c++/4.6/bits/stl_construct.h:94
#5 0x0807c868 in std::_Destroy_aux<false>::__destroy<boost::function<void ()>*>(boost::function<void ()>*, boost::function<void ()>*) (__first=0x858db48, __last=0x858d928) at /usr/include/c++/4.6/bits/stl_construct.h:104
#6 0x0807bd05 in std::_Destroy<boost::function<void ()>*>(boost::function<void ()>*, boost::function<void ()>*) (__first=0x858d938, __last=0x858d928) at /usr/include/c++/4.6/bits/stl_construct.h:127
#7 0x0807af23 in std::_Destroy<boost::function<void ()>*, boost::function<void ()> >(boost::function<void ()>*, boost::function<void ()>*, std::allocator<boost::function<void ()> >&) (__first=0x858d938, __last=0x858d928)
at /usr/include/c++/4.6/bits/stl_construct.h:153
#8 0x0807a037 in std::deque<boost::function<void ()>, std::allocator<boost::function<void ()> > >::_M_destroy_data_aux(std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>, std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>) (this=0x858beec, __first=..., __last=...) at /usr/include/c++/4.6/bits/deque.tcc:795
#9 0x08076153 in std::deque<boost::function<void ()>, std::allocator<boost::function<void ()> > >::_M_destroy_data(std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>, std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>, std::allocator<boost::function<void ()> > const&) (this=0x858beec, __first=..., __last=...) at /usr/include/c++/4.6/bits/stl_deque.h:1816
#10 0x08073411 in std::deque<boost::function<void()>, std::allocator<boost::function<void()> > >::~deque(void) (this=0x858beec, __in_chrg=<optimized out>) at /usr/include/c++/4.6/bits/stl_deque.h:898
#11 0x0806a355 in std::queue<boost::function<void()>, std::deque<boost::function<void()>, std::allocator<boost::function<void()> > > >::~queue(void) (this=0x858beec, __in_chrg=<optimized out>)
at /usr/include/c++/4.6/bits/stl_queue.h:92
#12 0x0815a054 in WorkerThreadManager::~WorkerThreadManager (this=0x858be98, __in_chrg=<optimized out>) at WorkerThreadManager.cpp:42
#13 0x0815a1e3 in WorkerThreadManager::~WorkerThreadManager (this=0x858be98, __in_chrg=<optimized out>) at WorkerThreadManager.cpp:56
#14 0x080c6c51 in std::auto_ptr<WorkerThreadManager>::reset (this=0x85463e4, __p=0x0) at /usr/include/c++/4.6/backward/auto_ptr.h:244
#15 0x080604a9 in main ()
I would really appreciate any help.
There is no guarantee that pthread_cancel waits for the cancellation of the target completes before it returns. When successful, it simply requests cancellation, but does not wait for it to complete. You need to use pthread_join to wait for the threads to have completed.
I suspect that as the destructor is proceeding in one thread, one of the threads wakes up (due to the sem_destroy), and erroneously attempts to read/pop the queue. I'm not sure why it's causing a crash in the main thread, but I would eliminate this potential issue first.
Finally, I would highly recommend you move some of these semaphore and thread mechanisms to their own classes, to make the code more exception safe.
I found a bug, it was trivial - shame on me :(
In function void* WorkerThreadManager::WorkerThread(void* A_data) I popped m_results queue instead of m_tasks as I had intended:
worker_thread_data_t data;
{
utils::ScopedLock mutex(manager->m_tasksMutex);
data = manager->m_tasks.front();
manager->m_results.pop();
}
Anyway I don't really understand why it caused crash so late - in destructor of the queue.
Related
I have this small snippet of code:
// (...)
class Time
{
std::atomic<bool> m_running;
std::thread m_worker;
// ...
};
Time::Time()
{
// ...
m_running = true;
m_worker = std::move(std::thread(std::bind(&Time::Worker, this)));
}
bool Time::HasTimedOut() const
{
return (!m_disabled) &&
(IsPending() && (GetRunTime() >= m_maximum_timeout) && (CloseHandlesDiff() >= m_minimum_close_time));
}
Time::~Time()
{
if (m_running)
{
m_running = false;
m_worker.join();
}
}
void Time::Worker()
{
while (m_running)
{
if (time_data->HasTimedOut())
{
time_data->RunTimedOutCallback();
}
if (m_count < 0)
{
m_count = 0;
}
if (m_running)
{
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
}
}
std::shared_ptr<Time> time_data(std::make_shared<Time>());
To my surprise, I have gotten a coredump, and the backtrace command from gdb shows this:
(gdb) bt
#0 0x09438408 in monitor::Time::HasTimedOut (this=0x0)
at monitor.cxx: // return (!m_disabled) &&
#1 0x09438a84 in monitor::Time::Worker (this=0xbd96dd8)
monitor.cxx: // if(time_data->HasTimedOut()
#2 0x0943cf81 in std::__invoke_impl<void, void (monitor::Time::*&)(), monitor::Time*&> (
__f=#0xbd96e54: (void (monitor::Time::*)(monitor::Time * const)) 0x94389f0 <monitor::Time::Worker()>, __t=#0xbd96e5c: 0xbd96dd8)
A nullptr seems to be the problem (SEGFAULT):
(this=0x0)
This means that my class got destroyed, without the destructor being called.
This might be possible when the OS/watchdog for my application does a force exit / quick terminate as far as I know/suspect.
Are there any ways to deal with this? Maybe some shared_ptr atomic wrapping where I could check if the shared_ptr is a nullptr, is there some atomic if-not-null-execute-this? Then again.. this happened literally mid-execution.
I know one can add quick-exit hooks, but the quick exit is sometimes used for good reason, it would be agains the design to slow down the quick-exit. What would be the best way to handle this becoming a nullptr?
Or should I just let the SEGFALT happen because the application is being quick-exited anyway?
Here is the full stack, but that probably won't add any more useful information:
(gdb) bt
(gdb) bt
#0 0x09438408 in monitor::Time::HasTimedOut (this=0x0)
at /opt/procesleiding/vptlib/lib/oracle_monitor.cxx:111
#1 0x09438a84 in monitor::Time::Worker (this=0xbd96dd8)
at /opt/procesleiding/vptlib/lib/oracle_monitor.cxx:142
#2 0x0943cf81 in std::__invoke_impl<void, void (monitor::Time::*&)(), monitor::Time*&> (
__f=#0xbd96e54: (void (monitor::Time::*)(monitor::Time * const)) 0x94389f0 <monitor::Time::Worker()>, __t=#0xbd96e5c: 0xbd96dd8)
at /usr/include/c++/7/bits/invoke.h:73
#3 0x0943c99f in std::__invoke<void (monitor::Time::*&)(), monitor::Time*&> (
__fn=#0xbd96e54: (void (monitor::Time::*)(monitor::Time * const)) 0x94389f0 <monitor::Time::Worker()>, __args#0=#0xbd96e5c: 0xbd96dd8)
at /usr/include/c++/7/bits/invoke.h:95
#4 0x0943c70c in std::_Bind<void (monitor::Time::*(monitor::Time*))()>::__call<void, , 0u>(std::tuple<>&&, std::_Index_tuple<0u>) (
this=0xbd96e54, __args=...)
at /usr/include/c++/7/functional:467
#5 0x0943c28c in std::_Bind<void (monitor::Time::*(monitor::Time*))()>::operator()<, void>() (this=0xbd96e54)
at /usr/include/c++/7/functional:551
#6 0x0943bcaf in std::__invoke_impl<void, std::_Bind<void (monitor::Time::*(monitor::Time*))()>>(std::__invoke_other, std::_Bind<void (monitor::Time::*(monitor::Time*))()>&&) (__f=...) at /usr/include/c++/7/bits/invoke.h:60
#7 0x0943b022 in std::__invoke<std::_Bind<void (monitor::Time::*(monitor::Time*))()>>(std::_Bind<void (monitor::Time::*(monitor::Time*))()>&&) (__fn=...) at /usr/include/c++/7/bits/invoke.h:95
#8 0x0943e2a6 in std::thread::_Invoker<std::tuple<std::_Bind<void (monitor::Time::*(monitor::Time*))()> > >::_M_invoke<0u>(std::_Index_tuple<0u>) (this=0xbd96e54) at /usr/include/c++/7/thread:234
#9 0x0943e15c in std::thread::_Invoker<std::tuple<std::_Bind<void (monitor::Time::*(monitor::Time*))()> > >::operator()() (this=0xbd96e54) at /usr/include/c++/7/thread:243
#10 0x0943e067 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::_Bind<void (monitor::Time::*(monitor::Time*))()> > > >::_M_run() (this=0xbd96e50) at /usr/include/c++/7/thread:186
Your code has a race condition.
Time::Time()
{
// ...
m_running = true;
m_worker = std::move(std::thread(std::bind(&Time::Worker, this)));
}
Here tread starts before
std::shared_ptr<Time> time_data(std::make_shared<Time>());
is completed.
Simply thread reaches monitor::Time::HasTimedOut before std::shared_ptr<Time> time_data(std::make_shared<Time>()); is completed.
Spawn thread not in constructor, but in separate method which you will invoke after time_data is assigned.
Anyway it would be better if your Timer do not use time_data global variable at all.
Code:
Listener.hpp:
template <typename SocketType>
void Listener<SocketType>::BeginAccept()
{
auto worker = SelectWorker();
auto socket = worker->CreateSocket();
m_acceptor->async_accept(socket->GetAsioSocket(),
[this, worker, socket] (const boost::system::error_code &ec)
{
this->OnAccept(worker, socket, ec);
});
}
template <typename SocketType>
void Listener<SocketType>::OnAccept(NetworkThread<SocketType> *worker, std::shared_ptr<SocketType> const& socket, const boost::system::error_code &ec)
{
// an error has occurred
if (ec)
worker->RemoveSocket(socket.get());
else
socket->Open();
BeginAccept();
}
Socket.cpp:
bool Socket::Open()
{
try
{
const_cast<std::string &>(m_address) = m_socket.remote_endpoint().address().to_string();
const_cast<std::string &>(m_remoteEndpoint) = boost::lexical_cast<std::string>(m_socket.remote_endpoint());
}
catch (boost::system::error_code& error)
{
sLog.outInfo("Socket::Open() failed to get remote address. Error: %s", error.message().c_str());
return false;
}
catch (const std::exception& error)
{
sLog.outInfo("Socket::Open() failed(with std::exception) to get remote address. Error: %s", error.what());
return false;
}
catch (...)
{
sLog.outError("Socket::Open() failed to get remote address. Other error");
return false;
}
m_outBuffer.reset(new PacketBuffer);
m_secondaryOutBuffer.reset(new PacketBuffer);
m_inBuffer.reset(new PacketBuffer);
StartAsyncRead();
return true;
}
Is this related to TCP attack?
EDIT: gdb:
(gdb) p m_socket
$1 = {<boost::asio::basic_socket<boost::asio::ip::tcp, boost::asio::stream_socket_service<boost::asio::ip::tcp> >> = {<boost::asio::basic_io_object<boost::asio::stream_socket_service<boost::asio::ip::tcp>, true>> = {
implementation = {<boost::asio::detail::reactive_socket_service_base::base_implementation_type> = {socket_ = 21, state_ = 80 'P', reactor_data_ = 0x7ffff0005120}, protocol_ = {family_ = 2}}, service_ = 0x7f2b30}, <boost::asio::socket_base> = {
static message_peek = 2, static message_out_of_band = 1, static message_do_not_route = 4, static message_end_of_record = 128, static max_connections = 128}, <No data fields>}, <No data fields>}
(gdb) p m_socket.is_open()
$2 = true
(gdb) bt
#0 MaNGOS::Socket::Open (this=0x7ffff0003c40) at /home/ubuntu/MoltenCore/MoltenCore/src/shared/Network/Socket.cpp:52
#1 0x00000000004fbff1 in MaNGOS::Listener<AuthSocket>::OnAccept (this=0x7fffffffe6c0, worker=0x8041e0, socket=std::shared_ptr (count 2, weak 1) 0x7ffff0003c40, ec=...) at /home/ubuntu/MoltenCore/MoltenCore/src/shared/Network/Listener.hpp:121
#2 0x00000000004fa030 in MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}::operator()(boost::system::error_code const&) const (__closure=0x7ffff4ec8be0, ec=...)
at /home/ubuntu/MoltenCore/MoltenCore/src/shared/Network/Listener.hpp:110
#3 0x0000000000502a2d in boost::asio::detail::binder1<MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}, boost::system::error_code>::operator()() (this=0x7ffff4ec8be0) at /usr/include/boost/asio/detail/bind_handler.hpp:47
#4 0x00000000005020c3 in boost::asio::asio_handler_invoke<boost::asio::detail::binder1<MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}, boost::system::error_code> >(boost::asio::detail::binder1<MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}, boost::system::error_code>&, ...) (function=...) at /usr/include/boost/asio/handler_invoke_hook.hpp:69
#5 0x00000000005014b8 in boost_asio_handler_invoke_helpers::invoke<boost::asio::detail::binder1<MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}, boost::system::error_code>, {lambda(boost::system::error_code const&)#1}>(boost::asio::detail::binder1<MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}, boost::system::error_code>&, {lambda(boost::system::error_code const&)#1}&) (function=..., context=...)
at /usr/include/boost/asio/detail/handler_invoke_helpers.hpp:37
#6 0x0000000000500886 in boost::asio::detail::reactive_socket_accept_op<boost::asio::basic_socket<boost::asio::ip::tcp, boost::asio::stream_socket_service<boost::asio::ip::tcp> >, boost::asio::ip::tcp, MaNGOS::Listener<AuthSocket>::BeginAccept()::{lambda(boost::system::error_code const&)#1}>::do_complete(boost::asio::detail::task_io_service*, boost::asio::detail::task_io_service_operation*, boost::system::error_code const&, unsigned long) (owner=0x8084d0, base=0x804910) at /usr/include/boost/asio/detail/reactive_socket_accept_op.hpp:123
#7 0x00000000004f0300 in boost::asio::detail::task_io_service_operation::complete (this=0x804910, owner=..., ec=..., bytes_transferred=0) at /usr/include/boost/asio/detail/task_io_service_operation.hpp:38
#8 0x00000000004f1d55 in boost::asio::detail::epoll_reactor::descriptor_state::do_complete (owner=0x8084d0, base=0x804140, ec=..., bytes_transferred=1) at /usr/include/boost/asio/detail/impl/epoll_reactor.ipp:651
#9 0x00000000004f0300 in boost::asio::detail::task_io_service_operation::complete (this=0x804140, owner=..., ec=..., bytes_transferred=1) at /usr/include/boost/asio/detail/task_io_service_operation.hpp:38
#10 0x00000000004f2823 in boost::asio::detail::task_io_service::do_run_one (this=0x8084d0, lock=..., this_thread=..., ec=...) at /usr/include/boost/asio/detail/impl/task_io_service.ipp:372
#11 0x00000000004f23a1 in boost::asio::detail::task_io_service::run (this=0x8084d0, ec=...) at /usr/include/boost/asio/detail/impl/task_io_service.ipp:149
#12 0x00000000004f2abc in boost::asio::io_service::run (this=0x7ef540) at /usr/include/boost/asio/impl/io_service.ipp:59
#13 0x00000000004f7c89 in MaNGOS::Listener<AuthSocket>::Listener(int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int)::{lambda()#1}::operator()() const () at /home/ubuntu/MoltenCore/MoltenCore/src/shared/Network/Listener.hpp:84
#14 0x0000000000505624 in std::_Bind_simple<MaNGOS::Listener<AuthSocket>::Listener(int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int)::{lambda()#1} ()>::_M_invoke<>(std::_Index_tuple<>) (this=0x8049c8)
at /usr/include/c++/5/functional:1531
#15 0x0000000000504f38 in std::_Bind_simple<MaNGOS::Listener<AuthSocket>::Listener(int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int)::{lambda()#1} ()>::operator()() (this=0x8049c8) at /usr/include/c++/5/functional:1520
#16 0x0000000000504416 in std::thread::_Impl<std::_Bind_simple<MaNGOS::Listener<AuthSocket>::Listener(int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int)::{lambda()#1} ()> >::_M_run() (this=0x8049b0) at /usr/include/c++/5/thread:115
#17 0x00007ffff6c98c80 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#18 0x00007ffff7bc16ba in start_thread (arg=0x7ffff4ec9700) at pthread_create.c:333
#19 0x00007ffff63fe82d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
EDIT:
NetworkThread.hpp:
template <typename SocketType>
class NetworkThread
{
private:
boost::asio::io_service m_service;
std::mutex m_socketLock;
std::unordered_set<std::shared_ptr<SocketType>> m_sockets;
// note that the work member *must* be declared after the service member for the work constructor to function correctly
std::unique_ptr<boost::asio::io_service::work> m_work;
std::thread m_serviceThread;
public:
NetworkThread() : m_work(new boost::asio::io_service::work(m_service))
{
m_serviceThread = std::thread([this] { boost::system::error_code ec; this->m_service.run(ec); });
}
~NetworkThread()
{
// Allow io_service::run() to exit.
m_work.reset();
m_service.stop();
m_serviceThread.join();
// attempt to gracefully close any open connections
for (auto i = m_sockets.begin(); i != m_sockets.end();)
{
auto const current = i;
++i;
if (!(*current)->IsClosed())
(*current)->Close();
}
}
size_t Size() const { return m_sockets.size(); }
std::shared_ptr<SocketType> CreateSocket();
void RemoveSocket(Socket *socket)
{
std::lock_guard<std::mutex> guard(m_socketLock);
m_sockets.erase(socket->shared<SocketType>());
}
};
template <typename SocketType>
std::shared_ptr<SocketType> NetworkThread<SocketType>::CreateSocket()
{
std::lock_guard<std::mutex> guard(m_socketLock);
auto const i = m_sockets.emplace(std::make_shared<SocketType>(m_service, [this] (Socket *socket) { this->RemoveSocket(socket); }));
return *i.first;
}
The problem is totally different:
In your code:
template <typename SocketType>
void Listener<SocketType>::OnAccept(
NetworkThread<SocketType> *worker,
std::shared_ptr<SocketType> const& socket,
const boost::system::error_code &ec )
{
// at this point, socket has 1 reference count, held by the lambda
// in BeginAccept, its reference count cannot increase, since it's
// constant.
// ...
BeginAccept(); // this creates a new, distinct instance of the lambda
// fuinction object in BeginAccept.
// when we exit, constrol is given back to calling lambda in BeginAccept
// its destructor happily deletes the socket, since its reference count
// is still only 1.
}
Bottom line: OnAccept() needs to receive the socket pointer by value, and it needs to transfer or store this shared pointer somehow. This is usually done
by passing the shared_ptr to BeginReceive() BY VALUE. BeginReceive() must then take the responsibility to keep the socket pointer alive until the connection dies, this, it usually does that by passing the pointer back to itself, either as a parameter, or as a lambda capture, always by value.
The easiest way to fix your code is to move the call to StartAsyncRead() to OnAccept(), since there is a convenient copy of a shared_ptr of the socket there.
I'm trying to make a thread pool that blocks the main thread until all it's children have completed. The real-world use-case for this is a "Controller" process that spawns independent processes for the user to interact with.
Unfortunately, when the main exits, a segmentation fault is encountered. I cannot figure out the cause of this segmentation fault.
I've authored a Process class which is little more than opening a shell script (called waiter.sh that contains a sleep 5) and waiting for the pid to exit. The Process class is initialized and then the Wait() method is placed in one of the threads in the thread pool.
The problem arises when ~thread_pool() is called. The std::queue cannot properly deallocate the boost::function passed to it, even though the reference to Process is still valid.
#include <sys/types.h>
#include <sys/wait.h>
#include <spawn.h>
#include <queue>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
extern char **environ;
class Process {
private:
pid_t pid;
int status;
public:
Process() : status(0), pid(-1) {
}
~Process() {
std::cout << "calling ~Process" << std::endl;
}
void Spawn(char **argv) {
// want spawn posix and wait for th epid to return
status = posix_spawn(&pid, "waiter.sh", NULL, NULL, argv, environ);
if (status != 0) {
perror("unable to spawn");
return;
}
}
void Wait() {
std::cout << "spawned proc with " << pid << std::endl;
waitpid(pid, &status, 0);
// wait(&pid);
std::cout << "wait complete" << std::endl;
}
};
Below is the thread_pool class. This is loosely adapted from the accepted answer for this question
class thread_pool {
private:
std::queue<boost::function<void() >> tasks;
boost::thread_group threads;
std::size_t available;
boost::mutex mutex;
boost::condition_variable condition;
bool running;
public:
thread_pool(std::size_t pool_size) : available(pool_size), running(true) {
std::cout << "creating " << pool_size << " threads" << std::endl;
for (std::size_t i = 0; i < available; ++i) {
threads.create_thread(boost::bind(&thread_pool::pool_main, this));
}
}
~thread_pool() {
std::cout << "~thread_pool" << std::endl;
{
boost::unique_lock<boost::mutex> lock(mutex);
running = false;
condition.notify_all();
}
try {
threads.join_all();
} catch (const std::exception &) {
// supress exceptions
}
}
template <typename Task>
void run_task(Task task) {
boost::unique_lock<boost::mutex> lock(mutex);
if (0 == available) {
return; //\todo err
}
--available;
tasks.push(boost::function<void()>(task));
condition.notify_one();
return;
}
private:
void pool_main() {
// wait on condition variable while the task is empty and the pool is still
// running
boost::unique_lock<boost::mutex> lock(mutex);
while (tasks.empty() && running) {
condition.wait(lock);
}
// copy task locally and remove from the queue. this is
// done within it's own scope so that the task object is destructed
// immediately after running the task. This is useful in the
// event that the function contains shared_ptr arguments
// bound via 'bind'
{
auto task = tasks.front();
tasks.pop();
lock.unlock();
// run the task
try {
std::cout << "running task" << std::endl;
task();
} catch (const std::exception &) {
// supress
}
}
// task has finished so increment count of availabe threads
lock.lock();
++available;
}
};
Here is the main:
int main() {
// input arguments are not required
char *argv[] = {NULL};
Process process;
process.Spawn(argv);
thread_pool pool(5);
pool.run_task(boost::bind(&Process::Wait, &process));
return 0;
}
The output for this is
creating 5 threads
~thread_pool
I am waiting... (from waiting.sh)
running task
spawned proc with 2573
running task
running task
running task
running task
wait complete
Segmentation fault (core dumped)
And here is the stack trace:
Starting program: /home/jandreau/NetBeansProjects/Controller/dist/Debug/GNU- Linux/controller
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
creating 5 threads
[New Thread 0x7ffff691d700 (LWP 2600)]
[New Thread 0x7ffff611c700 (LWP 2601)]
[New Thread 0x7ffff591b700 (LWP 2602)]
[New Thread 0x7ffff511a700 (LWP 2603)]
[New Thread 0x7ffff4919700 (LWP 2604)]
~thread_pool
running task
running task
spawned proc with 2599
[Thread 0x7ffff611c700 (LWP 2601) exited]
running task
[Thread 0x7ffff591b700 (LWP 2602) exited]
running task
[Thread 0x7ffff511a700 (LWP 2603) exited]
running task
[Thread 0x7ffff4919700 (LWP 2604) exited]
I am waiting...
wait complete
[Thread 0x7ffff691d700 (LWP 2600) exited]
Thread 1 "controller" received signal SIGSEGV, Segmentation fault.
0x000000000040f482 in boost::detail::function::basic_vtable0<void>::clear (
this=0xa393935322068, functor=...)
at /usr/include/boost/function/function_template.hpp:509
509 if (base.manager)
(gdb) where
#0 0x000000000040f482 in boost::detail::function::basic_vtable0<void>::clear (
this=0xa393935322068, functor=...)
at /usr/include/boost/function/function_template.hpp:509
#1 0x000000000040e263 in boost::function0<void>::clear (this=0x62ef50)
at /usr/include/boost/function/function_template.hpp:883
#2 0x000000000040cf20 in boost::function0<void>::~function0 (this=0x62ef50,
__in_chrg=<optimized out>)
at /usr/include/boost/function/function_template.hpp:765
#3 0x000000000040b28e in boost::function<void ()>::~function() (
this=0x62ef50, __in_chrg=<optimized out>)
at /usr/include/boost/function/function_template.hpp:1056
#4 0x000000000041193a in std::_Destroy<boost::function<void ()> >(boost::function<void ()>*) (__pointer=0x62ef50)
at /usr/include/c++/5/bits/stl_construct.h:93
#5 0x00000000004112df in std::_Destroy_aux<false>::__destroy<boost::function<void ()>*>(boost::function<void ()>*, boost::function<void ()>*) (
__first=0x62ef50, __last=0x62ed50)
at /usr/include/c++/5/bits/stl_construct.h:103
#6 0x0000000000410d16 in std::_Destroy<boost::function<void ()>*>(boost::function<void ()>*, boost::function<void ()>*) (__first=0x62edd0, __last=0x62ed50)
at /usr/include/c++/5/bits/stl_construct.h:126
#7 0x0000000000410608 in std::_Destroy<boost::function<void ()>*, boost::function<void ()> >(boost::function<void ()>*, boost::function<void ()>*, std::allocat---Type <return> to continue, or q <return> to quit---
or<boost::function<void ()> >&) (__first=0x62edd0, __last=0x62ed50)
at /usr/include/c++/5/bits/stl_construct.h:151
#8 0x000000000040fac5 in std::deque<boost::function<void ()>, std::allocator<boost::function<void ()> > >::_M_destroy_data_aux(std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>, std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>) (this=0x7fffffffdaf0, __first=..., __last=...)
at /usr/include/c++/5/bits/deque.tcc:845
#9 0x000000000040e6e4 in std::deque<boost::function<void ()>, std::allocator<boost::function<void ()> > >::_M_destroy_data(std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>, std::_Deque_iterator<boost::function<void ()>, boost::function<void ()>&, boost::function<void ()>*>, std::allocator<boost::function<void ()> > const&) (
this=0x7fffffffdaf0, __first=..., __last=...)
at /usr/include/c++/5/bits/stl_deque.h:2037
#10 0x000000000040d0c8 in std::deque<boost::function<void ()>, std::allocator<boost::function<void ()> > >::~deque() (this=0x7fffffffdaf0,
__in_chrg=<optimized out>) at /usr/include/c++/5/bits/stl_deque.h:1039
#11 0x000000000040b3ce in std::queue<boost::function<void ()>, std::deque<boost::function<void ()>, std::allocator<boost::function<void ()> > > >::~queue() (
this=0x7fffffffdaf0, __in_chrg=<optimized out>)
at /usr/include/c++/5/bits/stl_queue.h:96
#12 0x000000000040b6c0 in thread_pool::~thread_pool (this=0x7fffffffdaf0,
---Type <return> to continue, or q <return> to quit---
__in_chrg=<optimized out>) at main.cpp:63
#13 0x0000000000408b60 in main () at main.cpp:140
I'm puzzled by this because the Process hasn't yet gone out of scope and I'm passing a copy of the boost::function<void()> to the thread pool for processing.
Any ideas?
The stack trace indicates that you are destroying a std::function that has not been properly initialized (e.g. some random memory location that is treated as being a std::function) or that you are destroying a std::function twice.
The problem is that your program pushes to tasks only once, but pops five times, hence you remove elements from an empty deque, which is undefined behaviour.
The while loop in pool_main terminates if running is false, and running may be false even if the deque is empty. Then you pop unconditionally. You might consider correcting pool_main as follows:
void pool_main() {
// wait on condition variable
// while the task is empty and the pool is still
// running
boost::unique_lock<boost::mutex> lock(mutex);
while (tasks.empty() && running) {
condition.wait(lock);
}
// copy task locally and remove from the queue. this is
// done within it's own scope so that the task object is destructed
// immediately after running the task. This is useful in the
// event that the function contains shared_ptr arguments
// bound via 'bind'
if (!tasks.empty ()) { // <--- !!!!!!!!!!!!!!!!!!!!!!!!
auto task = tasks.front();
tasks.pop();
lock.unlock();
// run the task
try {
std::cout << "running task" << std::endl;
task();
} catch (const std::exception &) {
// supress
}
}
// task has finished so increment count of availabe threads
lock.lock();
++available;
};
I am, however, not sure whether the logic regarding available is correct. Shouldn't available be decremented on starting the processing of a task and be incremented when it is finished (hence be changed within pool_main only and only within the newly introduced if clause)?
You don't seem to be allocating memory for
extern char **environ;
anywhere. Though wouldn't that be a link error?
Cutting this back to be a minimal reproduction case would help a lot. There's a lot of code here that's presumably not necessary to reproduce the problem.
Also, what is this:
// supress exceptions
If you are getting exceptions while joining your threads, then you presumably haven't joined them all and cleaning up the threads without joining them will cause an error after main exits.
Compiling with gcc 4.7.2 on Ubuntu, compiled with -std=c++11 -O0 -pthread, I somehow created a deadlock in code that doesn't seem like it should ever run into that problem. I have a thread which just acquires a lock and then runs through a vector<function<void()>>, calling everything. Meanwhile, the main thread pushes std::packaged_task<int()>s onto it one-by-one and blocks on when that task's future returns. The tasks themselves are trivial (print and return).
Here is the full code. Running the app sometimes succeeds, but within a few tries will hang:
#include <iostream>
#include <future>
#include <thread>
#include <vector>
#include <functional>
std::unique_lock<std::mutex> lock() {
static std::mutex mtx;
return std::unique_lock<std::mutex>{mtx};
}
int main(int argc, char** argv)
{
std::vector<std::function<void()>> messages;
std::atomic<bool> running{true};
std::thread thread = std::thread([&]{
while (running) {
auto lk = lock();
std::cout << "[T] locked with " << messages.size() << " messages." << std::endl;
for (auto& fn: messages) {
fn();
}
messages.clear();
}
});
for (int i = 0; i < 1000000; ++i) {
std::packaged_task<int()> task([=]{
std::cout << "[T] returning " << i << std::endl;
return i;
});
{
auto lk = lock();
messages.emplace_back(std::ref(task));
}
task.get_future().get();
}
running = false;
thread.join();
}
Sample output:
[T] returning 127189
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 1 messages.
[T] returning 127190
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 1 messages.
[T] returning 127191
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 0 messages.
[T] locked with 1 messages.
... hangs forever ...
What's going on? Why does the call into packaged_task::operator() hang? Where is the deadlock? Is this a gcc bug?
[update] Upon deadlock, the two threads are at:
Thread 1 (line 39 is the task.get_future().get() line):
#0 pthread_cond_wait##GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1 0x00007feb01fe800c in __gthread_cond_wait (this=Unhandled dwarf expression opcode 0xf3
)
at [snip]/libstdc++-v3/include/x86_64-unknown-linux-gnu/bits/gthr-default.h:879
#2 std::condition_variable::wait (this=Unhandled dwarf expression opcode 0xf3
) at [snip]/gcc-4.7.2/libstdc++-v3/src/c++11/condition_variable.cc:52
#3 0x0000000000404aff in void std::condition_variable::wait<std::__future_base::_State_base::wait()::{lambda()#1}>(std::unique_lock<std::mutex>&, std::__future_base::_State_base::wait()::{lambda()#1}) (this=0x6111e0, __lock=..., __p=...)
at [snip]gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/condition_variable:93
#4 0x0000000000404442 in std::__future_base::_State_base::wait (this=0x6111a8)
at [snip]gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/future:331
#5 0x00000000004060fb in std::__basic_future<int>::_M_get_result (this=0x7fffc451daa0)
at [snip]gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/future:601
#6 0x0000000000405488 in std::future<int>::get (this=0x7fffc451daa0)
at [snip]gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/future:680
#7 0x00000000004024dc in main (argc=1, argv=0x7fffc451dbb8) at test.cxx:39
and Thread 2 (line 22 is the fn() line):
#0 pthread_once () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S:95
#1 0x00000000004020f6 in __gthread_once (__once=0x611214, __func=0x401e68 <__once_proxy#plt>)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/x86_64-unknown-linux-gnu/bits/gthr-default.h:718
#2 0x0000000000404db1 in void std::call_once<void (std::__future_base::_State_base::*)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()()>&, bool&), std::__future_base::_State_base* const, std::reference_wrapper<std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()()> >, std::reference_wrapper<bool> >(std::once_flag&, void (std::__future_base::_State_base::*&&)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()()>&, bool&), std::__future_base::_State_base* const&&, std::reference_wrapper<std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()()> >&&, std::reference_wrapper<bool>&&) (__once=..., __f=#0x7feb014fdc10)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/mutex:819
#3 0x0000000000404517 in std::__future_base::_State_base::_M_set_result(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()()>, bool) (this=0x6111a8, __res=..., __ignore_failure=false)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/future:362
#4 0x0000000000407af0 in std::__future_base::_Task_state<int ()()>::_M_run() (this=0x6111a8)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/future:1271
#5 0x00000000004076cc in std::packaged_task<int ()()>::operator()() (this=0x7fffc451da30)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/future:1379
#6 0x000000000040745a in std::_Function_handler<void ()(), std::reference_wrapper<std::packaged_task<int ()()> > >::_M_invoke(std::_Any_data const&) (
__functor=...) at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/functional:1956
#7 0x00000000004051f2 in std::function<void ()()>::operator()() const (this=0x611290)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/functional:2311
#8 0x000000000040232f in operator() (__closure=0x611040) at test.cxx:22
#9 0x0000000000403d8e in _M_invoke<> (this=0x611040)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/functional:1598
#10 0x0000000000403cdb in operator() (this=0x611040)
at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/functional:1586
#11 0x0000000000403c74 in _M_run (this=0x611028) at [snip]/gcc-4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../include/c++/4.7.2/thread:115
#12 0x00007feb01feae10 in execute_native_thread_routine (__p=Unhandled dwarf expression opcode 0xf3
) at [snip]/gcc-4.7.2/libstdc++-v3/src/c++11/thread.cc:73
#13 0x00007feb018879ca in start_thread (arg=<value optimized out>) at pthread_create.c:300
#14 0x00007feb015e569d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#15 0x0000000000000000 in ?? ()
It seems that the problem is that you destroy the packaged_task possibly before operator() returns in the worker thread. This is most likely undefined behaviour. The program works fine for me if I re-aquire the mutex in the loop after waiting for the future to return a result. This serializes operator() and the destructor of the packaged_task.
I can't explain why your code was broken, but I did find a way to fix it (storing tasks, not std::functions constructed from tasks):
#include <iostream>
#include <future>
#include <thread>
#include <vector>
#include <functional>
#include <unistd.h>
int main(int argc, char** argv)
{
// Let's face it - your lock() function was kinda weird.
std::mutex mtx;
// I've changed this to a vector of tasks, from a vector
// of functions. Seems to have done the job. Not sure exactly
// why but this seems to be the proper way to go.
std::vector<std::packaged_task<int()>> messages;
std::atomic<bool> running{true};
std::thread thread([&]{
while (running) {
std::unique_lock<std::mutex> l{mtx};
std::cout << "[T] locked with " << messages.size() << " messages." << std::endl;
for (auto& fn: messages) {
fn();
}
messages.clear();
}
});
for (int i = 0; i < 1000000; ++i) {
std::packaged_task<int()> task([i]{
std::cout << "[T] returning " << i << std::endl;
return i;
});
// Without grabbing this now, if the thread executed fn()
// before I do f.get() below, it complained about having
// no shared state.
std::future<int> f = task.get_future();
{
std::unique_lock<std::mutex> l{mtx};
messages.emplace_back(std::move(task));
}
f.get();
}
running = false;
thread.join();
}
At the very least, if this code also deadlocks, then it hasn't yet for me.
I have encountered a strange problem with boost::asio::io_service::run. Sometimes this run function seems to eat the whole cpu(100%), and sometimes not. I am not very clear about the pattern.
the relevant code:
class Asio {
public:
Asio() :
io_service_(new boost::asio::io_service),
run_() {}
void Start() {
if (!asio_thread_.joinable()) {
run_ = true;
asio_thread_ = std::thread([=] {
Run();
});
}
}
boost::asio::io_service* io_service() { return io_service_.get(); }
protected:
void Run() {
for (;;) {
boost::system::error_code ec;
io_service()->run(ec);
if (run_) {
io_service()->reset();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
} else {
break;
}
}
}
protected:
std::unique_ptr<boost::asio::io_service> io_service_;
std::thread asio_thread_;
std::atomic<bool> run_;
};
When the run function runs normally, below is the callstack
#0 0x00000035f74e9163 in epoll_wait () from /lib64/libc.so.6
#1 0x0000000000b3f6ef in boost::asio::detail::epoll_reactor::run(bool, boost::asio::detail::op_queue<boost::asio::detail::task_io_service_operation>&) ()
#2 0x0000000000b40111 in boost::asio::detail::task_io_service::do_run_one(boost::asio::detail::scoped_lock<boost::asio::detail::posix_mutex>&, boost::asio::detail::task_io_service_thread_info&, boost::system::error_code const&) ()
#3 0x0000000000b3feaf in boost::asio::detail::task_io_service::run(boost::system::error_code&) ()
#4 0x0000000000b403fd in boost::asio::io_service::run(boost::system::error_code&) ()
#5 0x0000000000b3ddc1 in Asio::Run() ()
When the run function behaves abnormally, below is the callstack:
#0 0x00000031bbee53c9 in syscall () from /lib64/libc.so.6
#1 0x00007f831d1d3d68 in std::chrono::_V2::steady_clock::now() () from /usr/local/gcc48/lib64/libstdc++.so.6
#2 0x0000000000b45b6d in boost::asio::detail::chrono_time_traits<std::chrono::_V2::steady_clock, boost::asio::wait_traits<std::chrono::_V2::steady_clock> >::now() ()
#3 0x0000000000b45608 in boost::asio::detail::timer_queue<boost::asio::detail::chrono_time_traits<std::chrono::_V2::steady_clock, boost::asio::wait_traits<std::chrono::_V2::steady_clock> > >::get_ready_timers(boost::asio::detail::op_queue<boost::asio::detail::task_io_service_operation>&) ()
#4 0x0000000000b3f5d7 in boost::asio::detail::timer_queue_set::get_ready_timers(boost::asio::detail::op_queue<boost::asio::detail::task_io_service_operation>&) ()
#5 0x0000000000b3f815 in boost::asio::detail::epoll_reactor::run(bool, boost::asio::detail::op_queue<boost::asio::detail::task_io_service_operation>&) ()
#6 0x0000000000b40111 in boost::asio::detail::task_io_service::do_run_one(boost::asio::detail::scoped_lock<boost::asio::detail::posix_mutex>&, boost::asio::detail::task_io_service_thread_info&, boost::system::error_code const&) ()
#7 0x0000000000b3feaf in boost::asio::detail::task_io_service::run(boost::system::error_code&) ()
#8 0x0000000000b403fd in boost::asio::io_service::run(boost::system::error_code&) ()
#9 0x0000000000b3ddc1 in Asio::Run() ()
In both cases, there're some pending handlers in the io_service, so the io_service::run should not return and should be wait for the event to happen.
Any advise is welcome.
I did further check, it's seems it's due to the boost::asio::steady_timer used. The usage of steady_timer involves the usage of the following pattern:
boost::asio::steady_timer timer;
timer.expires_at(some_expiry, error_code)
timer.async_wait((=)(boost::system::error_code ec) {
// some operation
timer.expires_at(new_expiry, error_code);
timer.asyn_wait(...);
});
Where the timer is wrapped in a shared pointer, and it's safe to copy into the lamda function.