c++ Schwarz counter with thread_local - c++

Can I use Schwarz counter (aka Nifty counter) idiom, with thread_local? (Assuming I replace all static with thread_local)
I need this (helper for java jni threads):
class ThisThread{
JNIEnv* jni_env{nullptr};
public:
JNIEnv* getEnv(){
if (!jni_env){
// Attach thread
java_vm->GetEnv((void**)&jni_env, JNI_VERSION);
java_vm->AttachCurrentThread(&jni_env, NULL);
}
return jni_env;
}
~ThisThread(){
if (!jni_env) return;
// Deattach thread
java_vm->DetachCurrentThread();
}
};
static thread_local ThisThread this_thread;
To be constructed first, and destructed last in each thread.
I may call this_thread->getEnv() from destructor/constructor of other static or thread_local object.
UPDATE
https://stackoverflow.com/a/30200992 - here, standard says that thread_local destructors called BEFORE static, and I need this one to be after.

I think the best solution is to implement the schwartz counter as normal, but implement the ThisThread class in terms of a thread_local static Impl.
Complete example with outputs:
// header file
#include <memory>
#include <mutex>
#include <iostream>
#include <thread>
std::mutex emit_mutex;
template<class...Ts>
void emit(Ts&&...ts)
{
auto action = [](auto&&x) { std::cout << x; };
auto lock = std::unique_lock<std::mutex>(emit_mutex);
using expand = int[];
expand{ 0,
(action(std::forward<Ts>(ts)), 0)...
};
}
struct ThisThread
{
struct Impl
{
Impl()
{
emit("ThisThread created on thread ", std::this_thread::get_id(), '\n');
}
~Impl()
{
emit("ThisThread destroyed on thread ", std::this_thread::get_id(), '\n');
}
void foo()
{
emit("foo on thread ", std::this_thread::get_id(), '\n');
}
};
decltype(auto) foo() { return get_impl().foo(); }
private:
static Impl& get_impl() { return impl_; }
static thread_local Impl impl_;
};
struct ThisThreadInit
{
ThisThreadInit();
~ThisThreadInit();
static int initialised;
};
extern ThisThread& thisThread;
static ThisThreadInit thisThreadInit;
// cppfile
static std::aligned_storage_t<sizeof(ThisThread), alignof(ThisThread)> storage;
ThisThread& thisThread = *reinterpret_cast<ThisThread*>(std::addressof(storage));
int ThisThreadInit::initialised;
thread_local ThisThread::Impl ThisThread::impl_;
ThisThreadInit::ThisThreadInit()
{
if (0 == initialised++)
{
new (std::addressof(storage)) ThisThread ();
}
}
ThisThreadInit::~ThisThreadInit()
{
if (0 == --initialised)
{
thisThread.~ThisThread();
}
}
// now use the object
#include <thread>
int main()
{
thisThread.foo();
auto t = std::thread([]{ thisThread.foo(); });
t.join();
}
example output:
ThisThread created on thread 140475785611072
foo on thread 140475785611072
ThisThread created on thread 140475768067840
foo on thread 140475768067840
ThisThread destroyed on thread 140475768067840
ThisThread destroyed on thread 140475785611072

This does not answer how to make Schwarz counter for thread_local static 's (so I don't accept this as answer). But in the end, I came up with this platform-dependent(Linux/Android) solution.
#include <jni.h>
#include <cassert>
#include "JavaVM.h"
namespace jni_interface{
class ThisThread{
inline static thread_local pthread_key_t p_key;
static void pthread_dstr(void *arg){
if (!jni_env) return;
java_vm->DetachCurrentThread();
jni_env = nullptr;
pthread_setspecific(p_key, NULL);
pthread_key_delete(p_key);
}
static void register_dstr(void *arg){
{
const int res = pthread_key_create(&p_key, pthread_dstr);
assert(res != EAGAIN);
assert(res != ENOMEM);
assert(res == 0);
}
{
const int res = pthread_setspecific(p_key, arg);
assert(res == 0);
}
}
inline static thread_local JNIEnv* jni_env{nullptr};
public:
JNIEnv* getEnv(){
if (!jni_env){
assert(java_vm);
java_vm->GetEnv((void**)&jni_env, JNI_VERSION);
java_vm->AttachCurrentThread(&jni_env, NULL); // safe to call in main thread
register_dstr(jni_env);
}
return jni_env;
}
};
static thread_local ThisThread this_thread;
}
Even if by some reason, pthread_dstr will be called before C++'s static thread_locals (or interleaved) [in other words ThisThread destroyed before last use], on next call to object (getEnv()) we kinda re-init/re-create it and register pthread_dstr for another round.
N.B. At total maximum we can have PTHREAD_DESTRUCTOR_ITERATIONS rounds, which is 4. But we always will end up on a second one, at worst case (if C++ thread_local implementation will use p_thread destructors [which will mean that OUR pthread_dstr may not be called last in the first round]).

Related

Writing a thread safe optimized Datastore class

I am trying to write a thread safe datastore class.
This class object is shared with between many threads in Generator and Consumer, where the class members can be set or get.
By calling setDatastore() the object is set for usage at different threads.
Below is my code,
#ifndef IF_DATA_STORE_H
#define IF_DATA_STORE_H
#include <mutex>
#include <shared_mutex>
#include <memory>
class DataType1{public:int value;};
class DataType2{public:int value;};
class DataStore
{
public:
DataStore(): _member1(), _member2(){}
~DataStore(){}
// for member1
void setMember1(const DataType1& val)
{
std::unique_lock lock(_mtx1); // no one can read/write!
_member1 = val;
}
const DataType1& getMember1() const
{
std::shared_lock lock(_mtx1); // multiple threads can read!
return _member1;
}
// for member2
void setMember2(const DataType2& val)
{
std::unique_lock lock(_mtx2); // no one can read/write!
_member2 = val;
}
const DataType2& getMember2() const
{
std::shared_lock lock(_mtx2); // multiple threads can read!
return _member2;
}
private:
mutable std::shared_mutex _mtx1;
mutable std::shared_mutex _mtx2;
DataType1 _member1;
DataType2 _member2;
// different other member!
};
// now see where data is generated/consumed!
class Generator
{
public:
void start(){/* start thread!*/}
void setDataStore(std::shared_ptr<DataStore> store)
{
_store = store;
}
void threadRoutine() //this is called from different thread and updating values
{
// some code...
{
_data.value = 10; // keep a local updated copy of data!
_store->setMember1(_data);
}
}
private:
std::shared_ptr<DataStore> _store;
DataType1 _data;
};
class Consumer
{
public:
void start(){/* start thread!*/}
void setDataStore(std::shared_ptr<DataStore> store)
{
_store = store;
}
void threadRoutine() // running a check on datastore every 1sec
{
// some code...
auto val = _store->getMember1();
// do something..
}
private:
std::shared_ptr<DataStore> _store;
};
// fianlly start all!
int main()
{
// somewhere in main thread
std::shared_ptr<DataStore> store;
Consumer c; Generator g;
c.setDataStore(store); c.start();
g.setDataStore(store); g.start();
}
#endif
Questions:
Is there any other way than creating multiple shared mutex for each member?
In Generator.threadRoutine() if I keep a local copy of DataType1 does this cause high memory issues (I see high cpu and memory) when this block called frequently, don't if this is the root cause of it.
Any other better way suggested?

Smart pointers Object pool test application fails at finish/exit

I have written a multi-threaded app in Qt/C++11 , Windows.
The idea was to have and recycle some strings from a pool, using smart pointers.
Here is stringpool.cpp:
#include "stringpool.h"
QMutex StringPool::m_mutex;
int StringPool::m_counter;
std::stack<StringPool::pointer_type<QString>> StringPool::m_pool;
StringPool::pointer_type<QString> StringPool::getString()
{
QMutexLocker lock(&m_mutex);
if (m_pool.empty())
{
add();
}
auto inst = std::move(m_pool.top());
m_pool.pop();
return inst;
}
void StringPool::add(bool useLock, QString * ptr)
{
if(useLock)
m_mutex.lock();
if (ptr == nullptr)
{
ptr = new QString();
ptr->append(QString("pomo_hacs_%1").arg(++m_counter));
}
StringPool::pointer_type<QString> inst(ptr, [this](QString * ptr) { add(true, ptr); });
m_pool.push(std::move(inst));
if(useLock)
m_mutex.unlock();
}
And here is stringpool.h:
#pragma once
#include <QMutex>
#include <QString>
#include <functional>
#include <memory>
#include <stack>
class StringPool
{
public:
template <typename T> using pointer_type = std::unique_ptr<T, std::function<void(T*)>>;
//
StringPool() = default;
pointer_type<QString> getString();
private:
void add(bool useLock = false, QString * ptr = nullptr);
//
static QMutex m_mutex;
static int m_counter;
static std::stack<pointer_type<QString>> m_pool;
};
And here is the test app:
#include <QtCore>
#include "stringpool.h"
static StringPool Pool;
class Tester : public QThread
{
public:
void run() override
{
for(int i = 0; i < 20; i++)
{
{
auto str = Pool.getString();
fprintf(stderr, "Thread %p : %s \n", QThread::currentThreadId(), str->toUtf8().data());
msleep(rand() % 500);
}
}
fprintf(stderr, "Thread %p : FINITA! \n", QThread::currentThreadId());
}
};
#define MAX_TASKS_NBR 3
int main(int argc, char *argv[])
{
QCoreApplication app(argc, argv);
Tester tester[MAX_TASKS_NBR];
for(auto i = 0; i < MAX_TASKS_NBR; i++)
tester[i].start();
for(auto i = 0; i < MAX_TASKS_NBR; i++)
tester[i].wait();
//
return 0;
}
It compiles ok, it runs and produces the following result:
Well, the idea is that the app runs (apparently) OK.
But immediately after it finishes, I have this error:
Does anyone have an idea how can I fix this?
The reason for this error has to do with the smart pointer and not the multithreading.
You define pointer_type as an alias for unique_ptr with a custom deleter
template <typename T> using pointer_type = std::unique_ptr<T, std::function<void(T*)>>;
You create strings with custom deleters
void StringPool::add(bool useLock, QString * ptr)
{
if (ptr == nullptr)
{
ptr = new QString();
ptr->append(QString("pomo_hacs_%1").arg(++m_counter));
}
StringPool::pointer_type<QString> inst(ptr, [this](QString * ptr) { add(true, ptr); }); // here
m_pool.push(std::move(inst));
}
At the end of the program, m_pool goes out of scope and runs its destructor.
Consider the path of execution...m_pool will try to destroy all its members. For each member, the custom deleter. The custom deleter calls add. add pushes the pointer to the stack.
Logically this is an infinite loop. But it's more likely to create some kind of undefined behavior by breaking the consistency of the data structure. (i.e. The stack shouldn't be pushing new members while it is being destructed). An exception might occur due to function stack overflow or literal stack overflow (heh) when there is not enough memory to add to the stack data structure. Since the exception occurs in a destructor unhandled, it ends the program immediately. But it could also very likely be a seg fault due to the pushing while destructing.
Fixes:
I already didn't like your add function.
StringPool::pointer_type<QString> StringPool::getString()
{
QMutexLocker lock(&m_mutex);
if (m_pool.empty())
{
auto ptr = new QString(QString("pomo_hacs_%1").arg(++m_counter));
return pointer_type<QString>(ptr, [this](QString* ptr) { reclaim(ptr); });
}
auto inst = std::move(m_pool.top());
m_pool.pop();
return inst;
}
void StringPool::reclaim(QString* ptr)
{
QMutexLocker lock(&m_mutex);
if (m_teardown)
delete ptr;
else
m_pool.emplace(ptr, [this](QString* ptr) { reclaim(ptr); });
}
StringPool::~StringPool()
{
QMutexLocker lock(&m_mutex);
m_teardown = true;
}
StringPool was a static class but with this fix it must now be a singleton class.
It might be tempting to pull m_teardown out of the critical section, but it is shared data, so doing will open the door for race conditions. As a premature optimization, you could make m_teardown an std::atomic<bool> and perform a read check before entering the critical section (can skip the critical section if false) but this requires 1) you check the value again in the critical section and 2) you change from true to false exactly once.

Mutex inside templated struct causes segmentation fault

For exchanging data between classes, I use a kind of "main-hub-class", from which each other class can access the data.
Now, to make this thread-safe I came up with a templated struct that holds a variable and a boost::shared_mutex for that variable:
class DataExchange {
[...]
template <typename T>
struct ShareDataEntry {
T value;
boost::shared_mutex _mutex;
};
SharedDataEntry<int> ultraSonicValue;
[...]
}
In the .cpp I am trying to use that like this:
void DataExchange::setUltrasSonicValue(int _value) {
boost::unique_lock<boost::shared_mutex> lock ( ultraSonicValue._mutex ); // <-- this segfaults
ultraSonicValue.value = _value;
lock.unlock();
}
From gdb, I get the error
__GI____pthread_mutex_lock (mutex=0x58) at pthread_mutex_lock.c:66
66 pthread_mutex_lock.c: No such file or directory
What am I doing wrong? My guess is that the mutex isn't initialized? But how (and where) would I do that?
EDIT
Updated code sample, now showing everything I use, also with a test for the problem I described:
DataExchange.hpp:
#pragma once
#include <boost/thread.hpp>
class DataExchange {
private:
DataExchange();
DataExchange(DataExchange const&) {};
DataExchange& operator=(DataExchangeconst&) { return *instance; };
static DataExchange* instance;
template <typename T>
struct ShareDataEntry {
T value;
boost::shared_mutex _mutex;
};
// simple int with extra mutex
int testIntOne;
boost::shared_mutex testIntOne_M;
// int in my struct
SharedDataEntry<int> testIntTwo;
public:
static DataExchange* getInstance();
~DataExchange() { delete instance; };
void setTestIntOne(int _tmp);
int getTestIntOne();
void setTestIntTwo(int _tmp);
int getTestIntTwo();
}
DataExchange.cpp:
#include "infrastructure/DataExchange.hpp"
DataExchange* DataExchange::instance = NULL;
DataExchange::DataExchange() {};
DataExchange* DataExchange::getInstance() {
if (instance == NULL) instance = new DataExchange;
return instance;
}
void DataExchange::setTestIntOne(int _tmp) {
boost::unique_lock<boost::shared_mutex> lock ( testIntOne_M ); // this is now where the segfault occurs
testIntOne = _tmp;
lock.unlock();
}
int DataExchange::getTestIntOne() {
boost::shared_lock<boost::shared_mutex> lock ( testIntOne_M );
return testIntOne;
}
void DataExchange::setTestIntTwo(int _tmp) {
boost::unique_lock<boost::shared_mutex> lock ( testIntTwo._mutex );
testIntTwo.value = _tmp;
lock.unlock();
}
int DataExchange::getTestIntTwo() {
boost::shared_lock<boost::shared_mutex> lock ( testIntTwo._mutex );
return testIntTwo.value;
}
main.cpp:
#inlcude "infarstructure/DataExchange.hpp"
int main(int argc, char *argv[]) {
DataExchange* dataExchange = DataExchange::getInstance();
// this line segfaults already, altough I was pretty sure it worked before
dataExchange->setTestIntOne(5);
cout << dataExchange->getTestIntOne() << "\n";
dataExchange->setTestIntTwo(-5);
cout << dataExchange->getTestIntTwo() << "\n";
return 0;
}
Does it segfault because the mutex wasn't initialized?
Also, I am very sure it worked earlier, at least the first way (without the struct).
Second Edit:
Alright, everything is working fine now. It was a stupid mistake on my part. Both approaches work flawlessly - as long as one initializes the DataExchange object.

Cast to self pointer in static method throws segfault on (derived) method call

I am trying to implement a simple thread starter class. Below you find a Simple base class implementation and 2 derived variations that are supposed to work as starters. The first one throws segfaults at static void* Threaded::run (void* self) sporadically. I suppose this might a pointer issue but I am not able to figure out why?
Does this in Threaded::start point to a wrong address or is there any other issue with my first derivation?
This is how it's used:
Thread thread (ptr_to_some_obj);
thread.start (&this_obj::callback);
thread.detach ();
Simple base class
class Threaded
{
public:
Threaded () {/* empty */}
virtual ~Threaded () {/* empty */}
/** Returns true if the thread was successfully started, false if there was an error starting the thread */
bool start ()
{
return (pthread_create (&_thread, NULL, run, this) == 0);
}
/** Implement this method in your subclass with the code which allows to gently stop execution. */
virtual void stop () = 0;
/** Will not return until the internal thread has exited. */
void wait ()
{
(void) pthread_join (_thread, NULL);
}
bool detach ()
{
return (pthread_detach (_thread) == 0);
}
protected:
/** Implement this method in your subclass with the code you want your thread to run. */
virtual void run () = 0;
static void* run (void* self)
{
((Threaded*) self) -> run ();
return NULL;
}
pthread_t _thread;
};
Derived class 1 (throws segfault at ((Threaded*) self) -> run (); above)
typedef void (*staticcall)(void*);
class Thread : public Threaded
{
public:
Thread (void* passthru)
:_call (NULL)
{
_passthru = passthru;
}
~Thread () { /* empty */ }
bool start (staticcall call)
{
_call = call;
assert (_call);
return start ();
}
void stop ()
{
// nothing
}
protected:
void run ()
{
(_call) (_passthru);
}
bool start ()
{
return Threaded::start ();
}
private:
Thread () { };
void* _passthru;
staticcall _call;
};
Derived class 2 (works, but i'd rather have Derived class 1 implementation)
typedef void (*staticcall)(void*);
class Thread2 : public Threaded
{
public:
Thread2 (void* passthru)
{
_passthru = passthru;
}
~Thread2 () { /* empty */ }
bool start (staticcall call)
{
_call = call;
assert (_call);
return start ();
}
void stop ()
{
// nothing
}
protected:
void run () { }
static void* run2 (void*)
{
(_call) (_passthru);
return NULL;
}
bool start ()
{
return (pthread_create (&_thread, NULL, run2, NULL) == 0);
}
private:
Thread2 () { };
static void* _passthru;
static staticcall _call;
};
void* Thread2::_passthru;
staticcall Thread2::_call;
As pointed out by molbdnilo:
pthread_create only queues the new thread. There are no guarantees regarding when the thread function will be called, and thread must be alive at that time.
Since I do not want to keep a list of spawned threads around I solved this with the use of pthread_cond_wait and pthread_cond_signal. The spawner will wait for a signal that is emitted by the method that runs in the thread. This way the thread creator won't destroy the thread object before the to-be-threaded method is called.
class ThreadSpawner
{
public:
ThreadSpawner ()
{
pthread_mutex_init (&MtxThreadStarter, 0);
pthread_cond_init (&CondThreadStarter, 0);
}
~ThreadSpawner ()
{
pthread_cond_destroy (&CondThreadStarter);
pthread_mutex_destroy (&MtxThreadStarter);
}
void spawn ()
{
Thread thread (pass_object);
pthread_mutex_lock (&MtxThreadStarter);
if (thread.start (&ThreadSpawner::callback))
{
// wait here for signal
pthread_cond_wait (&CondThreadStarter, &MtxThreadStarter);
thread.detach ();
}
pthread_mutex_unlock (&MtxThreadStarter);
}
static void callback (void* passthru)
{
// send signal to thread spawner
pthread_mutex_lock (&MtxThreadStarter);
pthread_cond_signal (&CondThreadStarter);
pthread_mutex_unlock (&MtxThreadStarter);
// do threaded work
}
private:
static pthread_mutex_t MtxThreadStarter;
static pthread_cond_t CondThreadStarter;
}
pthread_mutex_t ThreadSpawner::MtxThreadStarter = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t ThreadSpawner::CondThreadStarter = PTHREAD_COND_INITIALIZER;
Edit: a solution to let a thread execute as a method invokation
Well the solution I thought about in the recent discussion would work if the thread entry point was a simple function.
However, I suppose the idea is more to take advantage of an actual object, so that the thread body is actually an invokation of the body() method.
This is more tricky, since there must be a live instance of the derived class for the whole thread duration, and yet the original instance is likely to get out of scope after a start / detach sequence.
One possible (though somewhat costly) solution is to have the thread body stub create a local copy of the original instance on the stack. So the thread launcher will construct a thread object, and the thread itself will copy it.
With this system, you only need to make sure the original instance will be kept live in the interval between pthread_create and thread activation by the scheduler.
This requires a semaphore (which will be done by hand with a mutex/cond. var. pair for the 1.000.000th time, since bloody C++11 does not have one in store).
To hide this messy code inside the base class, you need to downcast the base pointer into the appropriate subclass type.
I resorted to templating the base class, though there might be smarter solutions out there. I just could not think of any.
To test the solution, I use a counter system that detects whether the original Thread instance has been deleted before the thread stub could make a local copy.
The SYNC compilation flag activates the semaphore. The expected program output is 0->0. If other numbers appear, it means some threads ran on messed-up instances.
I tested it on Ubuntu in a VM, and it seemed to work well enough.
#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <thread> // sleep_for
#include <chrono> // milliseconds
#include <pthread.h>
#define SYNC // undefine this to see what happens without synchronization
typedef void *(*tEntryPoint) (void *);
#include <mutex>
#include <condition_variable>
class semaphore {
private:
std::mutex m;
std::condition_variable v;
int c;
public:
semaphore (int count = 0):c(count){}
void V()
{
#ifdef SYNC
std::unique_lock<std::mutex> l(m);
c++;
v.notify_one();
#endif
}
void P()
{
#ifdef SYNC
std::unique_lock<std::mutex> l(m);
while (c == 0) v.wait(l);
c--;
#endif
}
};
template<typename Derived>
class Threaded
{
public:
/** Returns true if the thread was successfully started, false if there was an error starting the thread */
bool start(void)
{
destructor_guard = new semaphore();
bool res = (pthread_create(&_thread, NULL, (tEntryPoint)entry_point, this) == 0);
if (res) destructor_guard->P(); // wait fot thread to start execution
delete destructor_guard;
return res;
}
/** This optional method will be executed after the thread main body */
virtual void stop() {}
/** Will not return until the internal thread has exited. */
void wait()
{
(void)pthread_join(_thread, NULL);
}
/** Will let the underlying task run independently */
bool detach()
{
return (pthread_detach(_thread) == 0);
}
private:
static void * entry_point(Derived * self)
{
Derived local_self = *self;
local_self.destructor_guard->V(); // original can be deleted
local_self.body();
local_self.stop();
return NULL;
}
pthread_t _thread;
semaphore* destructor_guard;
};
#define NUM_THREADS 9
#define REPEAT 3000
static int signature[NUM_THREADS + 1] = { 0, };
class Thread : public Threaded<Thread>
{
unsigned id;
public:
Thread(unsigned id) : id(id) {}
~Thread() { id = 0; }
void body(void)
{
signature[id%(NUM_THREADS+1)]++;
}
void stop(void)
{
std::this_thread::sleep_for(std::chrono::milliseconds(10));
signature[id%(NUM_THREADS+1)]++;
}
};
void launch_a_thread(int id)
{
Thread thread (id);
if (thread.start())
{
// thread.wait();
thread.detach();
}
}
int main(void)
{
for (unsigned i = 0; i != REPEAT*NUM_THREADS; i++) launch_a_thread(1+i%NUM_THREADS);
std::this_thread::sleep_for(std::chrono::milliseconds(100)); // leave enough time for free running threads to terminate
for (int i = 0 ; i <= NUM_THREADS ; i++) if (signature[i] != 2*REPEAT) printf ("%d -> %d\n", i, signature[i]);
return 0;
}

Static initialization in c++ and thread safety

Static initialization of class instances is not thread-safe. The code below is an example of what not to do :
extern int computesomething();
class cachedcomputation
{
public:
cachedcomputation()
{
result = computesomething();
}
int result;
};
void usecached()
{
static cachedcomputation c;
// use of c.result - may break
}
However, would the code below be thread-safe ? (Ignoring the ugliness of the solution) When or why would it break ?
extern int computesomething();
class cachedcomputation
{
public:
cachedcomputation()
{
if(0==InterlockedCompareExchange(&mutex, 1, 0))
{
// first thread
result = computesomething();
InterlockedExchange(&mutex, 2);
}
else
{
// other thread - spinlock until mutex==2
while(2!=InterlockedCompareExchange(&mutex, 2, 2)){}
}
}
int result;
private:
long mutex;
};
void usecached()
{
static cachedcomputation c;
// use of c.result - ???
}
You need to:
initialize your "mutex"
reset "mutex" at the end of if block: InterlockedExchange(&mutex, 0)
optionally convert if-statement to while, so your code would block until "mutex" is unlocked