Wait until a variable becomes zero - c++

I'm writing a multithreaded program that can execute some tasks in separate threads.
Some operations require waiting for them at the end of execution of my program. I've written simple guard for such "important" operations:
// Scope guard that counts in-flight "important" operations in one shared static
// counter: constructing a guard increments the counter, destroying it decrements it.
class CPendingOperationGuard final
{
public:
// Registers one pending operation by atomically incrementing the shared counter.
CPendingOperationGuard()
{
InterlockedIncrementAcquire( &m_ullCounter );
}
// Unregisters the operation by atomically decrementing the shared counter.
~CPendingOperationGuard()
{
InterlockedDecrementAcquire( &m_ullCounter );
}
// Intended contract (see the comments in the body): wait until every guard has been
// destroyed, giving up after dwTimeOut.
// NOTE(review): the body is still a placeholder and returns no value; it must be
// implemented before the function is called.
static bool WaitForAll( DWORD dwTimeOut )
{
// Here is a topic of my question
// Return false on timeout
// Return true if wait was successful
}
private:
// Number of currently alive guards, shared by all instances.
static volatile ULONGLONG m_ullCounter;
};
Usage is simple:
// Example of an "important" task: the guard is alive for the whole function body,
// so the pending-operation counter is raised on entry and lowered on exit.
void ImportantTask()
{
CPendingOperationGuard guard;
// Do work
}
// ...
// Shutdown path: waits for all guarded operations, passing 30000 as the timeout.
void StopExecution()
{
    const bool allFinished = CPendingOperationGuard::WaitForAll( 30000 );
    if(allFinished) {
        return;
    }
    // Handle error
}
The question is: how can I efficiently wait until m_ullCounter becomes zero, or until a timeout expires?
I have two ideas:
To launch this function in another separate thread and write WaitForSingleObject( hThread, dwTimeout ):
// Thread routine that spins until the pending-operation counter reads zero.
// The compare-exchange uses 0 for both the exchange and the comparand, so it never
// changes the counter; it only serves as an atomic read of the current value.
// Returns 0 as the thread exit code once the counter has been observed at zero.
DWORD WINAPI WaitWorker( LPVOID )
{
    while(InterlockedCompareExchangeRelease( &m_ullCounter, 0, 0 ))
        ;
    return 0; // a DWORD thread routine must not fall off the end without a value
}
But it will "eat" almost 100% of CPU time - bad idea.
Second idea is to allow other threads to start:
// Thread routine that polls the pending-operation counter until it reads zero,
// calling Sleep(0) between polls so other ready threads get a chance to run.
// The compare-exchange (0, 0) never modifies the counter; it is an atomic read.
// Returns 0 as the thread exit code once the counter has been observed at zero.
DWORD WINAPI WaitWorker( LPVOID )
{
    while(InterlockedCompareExchangeRelease( &m_ullCounter, 0, 0 ))
        Sleep( 0 );
    return 0; // a DWORD thread routine must not fall off the end without a value
}
But it will switch the execution context into kernel mode and back, which is too expensive for my task. A bad idea too.
The question is:
How can I wait, with almost zero overhead, until my variable becomes zero? Maybe without a separate thread... The main requirement is that the wait can be stopped by a timeout.
Maybe someone can suggest completely another idea for my task - to wait for all registered operations (like in WinAPI's ThreadPools - its API has, for instance, WaitForThreadpoolWaitCallbacks to perform waiting for ALL registered tasks).
PS: it is not possible to rewrite my code with ThreadPool API :(

Have a look at the WaitOnAddress() and WakeByAddressSingle()/WakeByAddressAll() functions introduced in Windows 8.
For example:
class CPendingOperationGuard final
{
public:
CPendingOperationGuard()
{
InterlockedIncrementAcquire(&m_ullCounter);
Wake­By­Address­All(&m_ullCounter);
}
~CPendingOperationGuard()
{
InterlockedDecrementAcquire(&m_ullCounter);
Wake­By­Address­All(&m_ullCounter);
}
static bool WaitForAll( DWORD dwTimeOut )
{
ULONGLONG Captured, Now, Deadline = GetTickCount64() + dwTimeOut;
DWORD TimeRemaining;
do
{
Captured = InterlockedExchangeAdd64((LONG64 volatile *)&m_ullCounter, 0);
if (Captured == 0) return true;
Now = GetTickCount64();
if (Now >= Deadline) return false;
TimeRemaining = static_cast<DWORD>(Deadline - Now);
}
while (WaitOnAddress(&m_ullCounter, &Captured, sizeof(ULONGLONG), TimeRemaining));
return false;
}
private:
static volatile ULONGLONG m_ullCounter;
};
Raymond Chen wrote a series of blog articles about these functions:
WaitOnAddress lets you create a synchronization object out of any data variable, even a byte
Implementing a critical section in terms of WaitOnAddress
Spurious wakes, race conditions, and bogus FIFO claims: A peek behind the curtain of WaitOnAddress
Extending our critical section based on WaitOnAddress to support timeouts
Comparing WaitOnAddress with futexes (futexi? futexen?)
Creating a semaphore from WaitOnAddress
Creating a semaphore with a maximum count from WaitOnAddress
Creating a manual-reset event from WaitOnAddress
Creating an automatic-reset event from WaitOnAddress
A helper template function to wait for WaitOnAddress in a loop

For this task you need something like Run-Down Protection instead of CPendingOperationGuard.
Before beginning an operation, you call ExAcquireRundownProtection, and only if it returns TRUE do you execute the operation. At the end you must call ExReleaseRundownProtection.
So the pattern must be the following:
// Run the operation only when run-down protection could be acquired, and release
// the protection again once the operation has finished.
if (ExAcquireRundownProtection(&RunRef)) {
do_operation();
ExReleaseRundownProtection(&RunRef);
}
When you want to stop this process and wait until all active calls to do_operation() have finished, you call ExWaitForRundownProtectionRelease (instead of WaitWorker).
After ExWaitForRundownProtectionRelease is called, the ExAcquireRundownProtection routine will return FALSE (so new operations will not start after this). ExWaitForRundownProtectionRelease waits to return until all calls the ExReleaseRundownProtection routine to release the previously acquired run-down protection (so when all current(if exist) operation complete). When all outstanding accesses are completed, ExWaitForRundownProtectionRelease returns
Unfortunately, this API is implemented by the system only in kernel mode and has no analog in user mode. However, it is not hard to implement the same idea yourself.
This is my example:
// Special values of the combined flag+counter word used by RundownProtection.
// v_init has only bit 31 set (the word is negative when read as LONG);
// v_complete is the value at which ReleaseRP reports completion.
enum RundownState {
v_complete = 0, v_init = 0x80000000
};
// User-mode run-down protection kept in a single LONG: bit 31 is cleared by
// Rundown_l() and the remaining bits are changed by AcquireRP()/ReleaseRP().
// T is the derived class; it must provide RundownCompleted(), which ReleaseRP()
// calls through static_cast<T*>(this).
template<typename T>
class RundownProtection
{
// Combined state word; negative (bit 31 set) until Rundown_l() has run.
LONG _Value;
public:
// Returns nonzero once _Value is no longer negative, i.e. bit 31 has been cleared.
_NODISCARD BOOL IsRundownBegin()
{
return 0 <= _Value;
}
// Tries to register one more active operation.
// Returns TRUE after atomically incrementing _Value while it was negative;
// returns FALSE as soon as _Value is observed non-negative.
_NODISCARD BOOL AcquireRP()
{
LONG Value, NewValue;
if (0 > (Value = _Value))
{
do
{
// Compare-exchange loop: retry with the freshly observed value for as long as
// that value is still negative.
NewValue = InterlockedCompareExchangeNoFence(&_Value, Value + 1, Value);
if (NewValue == Value) return TRUE;
} while (0 > (Value = NewValue));
}
return FALSE;
}
// Unregisters one operation; when the decrement yields v_complete (0) the derived
// class is notified through RundownCompleted().
void ReleaseRP()
{
if (InterlockedDecrement(&_Value) == v_complete)
{
static_cast<T*>(this)->RundownCompleted();
}
}
// Atomically clears bit 31; afterwards _Value is non-negative and AcquireRP() fails.
void Rundown_l()
{
InterlockedBitTestAndResetNoFence(&_Value, 31);
}
// Starts run-down: takes a reference first so the final ReleaseRP() (this one or a
// concurrent one) is what brings _Value down to v_complete.
// Does nothing when AcquireRP() fails.
void Rundown()
{
if (AcquireRP())
{
Rundown_l();
ReleaseRP();
}
}
// Starts in the v_init state unless another initial value is supplied.
RundownProtection(RundownState Value = v_init) : _Value(Value)
{
}
// Resets the state word to v_init with a plain (non-interlocked) store.
void Init()
{
_Value = v_init;
}
};
///////////////////////////////////////////////////////////////
// Run-down protection whose completion is reported through a Win32 event.
// g_guard is the single global instance used by the demo code.
class OperationGuard : public RundownProtection<OperationGuard>
{
    // The base template calls the private RundownCompleted() hook.
    friend RundownProtection<OperationGuard>;

private:
    HANDLE _hEvent;

    // Invoked by the base class from ReleaseRP(); signals the event.
    void RundownCompleted()
    {
        SetEvent(_hEvent);
    }

public:
    OperationGuard() : _hEvent(0)
    {
    }

    // Closes the event handle when Init() created one.
    ~OperationGuard()
    {
        if (!_hEvent)
        {
            return;
        }
        CloseHandle(_hEvent);
    }

    // Waits on the event for up to dwMilliseconds and returns the raw
    // WaitForSingleObject result.
    ULONG WaitComplete(ULONG dwMilliseconds = INFINITE)
    {
        const ULONG waitResult = WaitForSingleObject(_hEvent, dwMilliseconds);
        return waitResult;
    }

    // Creates the event. Returns NOERROR on success, otherwise GetLastError().
    ULONG Init()
    {
        _hEvent = CreateEvent(0, 0, 0, 0);
        if (_hEvent)
        {
            return NOERROR;
        }
        return GetLastError();
    }
} g_guard;
//////////////////////////////////////////////
// Worker thread: keeps running one-second "operations" for as long as run-down
// protection can still be acquired on g_guard, then exits with code 0.
ULONG CALLBACK PendingOperationThread(void*)
{
    for (;;)
    {
        if (!g_guard.AcquireRP())
        {
            break;
        }
        Sleep(1000);// do operation
        g_guard.ReleaseRP();
    }
    return 0;
}
void demo()
{
if (g_guard.Init() == NOERROR)
{
if (HANDLE hThread = CreateThread(0, 0, PendingOperationThread, 0, 0, 0))
{
CloseHandle(hThread);
}
MessageBoxW(0, 0, L"UI Thread", MB_ICONINFORMATION|MB_OK);
g_guard.Rundown();
g_guard.WaitComplete();
}
}
Why simply waiting until m_ullCounter becomes zero is not enough
If we read 0 from m_ullCounter, this means only that there was no active operation at that moment. A pending operation can still begin after we have checked that m_ullCounter == 0. We could use a special flag (say, bool g_bQuit) and set it; an operation would check this flag before it begins and would not start if the flag is true. But this is still not enough.
Naive code:
// Deliberately racy example: the flag test (1) and the counter increment (4) are two
// separate steps, so the stopping side can run (2) and (3) in between them.
//worker thread
if (!g_bQuit) // (1)
{
// MessageBoxW(0, 0, L"simulate delay", MB_ICONWARNING);
InterlockedIncrement(&g_ullCounter); // (4)
// do operation
InterlockedDecrement(&g_ullCounter); // (5)
}
// here we wait for all operation done
g_bQuit = true; // (2)
// wait on g_ullCounter == 0, how - not important
while (g_ullCounter) continue; // (3)
1. A pending operation checks the g_bQuit flag (1); it is still false, so the operation begins.
2. The worker thread is swapped out (use MessageBox to simulate this).
3. We set g_bQuit = true; // (2)
4. We check/wait for g_ullCounter == 0; it is 0, so we exit (3).
5. The worker thread wakes up (returns from MessageBox) and increments g_ullCounter (4).
The problem here is that the operation can use resources which we have already begun to destroy after seeing g_ullCounter == 0.
This happens because checking the quit flag (g_bQuit) and then incrementing the counter is not atomic - there can be a gap between the two steps.
For a correct solution we need atomic access to flag+counter, and this is exactly what run-down protection does. A single LONG variable (32 bits) is used for flag+counter because we can access it atomically: 31 bits are used for the counter and 1 bit for the quit flag. The Windows implementation uses bit 0 for the flag (1 means quit) and bits [1..31] for the counter. I use bits [0..30] for the counter and bit 31 for the flag (0 means quit).

Related

What C++'s equivalent to winapi's MsgWaitForMultipleObjectsEx

I'm making the transition from using native Win32 API calls to manage my thread's message queue to using my own C++ code. I have encountered a question which I can't fully answer.
Given the following code snippet
// Thread loop that services two waitable handles and the thread's message queue.
//  - hHandle1 signaled            -> DoSomething()
//  - hHandle2 signaled            -> DoSomething2()
//  - new input in the queue       -> every queued message is removed; WM_QUIT ends
//                                    the loop, anything else is translated/dispatched
// Returns 1 when WM_QUIT is received, and also when the wait itself fails.
LRESULT QueueConsumeThread()
{
    HANDLE hHandles[] = { hHandle1, hHandle2 };
    while (true)
    {
        switch (::MsgWaitForMultipleObjects(_countof(hHandles), hHandles, FALSE, INFINITE, QS_ALLEVENTS))
        {
        case WAIT_OBJECT_0 :
            DoSomething();
            break;
        case WAIT_OBJECT_0 + 1:
            DoSomething2();
            break;
        case WAIT_OBJECT_0 + _countof(hHandles):
        {
            // The wait only reports that input arrived; the messages must actually be
            // retrieved, otherwise msg is never filled in and the queue is never emptied.
            MSG msg = { 0 };
            while (::PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
            {
                if (msg.message == WM_QUIT)
                {
                    return 1;
                }
                ::TranslateMessage(&msg);
                ::DispatchMessage(&msg);
            }
            break;
        }
        case WAIT_FAILED:
            // e.g. an invalid handle: retrying would spin forever at full CPU.
            ATLASSERT(FALSE);
            return 1;
        }
    }
    return 1;
}
I have read in many sources that a particular thread should be a associated with a single condition_variable, also that using multiple condition_variables or invoking wait_for() or wait_until() doesn't sound too efficient.
The following source suggested implementing a safe_queue using condition_variables. I guess that PeekMessage/GetMessage/MsgWaitForMultipleObject work similarly, but what kind of data should each cell of the queue hold and be able to receive event signals?
Edit: I'm asking this as I have to write a cross-platform application.
Contrary to windows synhronization events (which can be in signalled state) std::condition_variable is decoupled from the state. So, the most natural approach would be to define several conditions and wait/report them with the single condition_variable:
// Waiting side: block on the single condition_variable until at least one of the
// flags is set, then handle every flag that is set.
std::unique_lock<std::mutex> lock(m);
cv.wait(lock, []{ return ready1 || ready2 || ready3; });
if (ready1) { ... }
if (ready2) { ... }
if (ready3) { ... }
// Signalling side (presumably a separate scope/thread, since `lock` is declared
// again): set a flag while holding the same mutex, then notify the waiter.
std::unique_lock<std::mutex> lock(m);
ready1 = true;
cv.notify_one();

Sleeping thread and thread initialization inside constructor

I'm trying to start a thread from a constructor. The thread should sleep, wake up, perform a buffer dump, then sleep again, and so on. This is the code for the constructor:
// Sets the logger's default state, creates the waitable timer and the mutex, and
// finally starts the worker thread with this object as its argument.
// If the mutex cannot be created the constructor returns early and no thread is started.
Logger::Logger()
{
BufferInUse = &CyclicBuffer1; //buffer 1 will be used at beggining
MaxBufferSize = 5; //initial state
NumOfCycles = 0;
CurrentMaxStringLength = 0;
position = BufferInUse->end();
OutPutMethod = odBuffer; //by default
Thresh = 1; //by default
// The result is not checked here; WorkerThread tests hTimer for NULL before using it.
hTimer = CreateWaitableTimer(NULL, TRUE, NULL);
// NOTE(review): SetWaitableTimer due times are in 100 ns units and negative means
// relative, so -20000000 is a 2 second delay, not 1 second - confirm which is intended.
EventTime.QuadPart = -20000000; //1 second by default
Mutex = CreateMutex(NULL,FALSE,NULL);
if (Mutex == NULL)
{
OutputDebugStringA("CreateMutex error! the Logger will close \n");
return ;
}
_beginthread( Logger::WorkerThread , 0,(void*)this ); //run the thread
}
When I debug it, it takes a long time for the thread to be created and for the constructor to finish, and during that time my object's member functions get called many times (I can see this while debugging).
1.I want the thread to be created before my member functions get called, what is the best way to achieve that?
now my thread implementation is:
// Worker thread routine started by the constructor through _beginthread.
// lpParam is the Logger instance. The routine repeatedly arms the waitable timer
// with EventTime, waits for it, and dumps the buffer when the output method is
// odBuffer and the buffer in use holds at least Thresh entries.
// Every failure path logs a message and returns; returning from a _beginthread
// routine ends the thread, so no explicit _endthread() call is needed.
void Logger::WorkerThread ( void *lpParam )
{
    Logger *log = static_cast <Logger*> (lpParam);
    if (NULL == log->hTimer)
    {
        log->LogStringToOutput("CreateWaitableTimer() failed , Logger will close \n");
        return;
    }
    for(;;)
    {
        //set timer for time specified by the EventTime variable inside the Logger
        if (!SetWaitableTimer(log->hTimer, & (log->EventTime), 0, NULL, NULL, 0))
        {
            log->LogStringToOutput("SetWaitableTimer() failed , Logger will close\n" );
            return;
        }
        //wait for timer
        if (WaitForSingleObject(log->hTimer, INFINITE) != WAIT_OBJECT_0)
        {
            log->LogStringToOutput("WaitForSingleObject() failed! Logger will close\n");
            return;
        }
        if(log->getOutputMethod() == odBuffer && log->BufferInUse->size() >= log->Thresh && !log->BufferInUse->empty())
        {
            TTFW_LogRet ret;
            ret = log->FullBufferDump();
            if (ret != SUCCESS)
            {
                log->LogStringToOutput("Error occured in dumping cyclic buffer , the buffer will be cleared\n");
            }
        }
    }
}
is there more elegant implementation of this thread functionality?
You need some mechanism to synchronize the start of WorkerThread with access to the member functions.
for example, use a condition variable (documents in msdn):
add 3 member to Logger:
// Fragment of the Logger class showing only the three members this answer adds;
// the "..." lines stand for the existing declarations.
class Logger{
...
private:
// Lock used together with ConditionVar and WorkerThreadStarted.
CRITICAL_SECTION CritSection;
// Condition variable that EnsureInitiallized() sleeps on.
CONDITION_VARIABLE ConditionVar;
// Predicate of the wait: set to true by the worker thread.
bool WorkerThreadStarted;
...
};
and
Logger::Logger():WorkerThreadStarted(false)
{
EnterCriticalSection(&CritSection); //added
BufferInUse = &CyclicBuffer1; //buffer 1 will be used at beggining
...
}
void Logger::WorkerThread ( void *lpParam )
{
WorkerThreadStarted=true; //added
LeaveCriticalSection(&CritSection);
Logger *log = static_cast <Logger*> (lpParam);
if (NULL == log->hTimer)
{
log->LogStringToOutput("CreateWaitableTimer() failed , Logger will close \n");
return;
}
...
}
add such a function:
// Blocks the caller until WorkerThreadStarted is true, sleeping on ConditionVar
// (with CritSection as the associated lock) between checks of the predicate.
void Logger::EnsureInitiallized(){
    EnterCriticalSection(&CritSection);
    for (;;)
    {
        // Re-check after every wake-up: leave only once the predicate holds.
        if (WorkerThreadStarted)
        {
            break;
        }
        SleepConditionVariableCS(&ConditionVar, &CritSection, INFINITE);
    }
    LeaveCriticalSection(&CritSection);
}
and at every member function's entry, call EnsureInitiallized();
// Template for any member function: call EnsureInitiallized() first, then run the
// function's own logic (the "..." line).
void Logger::yourFunction(){
EnsureInitiallized();
...
}
That is just an example; you can also use a read/write lock, an atomic integer, etc.

EnterCriticalSection Deadlocking

I found some code that claimed to be able to make a thread sleep for an accurate amount of time. Testing the code out, it seems to work great, however it always deadlocks after a short amount of time.
Here is the original code. I put prints before entering and leaving the critical section, and saw that sometimes it leaves or enters twice in a row. It seems to deadlock at the EnterCriticalSection call within the Wait function.
Is there a way I can modify this code to retain its functionality while not deadlocking?
//----------------------------------------------------------------
class PreciseTimer
{
public:
PreciseTimer() : mRes(0), toLeave(false), stopCounter(-1)
{
InitializeCriticalSection(&crit);
mRes = timeSetEvent(1, 0, &TimerProc, (DWORD)this,
TIME_PERIODIC);
}
virtual ~PreciseTimer()
{
mRes = timeKillEvent(mRes);
DeleteCriticalSection(&crit);
}
///////////////////////////////////////////////////////////////
// Function name : Wait
// Description : Waits for the required duration of msecs.
// : Timer resolution is precisely 1 msec
// Return type : void :
// Argument : int timeout : timeout in msecs
///////////////////////////////////////////////////////////////
void Wait(int timeout)
{
if ( timeout )
{
stopCounter = timeout;
toLeave = true;
// this will do the actual delay - timer callback shares
// same crit section
EnterCriticalSection(&crit);
LeaveCriticalSection(&crit);
}
}
///////////////////////////////////////////////////////////////
// Function name : TimerProc
// Description : Timer callback procedure that is called
// : every 1msec
// : by high resolution media timers
// Return type : void CALLBACK :
// Argument : UINT uiID :
// Argument : UINT uiMsg :
// Argument : DWORD dwUser :
// Argument : DWORD dw1 :
// Argument : DWORD dw2 :
///////////////////////////////////////////////////////////////
static void CALLBACK TimerProc(UINT uiID, UINT uiMsg, DWORD
dwUser, DWORD dw1, DWORD dw2)
{
static volatile bool entered = false;
PreciseTimer* pThis = (PreciseTimer*)dwUser;
if ( pThis )
{
if ( !entered && !pThis->toLeave ) // block section as
// soon as we can
{
entered = true;
EnterCriticalSection(&pThis->crit);
}
else if ( pThis->toLeave && pThis->stopCounter == 0 )
// leave section
// when counter
// has expired
{
pThis->toLeave = false;
entered = false;
LeaveCriticalSection(&pThis->crit);
}
else if ( pThis->stopCounter > 0 ) // if counter is set
// to anything, then
// continue to drop
// it...
--pThis->stopCounter;
}
}
private:
MMRESULT mRes;
CRITICAL_SECTION crit;
volatile bool toLeave;
volatile int stopCounter;
};
A deadlock in EnterCriticalSection() usually means that another thread called EnterCriticalSection() but never called LeaveCriticalSection().
As shown, this code is not very thread-safe (and timeSetEvent() is a threaded timer). If multiple PreciseTimer timers are running at the same time, they are using the same TimerProc() callback, and thus are sharing the same entered variable without protecting it from concurrent access. And if multiple threads call Wait() on the same PreciseTimer object at the same time, they are going to step over each other's use of the stopCounter and toLeave members, which are also not protected them from concurrent access. Even a single thread calling Wait() on a single PreciseTimer is not safe since TimerProc() runs in its own thread and stopCounter is not adequately protected.
This code is full of race conditions.

win32 thread-safe queue implementation using native windows API

Because the lack of condition variable in windows(though it is introduced since vista, it's not supported in windows XP and 2003), it is not very easy to implement a thread-safe queue in c++. Strategies for Implementing POSIX Condition Variables on Win32. What I required is to just use CriticalSection or Mutex and Event without using semaphore and condition variable.
I also tried to find an exact implementation that just using win32 native API, but no luck. So I finished one by myself. The problem is I am not 100% sure the code is thread-safe. Who can tell me it is OK or not?
// Blocking queue of void* items protected by a critical section, with one event
// for waiting consumers and one for waiting producers.
class CEventSyncQueue
{
public:
// nCapacity <= 0 (the default -1) disables the capacity check in Put().
CEventSyncQueue(int nCapacity = -1);
virtual ~CEventSyncQueue();
// Appends ptr; waits while the capacity check reports the queue as full.
virtual void Put(void* ptr);
// Removes and returns the head item; waits while the list is empty.
virtual void* Get();
protected:
int m_nCapacity;
CPtrList m_list;
// Guards m_list.
CRITICAL_SECTION m_lock;
// Set by Put(); Get() waits on it while the list is empty.
HANDLE m_hGetEvent;
// Set by Get(); Put() waits on it while the queue is full.
HANDLE m_hPutEvent;
};
// Stores the capacity, initializes the lock and creates the two events
// (both auto-reset and initially non-signaled).
CEventSyncQueue::CEventSyncQueue(int nCapacity)
    : m_nCapacity(nCapacity)
{
    const BOOL bManualReset = FALSE;
    const BOOL bInitiallySignaled = FALSE;

    ::InitializeCriticalSection(&m_lock);
    m_hPutEvent = ::CreateEvent(NULL, bManualReset, bInitiallySignaled, NULL);
    m_hGetEvent = ::CreateEvent(NULL, bManualReset, bInitiallySignaled, NULL);
}
// Empties the list, then releases both event handles and the critical section.
CEventSyncQueue::~CEventSyncQueue()
{
    m_list.RemoveAll();

    // The two handles are independent of each other.
    ::CloseHandle(m_hPutEvent);
    ::CloseHandle(m_hGetEvent);

    ::DeleteCriticalSection(&m_lock);
}
// Appends ptr to the queue.
// With a positive capacity the call blocks while the queue is full; the condition
// is re-checked under the lock after every wake-up.
void CEventSyncQueue::Put(void* ptr)
{
    ::EnterCriticalSection(&m_lock);
    while(m_nCapacity > 0 && m_list.GetCount() >= m_nCapacity)
    {
        ::LeaveCriticalSection(&m_lock);
        //wait
        if(::WaitForSingleObject(m_hPutEvent, INFINITE) != WAIT_OBJECT_0)
        {
            ASSERT(FALSE);
        }
        ::EnterCriticalSection(&m_lock);
    }
    if(m_nCapacity > 0)
    {
        ASSERT(m_list.GetCount() < m_nCapacity);
    }
    m_list.AddTail(ptr);
    ::SetEvent(m_hGetEvent); // wakes one waiting consumer (auto-reset event)
    // m_hPutEvent is auto-reset: two SetEvent calls made before any producer wakes
    // collapse into one signal. Pass the signal on while there is still room, so a
    // second blocked producer is not left waiting next to free space.
    if(m_nCapacity > 0 && m_list.GetCount() < m_nCapacity)
    {
        ::SetEvent(m_hPutEvent);
    }
    ::LeaveCriticalSection(&m_lock);
}
// Removes and returns the item at the head of the queue.
// Blocks while the queue is empty; the condition is re-checked under the lock
// after every wake-up.
void* CEventSyncQueue::Get()
{
    ::EnterCriticalSection(&m_lock);
    while(m_list.IsEmpty())
    {
        ::LeaveCriticalSection(&m_lock);
        //wait
        if(::WaitForSingleObject(m_hGetEvent, INFINITE) != WAIT_OBJECT_0)
        {
            ASSERT(FALSE);
        }
        ::EnterCriticalSection(&m_lock);
    }
    ASSERT(!m_list.IsEmpty());
    void* ptr = m_list.RemoveHead();
    ::SetEvent(m_hPutEvent); // wakes one waiting producer (auto-reset event)
    // m_hGetEvent is auto-reset: two SetEvent calls made before any consumer wakes
    // collapse into one signal. Pass the signal on while items remain, so a second
    // blocked consumer is not left waiting next to a non-empty queue.
    if(!m_list.IsEmpty())
    {
        ::SetEvent(m_hGetEvent);
    }
    ::LeaveCriticalSection(&m_lock);
    return ptr;
}
It's trivial to implement a thread-safe queue in Windows. I've done it in Delphi, C++, BCB etc.
Why do you think that a condition variable is required? How do you think that Windows Message Queues work?
Events are the wrong primitive to use for P-C queues. Easiest/clearest way is to use a semaphore.
Simple unbounded producer-consumer queue.
template <typename T> class PCSqueue{
CRITICAL_SECTION access;
deque<T> *objectQueue;
HANDLE queueSema;
public:
PCSqueue(){
objectQueue=new deque<T>;
InitializeCriticalSection(&access);
queueSema=CreateSemaphore(NULL,0,MAXINT,NULL);
};
void push(T ref){
EnterCriticalSection(&access);
objectQueue->push_front(ref);
LeaveCriticalSection(&access);
ReleaseSemaphore(queueSema,1,NULL);
};
bool pop(T *ref,DWORD timeout){
if (WAIT_OBJECT_0==WaitForSingleObject(queueSema,timeout)) {
EnterCriticalSection(&access);
*ref=objectQueue->back();
objectQueue->pop_back();
LeaveCriticalSection(&access);
return(true);
}
else
return(false);
};
};
Edit - a bounded queue would not be much more difficult - you need another semaphore to count the empty spaces. I don't use bounded queues, but I'm sure it would be OK - a bounded queue with 2 semaphores and a mutex/CS is a standard pattern.
Edit: Use PostMessage() or PostThreadMessage() API calls - they are explicitly declared to be safe from the 'waveOutProc' callback. MSDN says that calling 'other wave functions' will cause deadlock - semaphore calls are not in that set and I would be very surprised indeed if SetEvent() was allowed but ReleaseSemaphore() was not. In fact, I would be surprised if SetEvent() was allowed while ReleaseSemaphore() was not ANYWHERE in Windows.
On second thoughts, it's hardly necessary to explicitly implement a semaphore. Instead, just think about how you would implement a semaphore using events, and approach your the problem that way. My first attempt used manual-reset events, which was inefficient but manifestly correct, and then I optimized.
Please note that I haven't debugged (or even compiled!) either of these code fragments, but they should give you the right idea. Here's the manual-reset version:
// Blocking queue of void* items (manual-reset event version): a critical section
// protects the list and two events describe its state.
class CEventSyncQueue
{
public:
// nCapacity <= 0 (the default -1) means the capacity check never blocks Put().
CEventSyncQueue(int nCapacity = -1);
virtual ~CEventSyncQueue();
// Appends ptr, waiting while the queue is full.
virtual void Put(void* ptr);
// Removes the head item, waiting while the queue is empty.
virtual void* Get();
protected:
int m_nCapacity;
CPtrList m_list;
// Guards m_list and the set/reset decisions on both events.
CRITICAL_SECTION m_lock;
// Set by Put() after adding an item; reset by Get() when the list becomes empty.
HANDLE m_queue_not_empty;
// Set by Get() after removing an item; reset by Put() when the capacity is reached.
HANDLE m_queue_not_full;
};
// Stores the capacity, initializes the lock and creates two manual-reset events:
// "not empty" starts non-signaled, "not full" starts signaled.
CEventSyncQueue::CEventSyncQueue(int nCapacity)
    : m_nCapacity(nCapacity)
{
    const BOOL bManualReset = TRUE;

    ::InitializeCriticalSection(&m_lock);
    m_queue_not_empty = ::CreateEvent(NULL, bManualReset, FALSE, NULL);
    m_queue_not_full = ::CreateEvent(NULL, bManualReset, TRUE, NULL);
}
// Empties the list, then releases both event handles and the critical section.
CEventSyncQueue::~CEventSyncQueue()
{
    m_list.RemoveAll();

    // The two handles are independent of each other.
    ::CloseHandle(m_queue_not_full);
    ::CloseHandle(m_queue_not_empty);

    ::DeleteCriticalSection(&m_lock);
}
void CEventSyncQueue::Put(void* ptr)
{
bool done = false;
while (!done)
{
// If the queue is full, we must wait until it isn't.
if(::WaitForSingleObject(m_queue_not_full, INFINITE) != WAIT_OBJECT_0)
{
ASSERT(FALSE);
}
// However, we might not be the first to respond to the event,
// so we still need to check whether the queue is full and loop
// if it is.
::EnterCriticalSection(&m_lock);
if (m_nCapacity <= 0 || m_list.GetCount() < m_nCapacity)
{
m_list.AddTail(ptr);
done = true;
// The queue is definitely not empty.
SetEvent(m_queue_not_empty);
// Check whether the queue is now full.
if (m_nCapacity > 0 && m_list.GetCount() >= m_nCapacity)
{
ResetEvent(m_queue_not_full);
}
}
::LeaveCriticalSection(&m_lock);
}
}
// Removes and returns the head item (manual-reset event version), blocking while
// the queue is empty.
// Items are expected to be non-null: a null head item trips the ASSERT and the
// loop keeps waiting for another item.
void* CEventSyncQueue::Get()
{
    void *result = nullptr;
    while (result == nullptr)
    {
        // If the queue is empty, we must wait until it isn't.
        if(::WaitForSingleObject(m_queue_not_empty, INFINITE) != WAIT_OBJECT_0)
        {
            ASSERT(FALSE);
        }
        // However, we might not be the first to respond to the event,
        // so we still need to check whether the queue is empty and loop
        // if it is.
        ::EnterCriticalSection(&m_lock);
        if (!m_list.IsEmpty())
        {
            result = m_list.RemoveHead();
            ASSERT(result != nullptr);
            // The queue shouldn't be full at this point!
            ASSERT(m_nCapacity <= 0 || m_list.GetCount() < m_nCapacity);
            SetEvent(m_queue_not_full);
            // Check whether the queue is now empty.
            if (m_list.IsEmpty())
            {
                ResetEvent(m_queue_not_empty);
            }
        }
        ::LeaveCriticalSection(&m_lock);
    }
    // Hand the removed item to the caller; without this the function would end
    // without returning a value.
    return result;
}
And here's the more efficient, auto-reset events version:
// Blocking queue of void* items (auto-reset event version): a critical section
// protects the list; each event wakes one waiter per signal.
class CEventSyncQueue
{
public:
// nCapacity <= 0 (the default -1) makes Put() take its unbounded path.
CEventSyncQueue(int nCapacity = -1);
virtual ~CEventSyncQueue();
// Appends ptr, waiting while a bounded queue is full.
virtual void Put(void* ptr);
// Removes the head item, waiting while the queue is empty.
virtual void* Get();
protected:
int m_nCapacity;
CPtrList m_list;
// Guards m_list.
CRITICAL_SECTION m_lock;
// Signaled by Put() after adding, and by Get() when items remain after a removal.
HANDLE m_queue_not_empty;
// Signaled by Get() after removing, and by Put() when room remains after an insertion.
HANDLE m_queue_not_full;
};
// Stores the capacity, initializes the lock and creates two auto-reset events:
// "not empty" starts non-signaled, "not full" starts signaled.
CEventSyncQueue::CEventSyncQueue(int nCapacity)
    : m_nCapacity(nCapacity)
{
    const BOOL bManualReset = FALSE;

    ::InitializeCriticalSection(&m_lock);
    m_queue_not_empty = ::CreateEvent(NULL, bManualReset, FALSE, NULL);
    m_queue_not_full = ::CreateEvent(NULL, bManualReset, TRUE, NULL);
}
// Empties the list, then releases both event handles and the critical section.
CEventSyncQueue::~CEventSyncQueue()
{
    m_list.RemoveAll();

    // The two handles are independent of each other.
    ::CloseHandle(m_queue_not_full);
    ::CloseHandle(m_queue_not_empty);

    ::DeleteCriticalSection(&m_lock);
}
void CEventSyncQueue::Put(void* ptr)
{
if (m_nCapacity <= 0)
{
::EnterCriticalSection(&m_lock);
m_list.AddTail(ptr);
SetEvent(m_queue_not_empty);
::LeaveCriticalSection(&m_lock);
return;
}
bool done = false;
while (!done)
{
// If the queue is full, we must wait until it isn't.
if(::WaitForSingleObject(m_queue_not_full, INFINITE) != WAIT_OBJECT_0)
{
ASSERT(FALSE);
}
// However, under some (rare) conditions we'll get here and find
// the queue is already full again, so be prepared to loop.
::EnterCriticalSection(&m_lock);
if (m_list.GetCount() < m_nCapacity)
{
m_list.AddTail(ptr);
done = true;
SetEvent(m_queue_not_empty);
if (m_list.GetCount() < m_nCapacity)
{
SetEvent(m_queue_not_full);
}
}
::LeaveCriticalSection(&m_lock);
}
}
// Removes and returns the head item (auto-reset event version), blocking while
// the queue is empty.
// Items are expected to be non-null: a null head item trips the ASSERT and the
// loop keeps waiting for another item.
void* CEventSyncQueue::Get()
{
    void *result = nullptr;
    while (result == nullptr)
    {
        // If the queue is empty, we must wait until it isn't.
        if(::WaitForSingleObject(m_queue_not_empty, INFINITE) != WAIT_OBJECT_0)
        {
            ASSERT(FALSE);
        }
        // However, under some (rare) conditions we'll get here and find
        // the queue is already empty again, so be prepared to loop.
        ::EnterCriticalSection(&m_lock);
        if (!m_list.IsEmpty())
        {
            result = m_list.RemoveHead();
            ASSERT(result != nullptr);
            // The queue shouldn't be full at this point! The check only makes
            // sense for a bounded queue (positive capacity).
            ASSERT(m_nCapacity <= 0 || m_list.GetCount() < m_nCapacity);
            SetEvent(m_queue_not_full);
            // Items remain: pass the signal on to the next waiting consumer.
            if (!m_list.IsEmpty())
            {
                SetEvent(m_queue_not_empty);
            }
        }
        ::LeaveCriticalSection(&m_lock);
    }
    // Hand the removed item to the caller; without this the function would end
    // without returning a value.
    return result;
}
condition variable? Do you mean Interlocked* functions? These have been around for a long time - I used them in Windows 2000. you can use them to build a concurrency system, but you'll still have to do a bit of work yourself.
Alternatively, try OpenMP. To use this you'll need Visual Studio 2008 or greater.

Win32 Read/Write Lock Using Only Critical Sections

I have to implement a read/write lock in C++ using the Win32 api as part of a project at work. All of the existing solutions use kernel objects (semaphores and mutexes) that require a context switch during execution. This is far too slow for my application.
I would like implement one using only critical sections, if possible. The lock does not have to be process safe, only threadsafe. Any ideas on how to go about this?
If you can target Vista or greater, you should use the built-in SRWLock's. They are lightweight like critical sections, entirely user-mode when there is no contention.
Joe Duffy's blog has some recent entries on implementing different types of non-blocking reader/writer locks. These locks do spin, so they would not be appropriate if you intend to do a lot of work while holding the lock. The code is C#, but should be straightforward to port to native.
You can implement a reader/writer lock using critical sections and events - you just need to keep enough state to only signal the event when necessary to avoid an unnecessary kernel mode call.
I don't think this can be done without using at least one kernel-level object (Mutex or Semaphore), because you need the help of the kernel to make the calling process block until the lock is available.
Critical sections do provide blocking, but the API is too limited. e.g. you cannot grab a CS, discover that a read lock is available but not a write lock, and wait for the other process to finish reading (because if the other process has the critical section it will block other readers which is wrong, and if it doesn't then your process will not block but spin, burning CPU cycles.)
However what you can do is use a spin lock and fall back to a mutex whenever there is contention. The critical section is itself implemented this way. I would take an existing critical section implementation and replace the PID field with separate reader & writer counts.
Old question, but this is something that should work. It doesn't spin on contention. Readers incur limited extra cost if they have little or no contention, because SetEvent is called lazily (look at the edit history for a more heavyweight version that doesn't have this optimization).
#include <assert.h>
#include <windows.h>
/* State of one reader/writer lock. */
typedef struct _RW_LOCK {
/* Guards readerCount and waitingWriter. */
CRITICAL_SECTION countsLock;
/* Held by a writer for the whole write; readers take it briefly on entry. */
CRITICAL_SECTION writerLock;
/* Event a writer waits on; set by the last reader to leave. */
HANDLE noReaders;
/* Number of readers currently holding the lock. */
int readerCount;
/* TRUE while a writer is waiting on noReaders. */
BOOL waitingWriter;
} RW_LOCK, *PRW_LOCK;
/*
 * Initializes every field of *rwlock. Must be called once before any other
 * rwlock_* function is used on the lock.
 */
void rwlock_init(PRW_LOCK rwlock)
{
    InitializeCriticalSection(&rwlock->writerLock);
    InitializeCriticalSection(&rwlock->countsLock);
    /*
     * The lock starts with no readers and no waiting writer. Without this the
     * two fields hold whatever was in memory, and the very first lock or unlock
     * would act on garbage.
     */
    rwlock->readerCount = 0;
    rwlock->waitingWriter = FALSE;
    /*
     * Could use a semaphore as well. There can only be one waiter ever,
     * so I'm showing an auto-reset event here.
     */
    rwlock->noReaders = CreateEvent (NULL, FALSE, FALSE, NULL);
}
/*
 * Acquires the lock for reading: increments readerCount while holding both
 * writerLock and countsLock, then releases both again.
 */
void rwlock_rdlock(PRW_LOCK rwlock)
{
/*
* We need to lock the writerLock too, otherwise a writer could
* do the whole of rwlock_wrlock after the readerCount changed
* from 0 to 1, but before the event was reset.
*/
EnterCriticalSection(&rwlock->writerLock);
EnterCriticalSection(&rwlock->countsLock);
++rwlock->readerCount;
LeaveCriticalSection(&rwlock->countsLock);
LeaveCriticalSection(&rwlock->writerLock);
}
/*
 * Acquires the lock for writing: takes writerLock (which keeps new readers and
 * other writers out) and then waits until the readers already inside have left.
 * writerLock stays held until rwlock_wrunlock() is called.
 * Always returns 0.
 */
int rwlock_wrlock(PRW_LOCK rwlock)
{
    EnterCriticalSection(&rwlock->writerLock);
    /*
     * readerCount cannot become non-zero within the writerLock CS,
     * but it can become zero...
     */
    if (rwlock->readerCount > 0) {
        EnterCriticalSection(&rwlock->countsLock);
        /* ... so test it again. */
        if (rwlock->readerCount > 0) {
            rwlock->waitingWriter = TRUE;
            LeaveCriticalSection(&rwlock->countsLock);
            WaitForSingleObject(rwlock->noReaders, INFINITE);
        } else {
            /* How lucky, no need to wait. */
            LeaveCriticalSection(&rwlock->countsLock);
        }
    }
    /* writerLock remains locked. */
    return 0; /* the function is declared int, so it must return a value */
}
/*
 * Releases a read lock. The reader that brings readerCount down to zero wakes
 * the writer, if one has announced itself through waitingWriter.
 */
void rwlock_rdunlock(PRW_LOCK rwlock)
{
    BOOL wakeWriter;

    EnterCriticalSection(&rwlock->countsLock);
    assert (rwlock->readerCount > 0);
    rwlock->readerCount -= 1;
    wakeWriter = (rwlock->readerCount == 0) && rwlock->waitingWriter;
    if (wakeWriter) {
        /*
         * The flag is cleared on this side so that wrlock does not have to take
         * countsLock a second time after it wakes up.
         */
        rwlock->waitingWriter = FALSE;
        SetEvent(rwlock->noReaders);
    }
    LeaveCriticalSection(&rwlock->countsLock);
}
/* Releases a write lock by leaving writerLock, which rwlock_wrlock() left held. */
void rwlock_wrunlock(PRW_LOCK rwlock)
{
LeaveCriticalSection(&rwlock->writerLock);
}
You could decrease the cost for readers by using a single CRITICAL_SECTION:
countsLock is replaced with writerLock in rdlock and rdunlock
rwlock->waitingWriter = FALSE is removed in rdunlock
wrlock's body is changed to
/*
 * Single-critical-section variant of the wrlock body: announce the writer, then
 * release writerLock around each wait and re-check readerCount after re-entering.
 */
EnterCriticalSection(&rwlock->writerLock);
rwlock->waitingWriter = TRUE;
while (rwlock->readerCount > 0) {
LeaveCriticalSection(&rwlock->writerLock);
WaitForSingleObject(rwlock->noReaders, INFINITE);
EnterCriticalSection(&rwlock->writerLock);
}
rwlock->waitingWriter = FALSE;
/* writerLock remains locked. */
However this loses in fairness, so I prefer the above solution.
Take a look at the book "Concurrent Programming on Windows" which has lots of different reference examples for reader/writer locks.
Check out the spin_rw_mutex from Intel's Thread Building Blocks ...
spin_rw_mutex is strictly in user-land
and employs spin-wait for blocking
This is an old question but perhaps someone will find this useful. We developed a high-performance, open-source RWLock for Windows that automatically uses Vista+ SRWLock Michael mentioned if available, or otherwise falls back to a userspace implementation.
As an added bonus, there are four different "flavors" of it (though you can stick to the basic, which is also the fastest), each providing more synchronization options. It starts with the basic RWLock() which is non-reentrant, limited to single-process synchronization, and no swapping of read/write locks to a full-fledged cross-process IPC RWLock with re-entrance support and read/write de-elevation.
As mentioned, they dynamically swap out to the Vista+ slim read-write locks for best performance when possible, but you don't have to worry about that at all as it'll fall back to a fully-compatible implementation on Windows XP and its ilk.
If you already know of a solution that only uses mutexes, you should be able to modify it to use critical sections instead.
We rolled our own using two critical sections and some counters. It suits our needs - we have a very low writer count, writers get precedence over readers, etc. I'm not at liberty to publish ours but can say that it is possible without mutexes and semaphores.
Here is the smallest solution that I could come up with:
http://www.baboonz.org/rwlock.php
And pasted verbatim:
/** A simple Reader/Writer Lock.
This RWL has no events - we rely solely on spinlocks and sleep() to yield control to other threads.
I don't know what the exact penalty is for using sleep vs events, but at least when there is no contention, we are basically
as fast as a critical section. This code is written for Windows, but it should be trivial to find the appropriate
equivalents on another OS.
**/
// Spinning reader/writer lock kept in the single 32-bit word Main.
// Readers add 1 to Main while the top bit (WriteDesireBit) is clear; a writer sets
// that bit and then swaps Main from exactly WriteDesireBit to -1 to take the lock.
class TinyReaderWriterLock
{
public:
// Lock word: see the class comment for its encoding.
volatile uint32 Main;
// Top bit of Main; set by a writer that wants (or holds) the lock.
static const uint32 WriteDesireBit = 0x80000000;
// Back-off helper for the spin loops: calls Sleep(0) once every 4096 ticks.
void Noop( uint32 tick )
{
if ( ((tick + 1) & 0xfff) == 0 ) // Sleep after 4k cycles. Crude, but usually better than spinning indefinitely.
Sleep(0);
}
TinyReaderWriterLock() { Main = 0; }
// Asserts that the lock is completely free when it is destroyed.
~TinyReaderWriterLock() { ASSERT( Main == 0 ); }
// Spins until WriteDesireBit is clear and the compare-exchange that adds one
// reader to Main succeeds.
void EnterRead()
{
for ( uint32 tick = 0 ;; tick++ )
{
uint32 oldVal = Main;
if ( (oldVal & WriteDesireBit) == 0 )
{
if ( InterlockedCompareExchange( (LONG*) &Main, oldVal + 1, oldVal ) == oldVal )
break;
}
Noop(tick);
}
}
// Spins until Main equals exactly WriteDesireBit (the bit is set and no reader is
// counted) and the compare-exchange to -1 succeeds.
void EnterWrite()
{
for ( uint32 tick = 0 ;; tick++ )
{
if ( (tick & 0xfff) == 0 ) // Set the write-desire bit every 4k cycles (including cycle 0).
_InterlockedOr( (LONG*) &Main, WriteDesireBit );
uint32 oldVal = Main;
if ( oldVal == WriteDesireBit )
{
if ( InterlockedCompareExchange( (LONG*) &Main, -1, WriteDesireBit ) == WriteDesireBit )
break;
}
Noop(tick);
}
}
// Drops one reader by decrementing Main; asserts no writer holds the lock.
void LeaveRead()
{
ASSERT( Main != -1 );
InterlockedDecrement( (LONG*) &Main );
}
// Releases the write lock: Main must be -1, and incrementing it wraps it to 0,
// which also clears WriteDesireBit.
void LeaveWrite()
{
ASSERT( Main == -1 );
InterlockedIncrement( (LONG*) &Main );
}
};
I wrote the following code using only critical sections.
class ReadWriteLock {
volatile LONG writelockcount;
volatile LONG readlockcount;
CRITICAL_SECTION cs;
public:
ReadWriteLock() {
InitializeCriticalSection(&cs);
writelockcount = 0;
readlockcount = 0;
}
~ReadWriteLock() {
DeleteCriticalSection(&cs);
}
void AcquireReaderLock() {
retry:
while (writelockcount) {
Sleep(0);
}
EnterCriticalSection(&cs);
if (!writelockcount) {
readlockcount++;
}
else {
LeaveCriticalSection(&cs);
goto retry;
}
LeaveCriticalSection(&cs);
}
void ReleaseReaderLock() {
EnterCriticalSection(&cs);
readlockcount--;
LeaveCriticalSection(&cs);
}
void AcquireWriterLock() {
retry:
while (writelockcount||readlockcount) {
Sleep(0);
}
EnterCriticalSection(&cs);
if (!writelockcount&&!readlockcount) {
writelockcount++;
}
else {
LeaveCriticalSection(&cs);
goto retry;
}
LeaveCriticalSection(&cs);
}
void ReleaseWriterLock() {
EnterCriticalSection(&cs);
writelockcount--;
LeaveCriticalSection(&cs);
}
};
To perform a spin-wait, comment the lines with Sleep(0).
Look my implementation here:
https://github.com/coolsoftware/LockLib
VRWLock is a C++ class that implements single writer - multiple readers logic.
Look also test project TestLock.sln.
UPD. Below is the simple code for reader and writer:
// Shared lock word: each reader adds 1 while reading; a writer stores
// MAX_READERS+1 while writing. MAX_READERS is defined outside this snippet.
LONG gCounter = 0;
// reader
for (;;) //loop
{
LONG n = InterlockedIncrement(&gCounter);
// n = value of gCounter after increment
if (n <= MAX_READERS) break; // writer does not write anything - we can read
// A value above MAX_READERS did not grant read access: undo the increment and retry.
InterlockedDecrement(&gCounter);
}
// read data here
InterlockedDecrement(&gCounter); // release reader
// writer
for (;;) //loop
{
LONG n = InterlockedCompareExchange(&gCounter, (MAX_READERS+1), 0);
// n = value of gCounter before attempt to replace it by MAX_READERS+1 in InterlockedCompareExchange
// if gCounter was 0 - no readers/writers and in gCounter will be MAX_READERS+1
// if gCounter was not 0 - gCounter stays unchanged
if (n == 0) break;
}
// write data here
InterlockedExchangeAdd(&gCounter, -(MAX_READERS+1)); // release writer
VRWLock class supports spin count and thread-specific reference count that allows to release locks of terminated threads.