mutex / what is the mutex data being locked? - c++

#include <pthread.h>
#include <time.h>
#include "errors.h"

typedef struct alarm_tag {
    struct alarm_tag    *link;
    int                 seconds;
    time_t              time;   /* seconds from EPOCH */
    char                message[64];
} alarm_t;

pthread_mutex_t alarm_mutex = PTHREAD_MUTEX_INITIALIZER;
alarm_t *alarm_list = NULL;

void *alarm_thread (void *arg)
{
    alarm_t *alarm;
    int sleep_time;
    time_t now;
    int status;

    while (1) {
        status = pthread_mutex_lock (&alarm_mutex);
        if (status != 0)
            err_abort (status, "Lock mutex");
        alarm = alarm_list;
        /*
         * If the alarm list is empty, wait for one second. This
         * allows the main thread to run, and read another
         * command. If the list is not empty, remove the first
         * item. Compute the number of seconds to wait -- if the
         * result is less than 0 (the time has passed), then set
         * the sleep_time to 0.
         */
        if (alarm == NULL)
            sleep_time = 1;
        else {
            alarm_list = alarm->link;
            now = time (NULL);
            if (alarm->time <= now)
                sleep_time = 0;
            else
                sleep_time = alarm->time - now;
#ifdef DEBUG
            printf ("[waiting: %d(%d)\"%s\"]\n", alarm->time,
                sleep_time, alarm->message);
#endif
        }
        /*
         * Unlock the mutex before waiting, so that the main
         * thread can lock it to insert a new alarm request. If
         * the sleep_time is 0, then call sched_yield, giving
         * the main thread a chance to run if it has been
         * readied by user input, without delaying the message
         * if there's no input.
         */
        status = pthread_mutex_unlock (&alarm_mutex);
        if (status != 0)
            err_abort (status, "Unlock mutex");
        if (sleep_time > 0)
            sleep (sleep_time);
        else
            sched_yield ();
        /*
         * If a timer expired, print the message and free the
         * structure.
         */
        if (alarm != NULL) {
            printf ("(%d) %s\n", alarm->seconds, alarm->message);
            free (alarm);
        }
    }
}

int main (int argc, char *argv[])
{
    int status;
    char line[128];
    alarm_t *alarm, **last, *next;
    pthread_t thread;

    status = pthread_create (
        &thread, NULL, alarm_thread, NULL);
    if (status != 0)
        err_abort (status, "Create alarm thread");
    while (1) {
        printf ("alarm> ");
        if (fgets (line, sizeof (line), stdin) == NULL) exit (0);
        if (strlen (line) <= 1) continue;
        alarm = (alarm_t*)malloc (sizeof (alarm_t));
        if (alarm == NULL)
            errno_abort ("Allocate alarm");
        /*
         * Parse input line into seconds (%d) and a message
         * (%64[^\n]), consisting of up to 64 characters
         * separated from the seconds by whitespace.
         */
        if (sscanf (line, "%d %64[^\n]",
            &alarm->seconds, alarm->message) < 2) {
            fprintf (stderr, "Bad command\n");
            free (alarm);
        } else {
            status = pthread_mutex_lock (&alarm_mutex);
            if (status != 0)
                err_abort (status, "Lock mutex");
            alarm->time = time (NULL) + alarm->seconds;
            /*
             * Insert the new alarm into the list of alarms,
             * sorted by expiration time.
             */
            last = &alarm_list;
            next = *last;
            while (next != NULL) {
                if (next->time >= alarm->time) {
                    alarm->link = next;
                    *last = alarm;
                    break;
                }
                last = &next->link;
                next = next->link;
            }
            /*
             * If we reached the end of the list, insert the new
             * alarm there. ("next" is NULL, and "last" points
             * to the link field of the last item, or to the
             * list header).
             */
            if (next == NULL) {
                *last = alarm;
                alarm->link = NULL;
            }
#ifdef DEBUG
            printf ("[list: ");
            for (next = alarm_list; next != NULL; next = next->link)
                printf ("%d(%d)[\"%s\"] ", next->time,
                    next->time - time (NULL), next->message);
            printf ("]\n");
#endif
            status = pthread_mutex_unlock (&alarm_mutex);
            if (status != 0)
                err_abort (status, "Unlock mutex");
        }
    }
}
Hi, this is my code. Since the mutex is not declared inside the struct, can anyone tell me what data is actually being changed when the mutex is locked and unlocked? Can someone enlighten me?

Where is the set of data that is being protected by the mutex?
The mutex object is alarm_mutex. The data "protected" by it doesn't have to be explicitly mentioned in the code; that is, there doesn't need to be a semantic connection. A mutex is a low-level threading primitive, and as such the user needs to build their own logic around it. In your case, that one place in memory is used to block other parts of your code, the parts accessing the actual data, from interfering.
Think about it this way: std::atomic<int> x; expresses the atomicity of operations on it. int x; mutex m; requires every piece of code that accesses x to properly go through m to ensure the correctness of the program. This low-level access is what we're looking at in your example.
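For instance, here is a minimal sketch of that convention (the names x, m and increment are made up for illustration):
#include <mutex>

int x = 0;       // shared data
std::mutex m;    // by convention, x may only be touched while m is held

void increment() {
    std::lock_guard<std::mutex> lock(m);  // nothing ties m to x except this discipline
    ++x;
}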

pthread_mutex_t alarm_mutex = PTHREAD_MUTEX_INITIALIZER; creates a shared mutex object, used for locking/unlocking.
pthread_mutex_lock locks the mutex as soon as it is available; the mutex becomes unavailable to all other threads once this line has executed.
pthread_mutex_unlock unlocks the mutex, making it available again to other threads (it lets a pthread_mutex_lock call in another thread proceed).

The mutex doesn't know what it is protecting. It is the programmer's job to know that, and to change the protected data only while the mutex is locked.
In this specific case, the alarm list (alarm_list) is the data being protected.
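One way to make that association explicit in the alarm example is to bundle the mutex with the data it guards. This is only a sketch of the idea, not something the code above requires:
/* Sketch: keep the mutex next to the data it protects. */
typedef struct alarm_queue_tag {
    pthread_mutex_t mutex;   /* protects the list field below */
    alarm_t         *list;   /* sorted list of pending alarms */
} alarm_queue_t;

alarm_queue_t alarm_queue = { PTHREAD_MUTEX_INITIALIZER, NULL };

/* Every thread then locks alarm_queue.mutex, touches alarm_queue.list, and unlocks. */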

Related

Semaphore and Critical Section issue on multiple threads

I am having an issue with my multithreaded code and hope someone can help me out.
I wish to print on the console all files and folder starting from a folder given as an argument. I use this function for the enumeration:
void enumerate(char* path) {
HANDLE hFind;
WIN32_FIND_DATA data;
char *fullpath = new char[strlen(path) - 1];
strcpy(fullpath, path);
fullpath[strlen(fullpath) - 1] = '\0';
hFind = FindFirstFile(path, &data);
do {
if (hFind != INVALID_HANDLE_VALUE) {
if (strcmp(data.cFileName, ".") != 0 && strcmp(data.cFileName, ".."))
{
EnterCriticalSection(&crit);
queue.push(data.cFileName);
LeaveCriticalSection(&crit);
ReleaseSemaphore(semaphore, 1, NULL);
if (data.dwFileAttributes == FILE_ATTRIBUTE_DIRECTORY)
{
strcat(fullpath, data.cFileName);
strcat(fullpath, "\\*");
enumerate(fullpath);
}
}
}
} while (FindNextFile(hFind, &data));
FindClose(hFind);
return;
}
When I find a file or a folder, I want to add it to a global queue and have my worker threads print it to the console. My worker threads function is:
DWORD WINAPI print_queue(LPVOID param) {
while (1) {
WaitForSingleObject(semaphore, INFINITE);
EnterCriticalSection(&crit);
char *rez = queue.front();
queue.pop();
LeaveCriticalSection(&crit);
if (strcmp(rez, "DONE") == 0)
break;
else
std::cout << rez << std::endl;
}
return 1;
}
In main, I initialize the semaphore and critical section, both variables declared globally:
semaphore = CreateSemaphore(NULL, 0,1, NULL);
InitializeCriticalSection(&crit);
Then create 4 threads:
thread1 = CreateThread(NULL, 0, print_queue, NULL, 0, &tId1);
thread2 = CreateThread(NULL, 0, print_queue, NULL, 0, &tId2);
thread3 = CreateThread(NULL, 0, print_queue, NULL, 0, &tId3);
thread4 = CreateThread(NULL, 0, print_queue, NULL, 0, &tId4);
I then call the enumerate() function and push four strings to the queue that will signal my threads to stop when those strings are reached:
for (int p = 0; p<4; p++)
{
EnterCriticalSection(&crit);
queue.push(done);
LeaveCriticalSection(&crit);
ReleaseSemaphore(semaphore, 1, NULL);
}
Those 4 strings are the stop condition for my threads. I then wait for the threads:
HANDLE * threadArray = new HANDLE[4];
threadArray[0] = thread1;
threadArray[1] = thread2;
threadArray[2] = thread3;
threadArray[3] = thread4;
WaitForMultipleObjects(4, threadArray, TRUE, INFINITE);
And close the semaphore and critical section:
CloseHandle(semaphore);
DeleteCriticalSection(&crit);
For some reason, the output is random garbage and I can't figure out why.
This is an example output:
te(L┤(L
┤(L
╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠
╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠
╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠╠°┐*╧wM3╧weµFC4
╠╠╠╠╠
My logic was to start the semaphore at 0, enter the critical section whenever operations happen on the queue to protect my data, increment the semaphore in the enumerate() function and decrement it in print_queue().
What might be the problem?
enumerate() has MANY problems:
you are not using strcpy() and strcat() correctly, so you are trashing memory. You are not allocating enough memory to hold the result of strcpy(), which copies characters until it reaches a null terminator: you allocate two fewer characters than needed (one for the last character of the path and one for the null terminator), so you should be allocating strlen+1 characters instead of strlen-1. Worse, you then use strcat() to concatenate a filename onto the allocated string without first reallocating the string to make room for the filename.
you are leaking the allocated string, as you never call delete[] for it.
the if inside the loop is missing != 0 when checking strcmp("..").
you are pushing pointers into queue to data that is local to enumerate() and gets overwritten on each loop iteration, and goes out of scope when enumerate() exits. Your threads are expecting pointers to data that are stable and do not disappear behind their backs. This is the root of your garbage output. Consider yourself lucky that your code is simply outputting garbage and not just crashing outright.
you are not testing the data.dwFileAttributes field correctly. You need to use the & (bitwise AND) operator instead of the == (equals) operator. Folders and files can have multiple attributes, but you are only interested in checking for one, so you have to test that specific bit by itself and ignore the rest.
You really should be using std::string instead for string management, and let it handle memory allocations for you.
Also, consider using std::filesystem or boost::filesystem to handle the enumeration.
Also, there is no need to push "DONE" strings into the queue after enumerating. When a thread is signaled and goes to extract a string and sees the queue is empty, just exit the thread.
Try something more like this instead:
#include <windows.h>
#include <iostream>
#include <string>
#include <queue>
#include <thread>
#include <mutex>
#include <condition_variable>
std::queue<std::string> paths;
std::mutex mtx;
std::condition_variable cv;
bool done = false;
void enumerate(const std::string &path)
{
std::string searchPath = path;
if ((!searchPath.empty()) && (searchPath[searchPath.length()-1] != '\\'))
searchPath += '\\';
WIN32_FIND_DATAA data;
HANDLE hFind = FindFirstFileA((searchPath + "*").c_str(), &data);
if (hFind != INVALID_HANDLE_VALUE)
{
do
{
if ((strcmp(data.cFileName, ".") != 0) && (strcmp(data.cFileName, "..") != 0))
{
std::string fullpath = searchPath + data.cFileName;
{
std::lock_guard<std::mutex> lock(mtx);
paths.push(fullpath);
cv.notify_one();
}
if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
enumerate(fullpath);
}
}
while (FindNextFileA(hFind, &data));
FindClose(hFind);
}
}
void print_queue()
{
std::unique_lock<std::mutex> lock(mtx);
while (true)
{
cv.wait(lock, [](){ return (!paths.empty()) || done; });
if (paths.empty())
return;
std::string rez = paths.front();
paths.pop();
std::cout << rez << std::endl;
}
}
int main()
{
std::thread thread1(print_queue);
std::thread thread2(print_queue);
std::thread thread3(print_queue);
std::thread thread4(print_queue);
enumerate("C:\\");
{ std::lock_guard<std::mutex> lock(mtx); done = true; }  // set under the lock so waiting threads can't miss it
cv.notify_all();
thread1.join();
thread2.join();
thread3.join();
thread4.join();
return 0;
}
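For reference, here is a rough sketch of the same enumeration done with std::filesystem (C++17), as suggested above; it prints the paths directly, but you could just as well push entry.path().string() into the queue:
#include <filesystem>
#include <iostream>
#include <string>

void enumerate_fs(const std::string &root)
{
    namespace fs = std::filesystem;
    // skip_permission_denied keeps the walk going past protected folders
    for (const auto &entry :
         fs::recursive_directory_iterator(root, fs::directory_options::skip_permission_denied))
    {
        std::cout << entry.path().string() << std::endl;
    }
}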
You haven't written anywhere which kind of queue you use, but I guess it's a queue<char*>. This means it stores only pointers to memory that is owned somewhere else.
So when you do queue.push(data.cFileName); you push a pointer that is no longer valid after the next iteration, since data changes there. After enumerate() exits, the data pointers (and thereby the queue elements) even point to undefined memory, which would explain the output.
To fix this, store copies of the file names inside the queue, e.g. by using a queue<std::string>.
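A minimal sketch of that fix (the WIN32_FIND_DATAA parameter and the global queue name are assumptions for illustration):
#include <windows.h>
#include <queue>
#include <string>

std::queue<std::string> queue;                 // each element owns its characters

void push_name(const WIN32_FIND_DATAA &data) {
    // Copies the file name out of 'data', so later FindNextFile() calls can't clobber it.
    queue.push(std::string(data.cFileName));
}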

swapcontext failing in many-to-many thread implementation

I am tasked with implementing a many-to-many thread manager in C++. I've got most of it more or less down, but I'm having serious problems with swapcontext in my uthread_yield() method. Here's the code
/*
* uthread.cpp
*
* Created on: Oct 12, 2016
* Author: michael
*/
#include "uthread.h"
#include <semaphore.h>
#include <pthread.h>
#ifndef STDIO_H_
#include <stdio.h>
#endif
namespace std {
/*
* Initializes all the variables and allocates memory when needed
*/
int uthread::maxThreads;
int uthread::currentThreads;
pthread_mutex_t uthread::mapMutex;
pthread_mutex_t uthread::qMutex;
pthread_mutex_t uthread::threadMutex;
map<int,UserThread*>* uthread::threadMap;
priority_queue<UserThread*>* uthread::threadQueue;
void uthread::uthread_init(int numKernelThreads) {
pthread_mutex_t tester;
pthread_mutex_init(&tester,NULL);
uthread::maxThreads=numKernelThreads;
uthread::currentThreads=0;
pthread_mutex_init(&threadMutex,NULL);
pthread_mutex_init(&qMutex,NULL);
pthread_mutex_init(&mapMutex,NULL);
threadQueue= new priority_queue<UserThread*>;
threadMap=new map<int,UserThread*>;
}
int uthread::uthread_create(void (* func)( )) {
//Create ucontext to be used in in
ucontext_t* ucp=(ucontext_t*)malloc(sizeof(ucontext_t));
getcontext(ucp);
ucp->uc_stack.ss_sp=(void*)malloc(16384);
ucp->uc_stack.ss_size=16384;
makecontext(ucp, func, 0); //make the context for a thread running func
//Create UserThread
time_t currTime;
time(&currTime);
UserThread* newThread=new UserThread(ucp,currTime);
//Thread Creation Logic
pthread_mutex_lock(&threadMutex);
if (currentThreads>=maxThreads) {
pthread_mutex_unlock(&threadMutex);
pthread_mutex_lock(&qMutex);
threadQueue->push(newThread);
pthread_mutex_unlock(&qMutex);
return 0;
}
else {
int (*execute)(void *)= (int (*)(void *)) func;
int tid=clone(execute,ucp->uc_stack.ss_sp,CLONE_VM|CLONE_FILES,NULL);
if (tid==-1) { //clone failed
pthread_mutex_unlock(&threadMutex);
return -1;
}
currentThreads++;
pthread_mutex_unlock(&threadMutex);
/*
* Map tid -> UserThread in thread map
*/
threadMap->insert(pair<int,UserThread*>(tid,newThread));
pthread_mutex_unlock(&mapMutex);
return 0;
}
return -1;
}
void uthread::uthread_exit() {
/*
* Get the corresponding UserThread object from the map
*/
printf("Start Exit \n");
int threadID=syscall(SYS_gettid) ;
pthread_mutex_lock(&mapMutex);
if (threadMap->find(threadID)==threadMap->end()) { //Cannot find map;
pthread_mutex_lock(&threadMutex);
currentThreads--;
pthread_mutex_unlock(&threadMutex);
exit(0);
}
printf("Getting Curr Thread\n");
UserThread* currThread= threadMap->at(threadID);
pthread_mutex_unlock(&mapMutex);
pthread_mutex_lock(&qMutex);
if (threadQueue->empty()) { //No items on queue, delete memory references and exit
printf("Queue is Empty");
pthread_mutex_unlock(&qMutex);
pthread_mutex_lock(&mapMutex);
threadMap->erase(threadID);
pthread_mutex_unlock(&mapMutex);
pthread_mutex_lock(&threadMutex);
currentThreads--;
pthread_mutex_unlock(&threadMutex);
delete currThread;
exit(0);
}
else { //Remove and delete memory reference to old thread, set context to new thread
printf("Swapping Queue\n");
UserThread* newThread=threadQueue->top();
threadQueue->pop();
pthread_mutex_unlock(&qMutex);
pthread_mutex_lock(&mapMutex);
threadMap->insert(pair<int,UserThread*>(threadID,newThread)); //Update Map
pthread_mutex_unlock(&mapMutex);
printf("Deleting Current Thread\n");
delete currThread;
printf("Setting Context\n");
setcontext(newThread->ucp);
printf("set context failed\n");
}
}
void uthread::uthread_yield() {
printf("Start Yield \n");
int threadID=syscall(SYS_gettid) ;
pthread_mutex_lock(&mapMutex);
UserThread* currThread= threadMap->at(threadID);
pthread_mutex_unlock(&mapMutex);
pthread_mutex_lock(&qMutex);
if (threadQueue->empty()) {
printf("Queue is empty\n");
pthread_mutex_unlock(&qMutex);
return;
}
else {
printf("Queue Not Empty\n");
currThread->updateRuntime(time(NULL)); //updates run time account for time it's been on thread
UserThread* highestPriority=threadQueue->top();
if (highestPriority->getRunTime()>currThread->getRunTime()) { //highestPriority is lower priority than currently running thread
printf("lowest runtime is running\n");
pthread_mutex_unlock(&qMutex);
return;
}
else {
printf("SwapContext\n");
threadQueue->pop();
threadQueue->push(currThread);
pthread_mutex_unlock(&qMutex);
pthread_mutex_lock(&mapMutex);
threadMap->insert(pair<int,UserThread*>(threadID,highestPriority)); //Update Map reference
pthread_mutex_unlock(&mapMutex);
//Swaps contexts
swapcontext(currThread->ucp,highestPriority->ucp);
printf("Swapcontext Failed\n");
}
}
}
int uthread::startThread(void* arg ) {
printf("Thread Cloned\n");
pthread_mutex_lock(&mapMutex);
int threadID=syscall(SYS_gettid) ;
UserThread* currThread= threadMap->at(threadID);
pthread_mutex_unlock(&mapMutex);
setcontext(currThread->ucp);
return 0;
}
}
And this is the code of my corresponding UserThread object:
/*
* UserThread.cpp
*
* Created on: Oct 12, 2016
* Author: michael
*/
#include "UserThread.h"
/*
* Constructor. UCP is taken in as well as start time
* Run time initialized to 0
*
*/
UserThread::UserThread(ucontext_t *ucp,time_t st) {
this->ucp=ucp;
this->startTime=(time_t*)malloc(sizeof(time_t));
this->runTime=(double*)malloc(sizeof(double));
*startTime=st;
*runTime=0;
}
/**
* Deconstructor
*/
UserThread::~UserThread() {
//free(ucp->uc_stack.ss_sp);
//free(ucp);
free(startTime);
free(runTime);
}
/*
* adds the running time in seconds (as a double) to the current running time. Also updates the start time
*/
void UserThread::updateRuntime(time_t currTime) {
double diffTime=difftime(currTime,*startTime);
*runTime=*runTime+diffTime;
*startTime=currTime;
}
/*
* Just Updates the start time
*/
void UserThread::updateStartTime(time_t newTime) {
*startTime=newTime;
}
/*
* getter
*/
double UserThread::getRunTime() {
double rTime=*runTime;
return rTime;
}
/*
* getter
*/
time_t UserThread::getStartTime() {
return *startTime;
}
/*
* THIS IS REVERSED ON PURPOSE. C++ runs a maximum priority queue by default
* by overloading the < operator backwards, that isn't an issue. Sketchy? Yes
* Also functional
*/
bool UserThread::operator <(UserThread* t2) {
return this->getRunTime() > t2->getRunTime();
}
uthread_yield() works correctly once for each thread, then fails. Any idea why that is? I've stared at this code for hours and at this point I'm out of ideas.
It's not actually failing: it's just printing your failure message. Your yield implementation finishes with:
swapcontext(currThread->ucp, highestPriority->ucp);
printf("Swapcontext Failed\n");
So the yielding thread swaps away after swapcontext(), which is fine - but when that thread is later swapped back to, it will return from swapcontext() and unconditionally execute the printf(). You need:
if (swapcontext(currThread->ucp, highestPriority->ucp) == -1) {
printf("Swapcontext Failed\n");
/* You need to fix up your queue and map here to account for the
* failure to context switch, and then probably loop back and look
* for another candidate thread to swap to. */
}
I also noticed that your uthread_create() function accesses the map and unlocks mapMutex without locking the mutex first.
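In other words, the insertion into the map in uthread_create() should look something like this:
pthread_mutex_lock(&mapMutex);    /* this lock was missing */
threadMap->insert(pair<int,UserThread*>(tid, newThread));
pthread_mutex_unlock(&mapMutex);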
You're mixing pthreads functions with the bare clone() syscall, which is unsupported. Use pthread_create() / pthread_exit() for managing the underlying kernel threads. One way to do this is to have each thread created by pthread_create() start at a scheduling function that pulls the next user thread to run from your queue:
void* uthread::newThread(void* arg)
{
pthread_mutex_lock(&qMutex);
if (threadQueue->empty()) {
pthread_mutex_unlock(&qMutex);
printf("No thread to start.\n");
return NULL;
}
UserThread* highestPriority = threadQueue->top();
threadQueue->pop();
pthread_mutex_unlock(&qMutex);
int threadID = syscall(SYS_gettid);
pthread_mutex_lock(&mapMutex);
threadMap->insert(pair<int,UserThread*>(threadID,highestPriority)); //Update Map reference
pthread_mutex_unlock(&mapMutex);
setcontext(highestPriority->ucp);
printf("setcontext() Failed\n");
return NULL;
}
...then you can simplify uthread_create() by always pushing the new user thread onto the queue, and only conditionally creating the underlying thread:
// ... start of uthread_create() up to creating new UserThread ...
pthread_mutex_lock(&qMutex);
threadQueue->push(newThread);
pthread_mutex_unlock(&qMutex);
//Thread Creation Logic
pthread_mutex_lock(&threadMutex);
if (currentThreads < maxThreads) {
pthread_t new_pthread;
if (pthread_create(&new_pthread, NULL, uthread::newThread, NULL) != 0) {
printf("New pthread creation failed.\n");
} else {
currentThreads++;
}
}
pthread_mutex_unlock(&threadMutex);
return 0;
By the way, it seems that you're using the threadMap just as a way to implement thread-local-storage: you could instead use the built-in pthreads thread-local-storage API (pthread_key_create() / pthread_setspecific() / pthread_getspecific()).
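A rough sketch of what that would look like with your UserThread type (only the relevant lines are shown):
static pthread_key_t currThreadKey;

/* once, e.g. in uthread_init(): */
pthread_key_create(&currThreadKey, NULL);       /* no destructor; you delete UserThread yourself */

/* instead of threadMap->insert(...) in the newly started thread: */
pthread_setspecific(currThreadKey, newThread);

/* instead of threadMap->at(threadID) in uthread_yield()/uthread_exit(): */
UserThread *currThread = (UserThread *) pthread_getspecific(currThreadKey);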

multithread list shared performance

I am developing an application that reads data from a named pipe on Windows 7 at around 800 Mbps. I have to develop it with several threads, since the FIFO at the other side of the pipe overflows if I am not able to read at the given speed. The performance, though, is really pitiful and I cannot understand why. I have already read several things and tried to split up the memory to avoid bad memory sharing.
At the beginning I was thinking it could be a problem with contiguous memory positions, but the memory sections are queued in a list and the main thread does not use them any more after queuing them. The amounts of memory are huge, so I don't think they lie on the same pages.
This is the threaded function:
void splitMessage(){
char* bufferMSEO;
char* bufferMDO;
std::list<struct msgBufferStr*> localBufferList;
while(1)
{
long bytesProcessed = 0;
{
std::unique_lock<std::mutex> lk(bufferMutex);
while(bufferList.empty())
{
// Wait until the map has data
listReady.wait(lk);
}
//Extract the data from the list and copy to the local list
localBufferList.splice(localBufferList.end(),bufferList);
//Unlock the mutex and notify
// Manual unlocking is done before notifying, to avoid waking up
// the waiting thread only to block again (see notify_one for details)
lk.unlock();
//listReady.notify_one();
}
for(auto nextBuffer = localBufferList.begin(); nextBuffer != localBufferList.end(); nextBuffer++)
{
//nextBuffer = it->second();
bufferMDO = (*nextBuffer)->MDO;
bufferMSEO = (*nextBuffer)->MSEO;
bytesProcessed += (*nextBuffer)->size;
//Process the data Stream
for(int k=0; k<(*nextBuffer)->size; k++)
{
}
//localBufferList.remove(*nextBuffer);
free(bufferMDO);
free(bufferMSEO);
free(*nextBuffer);
}
localBufferList.clear();
}
}
And here is the thread that reads the data and queues it:
DWORD WINAPI InstanceThread(LPVOID lpvParam)
// This routine is a thread processing function to read from and reply to a client
// via the open pipe connection passed from the main loop. Note this allows
// the main loop to continue executing, potentially creating more threads of
// of this procedure to run concurrently, depending on the number of incoming
// client connections.
{
HANDLE hHeap = GetProcessHeap();
TCHAR* pchRequest = (TCHAR*)HeapAlloc(hHeap, 0, BUFSIZE*sizeof(TCHAR));
DWORD cbBytesRead = 0, cbReplyBytes = 0, cbWritten = 0;
BOOL fSuccess = FALSE;
HANDLE hPipe = NULL;
double totalRxData = 0;
char* bufferPnt;
char* bufferMDO;
char* bufferMSEO;
char* destPnt;
// Do some extra error checking since the app will keep running even if this
// thread fails.
if (lpvParam == NULL)
{
printf( "\nERROR - Pipe Server Failure:\n");
printf( " InstanceThread got an unexpected NULL value in lpvParam.\n");
printf( " InstanceThread exitting.\n");
if (pchRequest != NULL) HeapFree(hHeap, 0, pchRequest);
return (DWORD)-1;
}
if (pchRequest == NULL)
{
printf( "\nERROR - Pipe Server Failure:\n");
printf( " InstanceThread got an unexpected NULL heap allocation.\n");
printf( " InstanceThread exitting.\n");
return (DWORD)-1;
}
// Print verbose messages. In production code, this should be for debugging only.
printf("InstanceThread created, receiving and processing messages.\n");
// The thread's parameter is a handle to a pipe object instance.
hPipe = (HANDLE) lpvParam;
try
{
msgSplitter = std::thread(&splitMessage);
//msgSplitter.detach();
}
catch(...)
{
_tprintf(TEXT("CreateThread failed, GLE=%d.\n"), GetLastError());
return -1;
}
while (1)
{
struct msgBufferStr *newBuffer = (struct msgBufferStr* )malloc(sizeof(struct msgBufferStr));
// Read client requests from the pipe. This simplistic code only allows messages
// up to BUFSIZE characters in length.
fSuccess = ReadFile(
hPipe, // handle to pipe
pchRequest, // buffer to receive data
BUFSIZE*sizeof(TCHAR), // size of buffer
&cbBytesRead, // number of bytes read
NULL); // not overlapped I/O
if (!fSuccess || cbBytesRead == 0)
{
if (GetLastError() == ERROR_BROKEN_PIPE)
{
_tprintf(TEXT("InstanceThread: client disconnected.\n"), GetLastError());
break;
}
else if (GetLastError() == ERROR_MORE_DATA)
{
}
else
{
_tprintf(TEXT("InstanceThread ReadFile failed, GLE=%d.\n"), GetLastError());
}
}
//timeStart = omp_get_wtime();
bufferPnt = (char*)pchRequest;
totalRxData += ((double)cbBytesRead)/1000000;
bufferMDO = (char*) malloc(cbBytesRead);
bufferMSEO = (char*) malloc(cbBytesRead/3);
destPnt = bufferMDO;
//#pragma omp parallel for
for(int i = 0; i < cbBytesRead/12; i++)
{
msgCounter++;
if(*(bufferPnt + (i * 12)) == 0) continue;
if(*(bufferPnt + (i * 12)) == 8)
{
errorCounter++;
continue;
}
//Use 64 bits variables in order to make less operations
unsigned long long *sourceAddrLong = (unsigned long long*) (bufferPnt + (i * 12));
unsigned long long *destPntLong = (unsigned long long*) (destPnt + (i * 8));
//Copy the data bytes from source to destination
*destPntLong = *sourceAddrLong;
//Copy and prepare the MSEO lines for the data processing
bufferMSEO[i*4]=(bufferPnt[(i * 12) + 8] & 0x03);
bufferMSEO[i*4 + 1]=(bufferPnt[(i * 12) + 8] & 0x0C) >> 2;
bufferMSEO[i*4 + 2]=(bufferPnt[(i * 12) + 8] & 0x30) >> 4;
bufferMSEO[i*4 + 3]=(bufferPnt[(i * 12) + 8] & 0xC0) >> 6;
}
newBuffer->size = cbBytesRead/3;
newBuffer->MDO = bufferMDO;
newBuffer->MSEO = bufferMSEO;
{
//lock the mutex
std::lock_guard<std::mutex> lk(bufferMutex);
//add data to the list
bufferList.push_back(newBuffer);
} // bufferMutex is automatically released when lk goes out of scope
//Notify
listReady.notify_one();
}
// Flush the pipe to allow the client to read the pipe's contents
// before disconnecting. Then disconnect the pipe, and close the
// handle to this pipe instance.
FlushFileBuffers(hPipe);
DisconnectNamedPipe(hPipe);
CloseHandle(hPipe);
HeapFree(hHeap, 0, pchRequest);
//Show memory leak isues
_CrtDumpMemoryLeaks();
//TODO: Join thread
printf("InstanceThread exitting.\n");
return 1;
}
The thing that really blows my mind is that if I leave it like this, the splitMessage thread takes minutes to get through the data, even though the first thread finished reading it long ago. I mean, the read thread reads about 1.5 GB of information in seconds and then waits for more data from the pipe. This data is processed by the split thread (the only one really "doing" something) in almost one minute or more. Moreover, the CPU is less than 20% used. (It is an i7 laptop with 16 GB RAM and 8 cores!)
On the other hand, if I just comment out the for loop in the processing thread:
for(int k=0; k<(*nextBuffer)->size; k++)
Then the data is read slowly and the FIFO on the other side of the pipe overflows. With 8 processors at more than 2 GHz, it should be fast enough to go through the buffers without many problems, shouldn't it? I think it has to be a memory access issue, or the scheduler is somehow sending the thread to sleep, but I cannot figure out why. Another possibility is that iterating through the linked list with the iterator is not optimal.
Any help would be great, because I have been trying to understand this for a couple of days; I made several changes to the code and tried to simplify it as much as possible, and I am going crazy :).
best regards,
Manuel

Pthread program runs slower as thread increases

I'm a beginner in parallel programming and I tried to write a parallel program with the pthread library. I ran the program on an 8-processor computer. The problem is that when I increase NumProcs, each thread slows down even though its task is always the same. Can someone help me figure out what is happening?
#define MAX_NUMP 16
using namespace std;
int NumProcs;
pthread_mutex_t SyncLock; /* mutex */
pthread_cond_t SyncCV; /* condition variable */
int SyncCount; /* number of processors at the barrier so far */
pthread_mutex_t ThreadLock; /* mutex */
// used only in solaris. use clock_gettime in linux
//hrtime_t StartTime;
//hrtime_t EndTime;
struct timespec StartTime;
struct timespec EndTime;
void Barrier()
{
int ret;
pthread_mutex_lock(&SyncLock); /* Get the thread lock */
SyncCount++;
if(SyncCount == NumProcs) {
ret = pthread_cond_broadcast(&SyncCV);
assert(ret == 0);
} else {
ret = pthread_cond_wait(&SyncCV, &SyncLock);
assert(ret == 0);
}
pthread_mutex_unlock(&SyncLock);
}
/* The function which is called once the thread is allocated */
void* ThreadLoop(void* tmp)
{
/* each thread has a private version of local variables */
long threadId = (long) tmp;
int ret;
int startTime, endTime;
int count=0;
/* ********************** Thread Synchronization*********************** */
Barrier();
/* ********************** Execute Job ********************************* */
startTime = clock();
for(int i=0;i<65536;i++)
for(int j=0;j<1024;j++)
count++;
endTime = clock();
printf("threadid:%ld, time:%d\n",threadId,endTime-startTime);
}
int main(int argc, char** argv)
{
pthread_t* threads;
pthread_attr_t attr;
int ret;
int dx;
if(argc != 2) {
fprintf(stderr, "USAGE: %s <numProcesors>\n", argv[0]);
exit(-1);
}
assert(argc == 2);
NumProcs = atoi(argv[1]);
assert(NumProcs > 0 && NumProcs <= MAX_NUMP);
/* Initialize array of thread structures */
threads = (pthread_t *) malloc(sizeof(pthread_t) * NumProcs);
assert(threads != NULL);
/* Initialize thread attribute */
pthread_attr_init(&attr);
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM); // sys manages contention
/* Initialize mutexs */
ret = pthread_mutex_init(&SyncLock, NULL);
assert(ret == 0);
ret = pthread_mutex_init(&ThreadLock, NULL);
assert(ret == 0);
/* Init condition variable */
ret = pthread_cond_init(&SyncCV, NULL);
assert(ret == 0);
SyncCount = 0;
Count = 0;
/* get high resolution timer, timer is expressed in nanoseconds, relative
* to some arbitrary time.. so to get delta time must call gethrtime at
* the end of operation and subtract the two times.
*/
//StartTime = gethrtime();
ret = clock_gettime(CLOCK_MONOTONIC, &StartTime);
for(dx=0; dx < NumProcs; dx++) {
/* ************************************************************
* pthread_create takes 4 parameters
* p1: threads(output)
* p2: thread attribute
* p3: start routine, where new thread begins
* p4: arguments to the thread
* ************************************************************ */
ret = pthread_create(&threads[dx], &attr, ThreadLoop, (void*) dx);
assert(ret == 0);
}
/* Wait for each of the threads to terminate */
for(dx=0; dx < NumProcs; dx++) {
ret = pthread_join(threads[dx], NULL);
assert(ret == 0);
}
//EndTime = gethrtime();
ret = clock_gettime(CLOCK_MONOTONIC, &EndTime);
printf("Time = %ld nanoseconds\n", EndTime.tv_nsec - StartTime.tv_nsec);
pthread_mutex_destroy(&ThreadLock);
pthread_mutex_destroy(&SyncLock);
pthread_cond_destroy(&SyncCV);
pthread_attr_destroy(&attr);
return 0;
}
Your observation is expected.
The main factors that usually impact this situation (worker spinning on local computation) are:
The ratio nb_threads / nb_available_machine_cores
The affinity of each thread
The optimal scenario here is when you have a ratio of 1, and each thread has a unique affinity with one of the cores.
The idea is to maximize each core throughput. You can do that by having one and only one thread running on each core. If you increase the number of threads (ratio > 1), several threads will share the same core, forcing the kernel (through the task scheduler) to switch between the execution of each of them. This is what you were observing.
Each time the kernel has to operate such a switch, you pay for a context switch. It may become a noticeable overhead.
Note:
You can use pthread_setaffinity_np() to set the affinity of your threads.
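For example, on Linux, pinning each worker to its own core could look roughly like this (a sketch using the threads array and dx index from your main()):
#define _GNU_SOURCE     /* must come before the includes for CPU_SET etc. */
#include <sched.h>
#include <pthread.h>

cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(dx % 8, &cpuset);                 /* one of the 8 available cores */
ret = pthread_setaffinity_np(threads[dx], sizeof(cpu_set_t), &cpuset);
assert(ret == 0);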
If you are running this in release mode (O3 compiler flag) then there are two things wrong with ThreadLoop():
1) There is never any external usage of the 'count' result, so the compiler will omit computing it because it has no visible effect.
2) Even if there had been external usage of 'count' then the compiler will compute the result at compile time and simply emit the value directly.
You can see all this if you disassemble the binary.
You can declare 'volatile int count' to bypass both problems or you can compile with O1 compiler flag or do both.
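That is, something along these lines inside ThreadLoop():
volatile int count = 0;            /* volatile forces the compiler to keep the loop */
for (int i = 0; i < 65536; i++)
    for (int j = 0; j < 1024; j++)
        count++;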
The loop should scale pretty linearly with the number of threads because there is no memory contention. By the way, you should increase the loop iteration count, because I think the duration could be close to the measurement noise...

how to wakeup select() within timeout from another thread

According to the "man select" information:
"On success, select() and pselect() return the number of file descrip‐
tors contained in the three returned descriptor sets which may be zero
if the timeout expires before anything interesting happens. On error,
-1 is returned, and errno is set appropriately; the sets and timeout become
undefined, so do not rely on their contents after an error."
select() will wake up because of:
1) read/write availability
2) a select error
3) a descriptor being closed
However, how can we wake up the select() from another thread if there is no data available and the select is still within timeout?
[update]
Pseudo Code
// Thread blocks on Select
void *SocketReadThread(void *param){
...
while(!(ReadThread*)param->ExitThread()) {
struct timeval timeout;
timeout.tv_sec = 60; //one minute
timeout.tv_usec = 0;
fd_set rds;
FD_ZERO(&rds);
FD_SET(sockfd, &rds)'
//actually, the first parameter of select() is
//ignored on windows, though on linux this parameter
//should be (maximum socket value + 1)
int ret = select(sockfd + 1, &rds, NULL, NULL, &timeout );
//handle the result
//might break from here
}
return NULL;
}
//main Thread
int main(){
//create the SocketReadThread
ReaderThread* rthread = new ReaderThread;
pthread_create(&pthreadid, NULL, SocketReaderThread,
NULL, (void*)rthread);
// do lots of things here
............................
//now main thread wants to exit SocketReaderThread
//it sets the internal state of ReadThread as true
rthread->SetExitFlag(true);
//but how to wake up select ??????????????????
//if SocketReaderThread currently blocks on select
}
[UPDATE]
1) @trojanfoe provides a method to achieve this: his method writes socket data (maybe dirty data or exit-message data) to wake up select. I am going to test it and update the result here.
2) Another thing to mention: closing a socket doesn't guarantee that a select() call waiting on it wakes up; please see this post.
[UPDATE2]
After doing many tests, here are some facts about waking up select:
1) If the socket watched by select is closed by another application, then the select() call wakes up immediately. Thereafter, reading from or writing to the socket returns 0 with errno = 0.
2) If the socket watched by select is closed by another thread of the same application, then select() won't wake up until the timeout if there is no data to read or write. After select() times out, a read/write operation results in an error with errno = EBADF (because the socket was closed by another thread during the timeout period).
I use an event object based on pipe():
IoEvent.h:
#pragma once
class IoEvent {
protected:
int m_pipe[2];
bool m_ownsFDs;
public:
IoEvent(); // Creates a user event
IoEvent(int fd); // Create a file event
IoEvent(const IoEvent &other);
virtual ~IoEvent();
/**
* Set the event to signalled state.
*/
void set();
/**
* Reset the event from signalled state.
*/
void reset();
inline int fd() const {
return m_pipe[0];
}
};
IoEvent.cpp:
#include "IoEvent.h"
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
using namespace std;
IoEvent::IoEvent() :
m_ownsFDs(true) {
if (pipe(m_pipe) < 0)
throw MyException("Failed to create pipe: %s (%d)", strerror(errno), errno);
if (fcntl(m_pipe[0], F_SETFL, O_NONBLOCK) < 0)
throw MyException("Failed to set pipe non-blocking mode: %s (%d)", strerror(errno), errno);
}
IoEvent::IoEvent(int fd) :
m_ownsFDs(false) {
m_pipe[0] = fd;
m_pipe[1] = -1;
}
IoEvent::IoEvent(const IoEvent &other) {
m_pipe[0] = other.m_pipe[0];
m_pipe[1] = other.m_pipe[1];
m_ownsFDs = false;
}
IoEvent::~IoEvent() {
if (m_pipe[0] >= 0) {
if (m_ownsFDs)
close(m_pipe[0]);
m_pipe[0] = -1;
}
if (m_pipe[1] >= 0) {
if (m_ownsFDs)
close(m_pipe[1]);
m_pipe[1] = -1;
}
}
void IoEvent::set() {
if (m_ownsFDs)
write(m_pipe[1], "x", 1);
}
void IoEvent::reset() {
if (m_ownsFDs) {
uint8_t buf;
while (read(m_pipe[0], &buf, 1) == 1)
;
}
}
You could ditch the m_ownsFDs member; I'm not even sure I use that any more.
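For completeness, here is a hypothetical sketch of how such an event object can be wired into the select() loop from the question; the names sockfd, timeout and the exit flag are taken from the pseudo code above:
IoEvent exitEvent;   // shared between the main thread and SocketReadThread

// In SocketReadThread(): watch both the socket and the event's read end.
fd_set rds;
FD_ZERO(&rds);
FD_SET(sockfd, &rds);
FD_SET(exitEvent.fd(), &rds);
int maxfd = (sockfd > exitEvent.fd()) ? sockfd : exitEvent.fd();
int ret = select(maxfd + 1, &rds, NULL, NULL, &timeout);
if (ret > 0 && FD_ISSET(exitEvent.fd(), &rds)) {
    exitEvent.reset();           // drain the pipe
    // the exit flag is set, so break out of the loop here
}

// In the main thread: request shutdown, then wake the blocked select() immediately.
rthread->SetExitFlag(true);
exitEvent.set();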