I am trying to compare performance of different lock-free queues, therefore, I want to create a unit test - which includes pushing/poping user-defined pre-built objects to and from the queue. Therefore, I want to ask you couple of questions:-
1) How to create pre-built objects in a simple manner. Does creating an array like I did would fulfill the purpose.
2) I am getting an error "terminate called after throwing an instance of 'std::system_error' what(): Invalid argument Aborted (core dumped)".
Thanx in advance.
#include <cstdlib>
#include <stdio.h>
#include <string>
#include <chrono>
#include <iostream>
#include <ctime>
#include <atomic>
#include <thread>
#include <boost/lockfree/queue.hpp>
using namespace std;
const long NUM_DATA = 10;
const int NUM_PROD_THREAD = 2;
const int NUM_CONSUM_THREAD = 2;
const long NUM_ITEM = 1000000;
class Data
{
public:
Data(){}
void dataPrint() {cout << "Hello";}
private:
long i;
double j;
};
Data *DataArray = new Data[NUM_DATA];
boost::lockfree::queue<Data*> BoostQueue(1000);
struct Producer
{
void operator()()
{
for(long i=0; i<1000000; i++)
BoostQueue.push( DataArray );
}
};
struct Consumer
{
Data *pData;
void operator()()
{
while ( BoostQueue.pop( pData ) ) ;
}
};
int main(int argc, char** argv)
{
std::thread thrd [NUM_PROD_THREAD + NUM_CONSUM_THREAD];
std::chrono::duration<double> elapsed_seconds;
auto start = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < NUM_PROD_THREAD; i++ )
{
thrd[i] = std::thread{ Producer() };
}
for ( int i = 0; i < NUM_CONSUM_THREAD; i++ )
{
thrd[NUM_PROD_THREAD+i] = std::thread{Consumer()};
}
for ( int i = 0; i < NUM_CONSUM_THREAD; i++ )
{
thrd[i].join();
}
auto end = std::chrono::high_resolution_clock::now();
elapsed_seconds = end - start;
std::cout << "Enqueue and Dequeue 1 million item in:" << elapsed_seconds.count() << std::endl;
for ( int i = 0; i < NUM_PROD_THREAD; i++ )
{
thrd[i].join();
}
return 0;
}
Just illustrating how to use Data elements in the benchmark, though this does add a cout within the measured time which isn't ideal but probably isn't significant either.
class Data
{
public:
Data(long i) : i_(i) {}
void dataPrint() {cout << "Hello";}
private:
long i_;
double j;
};
Data* dataArray[1000000];
for (int i = 0; i < NUM_DATA; ++i) dataArray[i] = new Data(i);
boost::lockfree::queue<Data*> BoostQueue(1000);
struct Producer
{
void operator()()
{
for(long i=0; i<1000000; i++)
BoostQueue.push( dataArray[i] );
}
};
struct Consumer
{
Data *pData;
long result_;
void operator()()
{
result_ = 0;
while ( BoostQueue.pop( pData ) )
result_ += pData->i_;
std::cout << result_ << '\n';
}
};
int main(int argc, char** argv)
{
std::thread thrd [NUM_PROD_THREAD + NUM_CONSUM_THREAD];
std::chrono::duration<double> elapsed_seconds;
auto start = std::chrono::high_resolution_clock::now();
for ( int i = 0; i < NUM_PROD_THREAD; i++ )
thrd[i] = std::thread{ Producer() };
for ( int i = 0; i < NUM_CONSUM_THREAD; i++ )
thrd[NUM_PROD_THREAD+i] = std::thread{Consumer()};
for ( int i = 0; i < NUM_CONSUM_THREAD; i++ )
thrd[NUM_PROD_THREAD+i].join();
auto end = std::chrono::high_resolution_clock::now();
elapsed_seconds = end - start;
std::cout << "Enqueue and Dequeue 1 million item in:"
<< elapsed_seconds.count() << std::endl;
for ( int i = 0; i < NUM_PROD_THREAD; i++ )
thrd[i].join();
for (int i = 0; i < 1000000; ++i)
delete dataArray[i];
}
Related
I'm trying to create a program that solves the problem of dining philosophers using posix threads. However, I got stuck at the very beginning, since the output of std :: cout << id + 1 << "PHILOSOPHER: thinking" << std :: endl; ++ i; is incorrect and id takes too large values. Please point out my mistake.
pthread_mutex_t mutexSpoon[PHILOSOPHERS];
pthread_t createThread(int number){
pthread_t id;
int rc = pthread_create(&id, NULL, philFunc, &number);
if(rc){
abort();
}
return id;
}
void *philFunc(void *arg){
srand(time(0));
int id = *(int*)arg;
int leftSpoon = (id>0) ? id-1 : PHILOSOPHERS;
int rightSpoon = id;
int temp;
int i = 0;
while(i < 10){
usleep((200 - 50) * ( (double)rand() / RAND_MAX ) + 50);
std::cout << id+1 << " PHILOSOPHER: thinking" << std::endl; ++i;
}
return nullptr;
}
main.cpp
using namespace std;
extern pthread_mutex_t mutexSpoon[PHILOSOPHERS];
int main(){
setlocale(LC_ALL, "rus");
for(int i = 0; i < PHILOSOPHERS; ++i)
pthread_mutex_init(&mutexSpoon[i], NULL);
vector<pthread_t> vecID(PHILOSOPHERS);
for(int i = 0; i < PHILOSOPHERS; ++i)
vecID[i] = createThread(i);
for(int i = 0; i < PHILOSOPHERS; ++i)
pthread_join(vecID[i], NULL);
for(int i = 0; i < PHILOSOPHERS; ++i)
pthread_mutex_destroy(&mutexSpoon[i]);
return 0;
}
thread function uses an address for argument, which you pass as an address to a local variable of function createThread - number. The life span of argument should be not shorter than thread, so exactly same as the mutex. Using your snippets as base, I created an example which works around the issue:
#include <iostream>
#include <cstdlib>
#include <vector>
#include <pthread.h>
#include <unistd.h>
void *philFunc(void *arg);
#define PHILOSOPHERS 10
struct Philosopher {
pthread_mutex_t mutexSpoon;
pthread_t id;
int no;
};
Philosopher philosophers[PHILOSOPHERS] = {};
pthread_t createThread(int& number){
pthread_t id;
int rc = pthread_create(&id, NULL, philFunc, &number);
if(rc){
abort();
}
return id;
}
void *philFunc(void *arg){
srand(time(0));
int id = *(int*)arg;
int leftSpoon = (id>0) ? id-1 : PHILOSOPHERS;
int rightSpoon = id;
int temp;
int i = 0;
while(i < 10){
usleep((200 - 50) * ( (double)rand() / RAND_MAX ) + 50);
std::cout << id+1 << " PHILOSOPHER: thinking" << std::endl; ++i;
}
return nullptr;
}
extern pthread_mutex_t mutexSpoon[PHILOSOPHERS];
int main(){
setlocale(LC_ALL, "rus");
for(int i = 0; i < PHILOSOPHERS; ++i) {
pthread_mutex_init(&philosophers[i].mutexSpoon, NULL);
philosophers[i].no = i;
philosophers[i].id = createThread(philosophers[i].no);
}
for(int i = 0; i < PHILOSOPHERS; ++i)
pthread_join(philosophers[i].id, NULL);
for(int i = 0; i < PHILOSOPHERS; ++i)
pthread_mutex_destroy(&philosophers[i].mutexSpoon);
return 0;
}
As you see, there is now own structure Philosopher for each thread, storing its data as it should be. While philosophers here is an array, it can be any other container as long as its elements live long enough and aren't changing their addresses (requirement for some implementations of the pthread mutex).
Note that createThread(int& number) now takes its argument by reference, so the expression &number would get address of the actual object's location, not of local variable.
This code can be simpler, if using C++ thread support and std::future.
I have a struct with multi variables packed which is not aligned.Then I make an array of the struct and set up a reader thread and a writer thread to update the variable concurrently.I find error value which only half of the variable is updated from output.I guess this is caused by one variable lay in two cache lines.Change the variable to atomic doesn't solve the problem.So,is there a way to solve this without memory aligned?
#include <thread>
#include <atomic>
#include <iostream>
#include <mutex>
#include <stdalign.h>
#pragma pack(1)
struct Foo {
uint64_t key;
uint64_t key2;
uint64_t key3;
uint32_t key4;
uint64_t key5;
};
#pragma pack()
const int block_size = 10;
uint64_t keys[10];
void printEle(const Foo* ele) {
std::cout << "Key " << ele->key
<< " key2 " << ele->key2
<< " key3 " << ele->key3
<< " key5 " << ele->key5 << std::endl;
}
void reader(Foo* list) {
for (int i = 0; i < 1000000; ++i) {
for (int j = 0; j < block_size; ++j) {
Foo* ele = reinterpret_cast<Foo*>(list + j);
printEle(ele);
}
}
}
void writer(Foo* list) {
for (int i = 0; i < 1000000; ++i) {
for (int j = 0; j < block_size; ++j) {
Foo* ele = reinterpret_cast<Foo*>(list + j);
if (i % 2 == 0) {
ele->key = keys[j];
ele->key2 = keys[j];
ele->key3 = keys[j];
ele->key5 = keys[j];
} else {
ele->key = j;
ele->key2 = j;
ele->key3 = j;
ele->key5 = j;
}
}
}
}
void test() {
keys[0]= 1556273083026830079;
keys[1]= 6541630416163430395;
keys[2]= 2310622570815837826;
keys[3]= 12643974306886634761;
keys[4]= 15393333677141345392;
keys[5]= 3591765785331799809;
keys[6]= 5404586990109662840;
keys[7]= 1376395845958874653;
keys[8]= 7620513273959825252;
keys[9]= 16620834775579010287;
Foo* list = new Foo[block_size];
for (int i = 0; i < block_size; ++i) {
uint64_t k = keys[i];
Foo* ele = reinterpret_cast<Foo*>(list + i);
ele->key = k;
ele->key2 = k;
ele->key3 = k;
ele->key4 = 707406378;
ele->key5 = k;
}
std::thread write(writer, list);
std::thread read(reader, list);
read.join();
write.join();
}
int main(int argc, char* argv[]) {
std::cout << "Size " << sizeof(Foo) << std::endl;
test();
std::cout << "done." << std::endl;
return 0;
}
This is my class to print data
class PrintData
{
int data[20];
public:
void setData(int dataValue[])
{
for( int i = 0 ; i < 20; i++)
data[i] = dataValue[i];
}
void Print()
{
for (int i = 0; i < 20; i++)
std::cout << data[i];
std::cout << std::endl;
}
};
This is the main function
int number[20] ;
void updateNumber()
{
for (int i = 0; i < 1000; i++) {
// std::this_thread::sleep_for(std::chrono::milliseconds(1000));
for (int k = 0; k < 20; k++)
number[k] = k;
// after one iteration it should wait and after the print.Print() is executed than it should again update the data
}
}
int main()
{
PrintData print;
std::thread t(&updateNumber);
while (true)
{
// if upDateNumber has updated all the numbers than only than only set the number
print.setData(number);
print.Print();
}
return 0;
}
After iteration has finished in the thread it should wait for the print.setData(number) function to execute , once this function has executed it should again update the data.
if print.setData(number) is called and the thread is still not finished updating the array than print.setData(number) should not update the data.
A simple example of a producer consumer problem involving conditional variables would be something like that:
#include <thread>
#include <mutex>
#include <iostream>
#include <condition_variable>
#include <vector>
#include <unistd.h>
#define MAX_SIZE 2
struct Task
{
std::condition_variable m_cond;
std::mutex m_lock;
Task(){}
};
std::vector<int> m_data;
Task m_producer;
Task m_consumers[MAX_SIZE];
std::mutex m_lock;
static bool s_Busy = false;
static void producer(void)
{
for(;;)
{
size_t input=0;
std::unique_lock<std::mutex> lock{m_lock};//{m_producer.m_lock};
if (!s_Busy) {
std::cout << "Enter a number: ";
std::cin >> input;
std::cout << "Producer waiting..." << std::this_thread::get_id() << "\r\n";
m_producer.m_cond.wait(lock);
}
s_Busy = true;
if (m_data.size() < input) {
for (size_t i=0; i < input; ++i){
m_data.push_back(i);
}
}
for (int i=0; i < MAX_SIZE; ++i) {
m_consumers[i].m_cond.notify_one();
}
lock.unlock();
}
}
static void consumers(void)
{
for(;;)
{
std::unique_lock<std::mutex> lock{m_lock};
if (!s_Busy) {
std::cout <<"Consumers waiting....!" << std::this_thread::get_id() << "\r\n";
for (int i=0; i < MAX_SIZE; ++i) {
m_consumers[i].m_cond.notify_all();
}
}
if (!m_data.empty()) {
std::cout << "Remove: " << m_data.at(0) << std::endl;
m_data.erase(m_data.begin());
usleep(1);
}
s_Busy = false;
m_producer.m_cond.notify_one();
lock.unlock();
}
}
int main()
{
std::vector<std::thread> cnsmrs;
std::thread usr{producer};
for (int i=0; i < MAX_SIZE; ++i)
cnsmrs.push_back(std::thread{consumers});
usr.join();
for(int i=0 ; i < MAX_SIZE; ++i)
cnsmrs.at(i).join();
return 0;
}
You can play with different logic and implementation.
I hope this help you: (semaphore is a self implementation of Qt's QSemaphore)
#include <thread>
#include <mutex>
#include <condition_variable>
#include <iostream>
class semaphore
{
public:
semaphore(int n = 0) : m_n(n)
{
}
public:
void acquire(int n = 1)
{
std::unique_lock <std::mutex> lk(m_buf_mut);
while (m_n < n) {
m_cv.wait(lk);
}
m_n -= n;
}
void release(int n = 1)
{
{
std::unique_lock <std::mutex> lk(m_buf_mut);
m_n += n;
}
m_cv.notify_all();
}
bool tryAcquire(int n = 1)
{
std::unique_lock <std::mutex> lk(m_buf_mut);
if (m_n >= n) {
m_n -= n;
return true;
}
return false;
}
private:
std::mutex m_buf_mut;
int m_n;
std::condition_variable m_cv;
};
class PrintData
{
int data[20];
public:
void setData(int dataValue[])
{
for( int i = 0 ; i < 20; i++)
data[i] = dataValue[i];
}
void Print()
{
for (int i = 0; i < 20; i++)
std::cout << data[i];
std::cout << std::endl;
}
};
int number[20] ;
void updateNumber(semaphore *freeSem, semaphore *usedSem)
{
for (int i = 0; i < 1000; i++) {
// std::this_thread::sleep_for(std::chrono::milliseconds(1000));
//
freeSem->acquire();
for (int k = 0; k < 20; k++)
number[k] = k;
usedSem->release();
// after one iteration it should wait and after the print.Print() is executed than it should again update the data
}
}
int main()
{
PrintData print;
semaphore freeSem(1);
semaphore usedSem(0);
std::thread t(&updateNumber, &freeSem, &usedSem);
while (true)
{
// if upDateNumber has updated all the numbers than only than only set the number
usedSem.acquire();
print.setData(number);
print.Print();
freeSem.release();
}
return 0;
}
I wanted to learn how threads work, and I tried to make a program, which would use 2 threads, to copy a picture (just to test my newly acquired threading skills) . But I bumped into an error, probably because my interval (created by the interval function) is only working ( I believe) with one dimensional arrays.How can I change my program , to correctly create intervals , which work on 2 dimensional arrays, such as pictures ?
#include <iostream>
#include <vector>
#include <time.h>
#include <thread>
#include <mutex>
#include <png++/png.hpp>
std::mutex my_mutex;
std::vector<int> interval(int max, int n_threads)
{
std::vector<int> intervallum;
int ugras = max / n_threads;
int maradek = max % n_threads;
int n1 = 0;
int n2;
intervallum.push_back(n1);
for (int i = 0; i < n_threads; i++)
{
n2 = n1 + ugras;
if (i == n_threads - 1)
n2 += maradek;
intervallum.push_back(n2);
n1 = n2;
}
return intervallum;
}
void create_image(png::image<png::rgb_pixel> image, png::image<png::rgb_pixel> new_image, int start, int end)
{
std::lock_guard<std::mutex> lock(my_mutex);
for (int i = start; i < end; i++)
for (int j = start; j < end; j++)
{
new_image[i][j].red = image[i][j].red;
new_image[i][j].blue = image[i][j].blue;
new_image[i][j].green = image[i][j].green;
}
}
int main()
{
png::image<png::rgb_pixel> png_image("mandel.png");
int image_size = png_image.get_width() * png_image.get_height();
png::image<png::rgb_pixel> new_image(png_image.get_width(), png_image.get_height());
time_t start, end;
time(&start);
int size = 2;
std::vector<std::thread> threads;
std::vector<int> stuff_interval = interval(image_size, size);
for (int i = 0; i < size-1; i++)
threads.push_back(std::thread(create_image, std::ref(png_image), std::ref(new_image), stuff_interval[i], stuff_interval[i + 1]));
for (auto& i : threads)
i.join();
create_image(png_image,new_image,stuff_interval[size-2],stuff_interval[size-1]);
new_image.write("test.png");
time(&end);
std::cout << (start - end) << std::endl;
return 0;
}
Okay , I found a way around it (this way I am not getting segmentation error, but it does not copy the image correctly, the new image is fully black, here is the code :
EDIT : seems like, I was passing wrong the pictures, that is the reason why the picture was black.
#include <iostream>
#include <vector>
#include <time.h>
#include <thread>
#include <mutex>
#include <png++/png.hpp>
std::mutex my_mutex;
std::vector<int> interval(int max, int n_threads)
{
std::vector<int> intervallum;
int ugras = max / n_threads;
int maradek = max % n_threads;
int n1 = 0;
int n2;
intervallum.push_back(n1);
for (int i = 0; i < n_threads; i++)
{
n2 = n1 + ugras;
if (i == n_threads - 1)
n2 += maradek;
intervallum.push_back(n2);
n1 = n2;
}
return intervallum;
}
void create_image(png::image<png::rgb_pixel>& image, png::image<png::rgb_pixel>& new_image, int start, int end)
{
std::lock_guard<std::mutex> lock(my_mutex);
for (int i = start; i < end; i++)
for (int j = 0; j < image.get_height(); j++)
{
new_image[i][j].red = image[i][j].red;
new_image[i][j].blue = image[i][j].blue;
new_image[i][j].green = image[i][j].green;
}
}
int main()
{
png::image<png::rgb_pixel> png_image("mandel.png");
int image_size = png_image.get_width() * png_image.get_height();
png::image<png::rgb_pixel> new_image(png_image.get_width(), png_image.get_height());
time_t start, end;
time(&start);
int size = 2;
std::vector<std::thread> threads;
std::vector<int> stuff_interval = interval(png_image.get_width(), size);
new_image.write("test2.png");
for (int i = 0; i < size - 1; i++)
threads.push_back(std::thread(create_image, std::ref(png_image), std::ref(new_image), stuff_interval[i], stuff_interval[i + 1]));
for (auto &i : threads)
i.join();
create_image(std::ref(png_image), std::ref(new_image), stuff_interval[size - 1], stuff_interval[size]);
new_image.write("test.png");
time(&end);
std::cout << (start - end) << std::endl;
return 0;
}
I am java programmer but still finding this c++ code not working. When I run this project (Codeblock), I get the segmentation fault. I searched internet but couldn't find exactly what causing this error.
1) main.cpp
#include "performancetest.h"
int main(int argc, char *argv[])
{
performancetest *Obj = new performancetest;
Obj->test1();
Obj->test2();
Obj->~performancetest();
return 0;
}
2) performancetest.cpp
#include "performancetest.h"
#include <iostream>
using namespace std;
performancetest::performancetest()
{
}
performancetest::~performancetest()
{
}
void performancetest::test1()
{
clock_t t1, t2;
const int g_n = 500;
float TestData[g_n][g_n][g_n];
t1 = clock();
for (int k=0; k<g_n; k++) // K
{
for (int j=0; j<g_n; j++) // J
{
for (int i=0; i<g_n; i++) // I
{
TestData[i][j][k] = 0.0f;
}
}
}
//start time t2
t2 = clock();
double val = this->diffclock(t1, t2);
cout << "Time: " << val << endl;
}
void performancetest::test2()
{
clock_t t1, t2;
const int g_n = 500;
float TestData[g_n][g_n][g_n];
//start time t1
t1 = clock();
for (int k=0; k<g_n; k++) // K
{
for (int j=0; j<g_n; j++) // J
{
for (int i=0; i<g_n; i++) // I
{
TestData[i][j][k] = 0.0f;
}
}
}
//start time t2
t2 = clock();
double val = this->diffclock(t1, t2);
cout << "Time: " << val << endl;
}
double performancetest::diffclock(clock_t clock1,clock_t clock2)
{
double diffticks=clock1-clock2;
double diffms=(diffticks)/(CLOCKS_PER_SEC/1000);
return diffms;
}
3)performancetest.h
#ifndef PERFORMANCETEST_H
#define PERFORMANCETEST_H
#include <time.h>
class performancetest
{
public:
performancetest();
void test1();
double diffclock(clock_t, clock_t);
void test2();
virtual ~performancetest();
protected:
private:
};
#endif // PERFORMANCETEST_H
And, here comes, segmentation fault as shown in below picture
Your multidimensional arrays are too big for the stack, so a stack overflow exception is thrown. You must allocate TestData on the heap.
You can do it like this:
const int g_n = 500;
typedef float MultiDimArray[g_n][g_n];
MultiDimArray* TestData = new MultiDimArray[g_n];
//...
delete[] TestData; //deallocate