i'm trying to multithread a program wich take word from file then hash these and write to another.
If i make it without multithreading it's really fast, it's able to use only 15-20% of the CPU and has out like 300.000line/s
But when i tryied with multithreading it only slow down and start to hash only at 17000lines/s, can you please help me?
Thanks
#include <iostream> // std::cout, std::streambuf, std::streamsize
#include <fstream> // std::ifstream
#include <string>
#include <thread>
#include "md5.h"
using namespace std;
static const int num_threads = 10;
void call_from_thread(int tid) {
cout << "Launched by thread " << tid << std::endl;
int cl = 0;
int uscita = 0;
int parole = 0;
char* contents;
ifstream istr("test.txt");
if (istr) {
streambuf * pbuf = istr.rdbuf();
streamsize size = pbuf->pubseekoff(0, istr.end);
pbuf->pubseekoff(0, istr.beg); // rewind
contents = new char[size];
pbuf->sgetn(contents, size);
istr.close();
ofstream myfile;
myfile.open("out.txt");
do {
string prova("");
uscita = 0;
do {
if (contents[cl] == '\n') {
uscita = 1;
}
prova += contents[cl];
cl += 1;
} while (uscita != 1);
parole += 1;
//cout << prova << ":" << md5(prova) << endl;
myfile << prova << ":" << md5(prova) << endl;
} while (parole != 9586054);
myfile.close();
}
}
int main()
{
thread t[num_threads];
//Launch a group of threads
for (int i = 0; i < num_threads; ++i) {
t[i] = thread(call_from_thread, i);
}
cout << "Launched from the main\n";
//Join the threads with the main thread
for (int i = 0; i < num_threads; ++i) {
t[i].join();
}
return 0;
}
What you see here is that all the threads are reading the same file and doing the same work. This results in that the file has to be read num_threads times, probably causing a slowdown of about num_threads (since file I/O probably is on the critical path) or maybe even more because of more cache misses.
You could try reading the entire file into memory, splitting it up between your threads and then having each thread process a subset of that files content.
Related
Like in my title, my goal is to use the producer consumer model to mimic the linux cat command(ouput the contents of the file to screen). When I try running the program using lab3 5 animals.txt (lab3 the compiled/renamed executable, 5 being the size of the bounded buffer, and test_alarm.cpp being the name of the file to be printed), lines in the ouput seem to have been outputted multiple times. I am not sure what could be causing this(maybe I am not deleting the contents of the buffer properly? Though I'm not sure why that would cause multiple prints), so any help is appreciated.
An oddpoint is when I change the positions of 'ifstream infile' from global to the producer func, the output seems to change as well.
Animals.txt (output using linux's 'cat')
I have a dog
Dogs are large
I also have a cat
Animals.txt (output using my program)
I have a dog
Dogs are large
I also have a cat
I have a dog
Dogs are large
I also have a cat
I have a dog
Dogs are large
I have a dog
(Latest output)
counter in prod is: 1
counter in prod is: 2
counter in prod is: 3
I have a dog
counter in consumer is: 3
Dogs are large
counter in consumer is: 2
Segmentation fault (core dumped)
code
#include <iostream>
#include <string>
#include <semaphore.h>
#include <fstream>
#include <unistd.h>
#include <mutex>
#include <condition_variable>
#include <thread>
using namespace std;
int counter = 0; //number of words in buffer
int buffmax; //size of buffer
string filename;
mutex mu;
condition_variable cond;
string* buffer;
bool flag = false;
ifstream infile;
void produce()
{
infile.open(filename);
while(getline(infile, temp)) //!infile.eof()
{
unique_lock<mutex> locker(mu);
cond.wait(locker, []() {return counter < buffmax; }); //if true then produce, if false then wait
buffer[counter] = temp;
//infile >> buffer[counter];
counter++;
//cout << "counter in prod is: " << counter << endl;
locker.unlock();
cond.notify_one(); //notify consumer thread
}
infile.close();
flag = true;
}
void consume()
{
while(true)
{
if (flag == true && counter <= 0)
break;
unique_lock<mutex> locker(mu);
cond.wait(locker, []() {return counter > 0; });
cout << buffer[0] << endl;
string *x = new string[buffmax];
for(int i = 0; i < counter-1; i++) // reason for seg fault?
{
x[i]=buffer[i+1];
}
delete [] buffer;
buffer = NULL
buffer = x;
x = NULL;
//cout << "counter in consumer is: " << counter << endl;
counter--;
locker.unlock();
cond.notify_one(); //notify producer thread
}
}
int main(int argc, char* argv[])
{
string s = argv[1];
buffmax = stoi(s); //size of the buffer
filename = string(argv[2]); // name of file to be parsed
buffer = new string[buffmax]; // creating the bounded buffer
thread t1(produce);
thread t2(consume);
t1.join();
t2.join();
return 0;
}
In consume
for (int i = 0; i < counter; i++)
cout << buffer[i] << endl; //output contents of buffer
counter--;
consumes counter buffers and only decreases counter by 1. This could be solved by reducing counter to zero, but that pretty much defeats the point of using the buffer in the first place.
A potential proper solution would be to replace the linear buffer with a circular buffer or a queue.
Thanks in advance for any help.
Trying to make a program that would create 6 threads, then each 2 seconds randomly choose one and make it print its number. I am obviously doing something wrong, because it just keeps printing 0-1-2-3-4-5 endlessly. The code is below.
Main question is, what should i do to make random threads unlock?
#include <thread>
#include <memory>
#include <chrono>
#include <condition_variable>
std::condition_variable* cv = new std::condition_variable();
std::mutex cv_m;
void threadFunc(std::shared_ptr<bool> flag2, int id)
{
while (true)
{
std::unique_lock<std::mutex> lock(cv_m);
cv->wait(lock);
if (true)
if (*flag2) std::cout << "Thread" << " " << id << std::endl;
}
}
int main() {
std::shared_ptr<bool> f2 = std::make_shared<bool>(false);
std::thread threads[6];
for (int i = 0; i < 6; i++)
threads[i] = std::thread(threadFunc, f2, i);
*f2 = true;
while (true)
{
cv->notify_one();
std::this_thread::sleep_for(std::chrono::seconds(2));
}
return 0;
}
You can use a condition variable for each thread, it should be false for each thread at the beginning, then change a random condition variable to true and notify all, this will make a random thread to wake up (the thread that owns that condition variable)
here is the full solution
#include <iostream>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <unistd.h>
#include "UserInterruptHandler.h"
using namespace std;
UserInterruptHandler h;
condition_variable conditionalVariable;
mutex mtx;
bool flag = true;
void myMethod(int id, bool *canWork) {
unique_lock<mutex> ul(mtx);
while (flag) {
conditionalVariable.wait(ul,[=]{return *canWork;});
if(!flag)
break;
cout << "thread " << id << endl;
}
cout << "thread " << id << " exits.." << endl;
}
int main() {
cout << "input thread count" << endl;
int n;
cin >> n;
thread myThreads[n];
bool *canWork = new bool[n];
for (int i = 0; i < n; i++) {
canWork[i] = false;
myThreads[i] = thread(myMethod, i + 1, &canWork[i]);
}
while (!h.checkInterruption()) {
int i = rand() % n;
canWork[i] = true;
conditionalVariable.notify_all();
canWork[i] = false;
usleep(1000);
}
flag = false;
int i = 0;
for (thread &th:myThreads) {
canWork[i++] = true;
conditionalVariable.notify_all();
if (th.joinable())
th.join();
}
}
notice that here I am using header UserInterruptHandler.h to handle CTR+C event to end all threads gracefully
I have a multi-threaded application(OpenMP), in which each thread do some calculations and find the value of _pre.
Then a file with name "file_pre.txt" is opened and write the part of the string starting from index _sub and _end.
It should not happen that two threads having the same file name open the file for writing.
Hence the writing part I wrote in critical section but it's expensive because I would like multiple threads having different file name to be able to write in the respective files simultaneously, and only lock them out when they have same value in _pre (same file name) (the updating thread could wait for the other threads to finish).
Does anyone have a solution?
PART OF CODE:
ofstream outfile;
#pragma omp parallel for num_threads(8) schedule(static) private(it) shared(array)
for (it = 0; it < array.size(); ++it)
{
int _pre, _sub = 0, _end;
if(---)
{
_pre = xyz;
_end = abc;
ostringstream strCounter;
strCounter << _pre;
string result = "file" + strCounter.str() + ".txt";
#pragma omp critical(outfile)
{
outfile.open(result.c_str(), std::ios_base::app);
if (outfile.is_open())
{
for (int count = _sub; count < _end; count++)
{
outfile << buffer[count];
}
outfile << "\n";
outfile.close();
} else cout << "Unable to open file";
_sub = _abc;
}
}
else
{
_pre = _xyz_;
_end = _abc_;
ostringstream strCounter;
strCounter << _pre;
string result = "file" + strCounter.str() + ".txt";
#pragma omp critical(outfile)
{
outfile.open(result.c_str(), std::ios_base::app);
if (outfile.is_open())
{
for (int count = _sub; count < _end; count++)
{
outfile << buffer[count];
}
outfile << "\n";
outfile.close();
} else cout << "Unable to open file";
_sub = _abc;
}
}
}
I am trying to remove a deadlock from a program. The problem is that the program keeps giving me aborted. The point is to write data to a file. But when a deadlock occurs the thread should wait and continue later on rather than aborting.
#include <iostream>
#include <unistd.h>
#include <fstream>
#include <vector>
#include <thread>
#include <mutex>
#include <exception>
#include <condition_variable>
using namespace std;
std::mutex mtx;
ofstream myfile;
condition_variable cv;
void lock()
{
mtx.lock();
}
void unlock()
{
mtx.unlock();
}
void writeToFile(int threadNumber){
myfile << "[";
for(int j =1; j <= 10; j++){
int num = j * threadNumber;
string line = std::to_string(num) + " ";
myfile << line;
}
myfile << "]";
//mtx.unlock();
}
void threadFunction(int threadNumber)
{
// int x = 0;
// int y = 0;
try{
lock();
if (threadNumber % 2 == 0)
sleep(rand() % 4 + 1);
writeToFile(threadNumber);
throw exception();
unlock();
}
catch(...){
cout << "Something went wrong!" << endl;
throw exception();
}
}
int main (int argc, char const *argv[]) {
myfile.open ("mutex.txt");
std::set_terminate([](){
std::cout << "Unhandled exception\n";
// Here I want to fix the deadlock if something goes wrong. But I keep getting Abroted
});
int len;
cout << "Enter Number of threads : ";
cin >> len;
std::thread t[len + 1];
for(int i =1; i <= len;i++){
t[i] = std::thread(threadFunction, i);
cout << "Created Thread : " <<t[i].get_id()<<endl;
}
for(int i =1; i <= len;i++){
t[i].join();
}
myfile.close();
return 0;
}
Output
Enter Number of threads : 5
Created Thread : 1992414288
Created Thread : 1982854224
Created Thread : 1974465616
Created Thread : 1966077008
Created Thread : 1957688400
Something went wrong!
Unhandled exception
Aborted
How can I avoid the aborted and let the thread wait.
Update: Included all the relevant code...
Do not lock() / unlock() mutexes manually. That's error prone. Use guards instead. mtx.unlock(); after throwing the exception won't be called.
Here's how your code should look:
try{
std::lock_guard<std::mutex> lock(mtx);
if (threadNumber % 2 == 0)
sleep(rand() % 4 + 1);
writeToFile(threadNumber);
throw exception();
}
catch(...){
cout << "Something went wrong!" << endl;
throw exception();
}
To avoid deadlocks in general that locking and unlocking of multiple mutexes needs to be done in reverse order. So if one thread uses something like
{
std::lock_guard<std::mutex> lock1(mtx1);
std::lock_guard<std::mutex> lock2(mtx2);
// ... exception thrown somewhere
}
this is guaranteed since the destructors of std::lock_guard are guaranteed to be called in the reverse order these were constructed.
I think it might be something related to the way my pointers are used/initialized in my struct, but I'm not completely sure. I use 'g++ -lpthread main.cpp' to compile. The program just hangs in Linux, while executing properly in windows. The program doesn't even spit out a cout I put in the beginning of the code for debugging purposes.
#include "pthread.h"
#include "semaphore.h"
#include "time_functions.h"
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
using namespace std;
struct vars {
char buffer[10][1000];
int put;
int take;
sem_t openSlot;
sem_t slotInactive;
sem_t newData;
ifstream readFile;
ofstream writeFile;
};
void *write(void *in) //consumer, writes data to file and deletes data from buffer
{
vars* writeVars = (vars*)in;
while (true)
{
sem_wait(&(*writeVars).newData);
sem_wait(&(*writeVars).slotInactive);
if ((*writeVars).buffer[(*writeVars).take % 10][0] != '$')
{
(*writeVars).writeFile << (*writeVars).buffer[(*writeVars).take % 10];
if ((*writeVars).readFile.eof() != true)
{
(*writeVars).writeFile << endl;
}
else
{
break;
}
}
(*writeVars).take++;
sem_post(&(*writeVars).openSlot);
sem_post(&(*writeVars).slotInactive);
}
pthread_exit(0);
return 0;
}
void *read(void *in) //producer, reads data into buffer
{
vars* readVars = (vars*)in;
char read_line[1000];
while ((*readVars).readFile.getline(read_line, 1000))
{
sem_wait(&(*readVars).openSlot);
sem_wait(&(*readVars).slotInactive);
strcpy((*readVars).buffer[(*readVars).put % 10], read_line);
(*readVars).put++;
sem_post(&(*readVars).slotInactive);
sem_post(&(*readVars).newData);
}
sem_wait(&(*readVars).openSlot);
sem_wait(&(*readVars).slotInactive);
(*readVars).buffer[(*readVars).put % 10][0] = '$';
sem_post(&(*readVars).slotInactive);
sem_post(&(*readVars).newData);
pthread_exit(0);
return 0;
}
int main(int argc, char* argv[])
{
char pause[10];
vars *varsPointer, var;
varsPointer = &var;
var.take = 0;
var.put = 0;
var.writeFile.open(argv[2], ios::out);
var.readFile.open(argv[1], ios::in);
start_timing();
sem_init(&var.openSlot, 0, 10);
sem_init(&var.slotInactive, 0, 1);
sem_init(&var.newData, 0, 0);
pthread_t read_Thread, write_Thread;
pthread_create(&read_Thread, NULL, read, varsPointer);
pthread_create(&write_Thread, NULL, write, varsPointer);
pthread_join(read_Thread, NULL);
pthread_join(write_Thread, NULL);
sem_destroy(&var.openSlot);
sem_destroy(&var.slotInactive);
sem_destroy(&var.newData);
stop_timing();
var.readFile.close();
var.writeFile.close();
//Display timer
cout << "wall clock time (ms):" << get_wall_clock_diff() * 1000 << '\n';
cout << "cpu time (ms):" << get_CPU_time_diff() * 1000 << '\n';
cout << "Type Something and Press Enter To Continue";
cin >> pause; //Just used to keep cmd promt open in Windows after program execution
return 0;
}