The task:
I have many (500) .txt files. Each file contains the names of other files. I am given a filename to start from, and I need to copy that file, every file it mentions, and so on recursively, into a subdirectory copy/. The solution must be multi-threaded.
I wrote the following code, but it runs a bit faster with one thread than with two. What is my mistake, and how can I fix it?
#include <iostream>
#include <fstream>
#include <string>
#include <queue>
#include <set>
#include <cassert>
#include <chrono>
#include <mutex>
#include <condition_variable>
#include <vector>
#include <thread>
template <typename T>
class UQueue {
private:
std::queue<T> m_queue;
std::set<T> m_set;
public:
bool push(const T& t) {
if (m_set.insert(t).second) {
m_queue.push(t);
return true;
}
return false;
}
void pop() {
assert(!m_queue.empty());
m_queue.pop();
}
const T& front() const {
return m_queue.front();
}
bool empty(){
return m_queue.empty();
}
int size_of_set(){
return m_set.size();
}
};
template <class T>
class SafeQueue {
public:
SafeQueue(void)
: q()
, m()
, c()
{}
~SafeQueue(void) {}
void enqueue(T t) {
std::lock_guard<std::mutex> lock(m);
q.push(t);
c.notify_one();
}
T dequeue(void) {
std::unique_lock<std::mutex> lock(m);
while(q.empty()) {
c.wait(lock);
}
T val = q.front();
q.pop();
return val;
}
int size_of_set() {
return q.size_of_set();
}
bool empty() {
return q.empty();
}
private:
UQueue<T> q;
mutable std::mutex m;
std::condition_variable c;
};
void copyFile(std::string fromFile, std::string toFile){
std::ifstream src(fromFile, std::ios::binary);
std::ofstream dst(toFile, std::ios::binary);
dst << src.rdbuf();
}
std::set<std::string> extract( std::string file_name ) {
std::ifstream file(file_name) ;
std::string line;
std::set<std::string> temp;
while (file >> line){ // read until extraction fails; checking eof() re-inserts the last token and never ends if the file fails to open
temp.insert(line);
}
return temp;
}
class Crawler{
public:
void init();
void work();
void thread_init();
int processed(){return q.size_of_set();};
Crawler(std::string start, int thr);
std::string target(){return m_conf.m_targetDir;};
private:
struct Config{
int m_threads;
std::string m_startFile = "1.txt";
std::string m_targetDir = "copy";
};
Config m_conf;
SafeQueue<std::string> q;
};
void Crawler::init(){
q.enqueue(m_conf.m_startFile);
}
Crawler::Crawler(std::string start, int thr){
m_conf.m_startFile = start;
m_conf.m_threads = thr;
}
void Crawler::work(){
std::string path;
std::set<std::string> tempSet;
while (!q.empty()){
path = q.dequeue();
copyFile(path, this->target() + "/" + path);
tempSet = extract(path);
for (auto f : tempSet){
q.enqueue(f);
}
}
}
void Crawler::thread_init(){
std::vector<std::thread> workers;
for (int i = 0; i < m_conf.m_threads; ++i){
workers.push_back(std::thread(&Crawler::work, this));
}
for (auto& thr : workers){
thr.join();
}
}
int main(){
Crawler c("1.txt", 2);
auto start = std::chrono::high_resolution_clock::now();
c.init();
c.thread_init();
auto end = std::chrono::high_resolution_clock::now();
std::cout << c.processed()<<
" " << std::chrono::duration<double, std::milli>(end - start).count() << " ms\n";
}
Related
I want to count the even numbers among all pairwise sums of the integers up to 100000, and I want to do it using a thread pool. Previously I did it statically, i.e., I allocated the work to all the threads at the beginning, and I was able to achieve linear speedup that way. The bottleneck, however, is that the threads which started early also finished early (because they had fewer pairs to compute). So instead I want to allocate work to the threads dynamically: I initially assign some work to the threads, and as soon as a thread completes its work, it comes back to take more work from the queue. Below is my thread pool code.
main.cpp :
#include <iostream>
#include <random>
#include<chrono>
#include<iomanip>
#include<future>
#include<vector>
#include "../include/ThreadPool.h"
std::random_device rd;
std::mt19937 mt(rd());
std::uniform_int_distribution<int> dist(-10, 10);
auto rnd = std::bind(dist, mt);
int thread_work;
long long pairwise(const int start) {
long long sum = 0;
long long counter = 0;
for(int i = start+1; i <= start+thread_work; i++)
{
for(int j = i-1; j >= 0; j--)
{
sum = i + j;
if(sum%2 == 0)
counter++;
}
}
//std::cout<<counter<<std::endl;
return counter;
}
int main(int argc, char *argv[])
{
// Create pool with x threads
int x;
std::cout<<"Enter num of threads : ";
std::cin>>x;
std::cout<<"Enter thread_work : ";
std::cin>>thread_work;
ThreadPool pool(x);
// Initialize pool
pool.init();
int N = 100000;
long long res = 0;
auto start = std::chrono::high_resolution_clock::now();
for(int i = 0; i < N; i = i + thread_work)
{
std::future<long long int> fut = pool.submit(pairwise,i);
res += fut.get();
}
std::cout<<"total is "<<res<<std::endl;
pool.shutdown();
auto end = std::chrono::high_resolution_clock::now();
double time_taken = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
time_taken *= 1e-9;
std::cout << "Time taken by program is : " << std::fixed << time_taken << std::setprecision(9)<<" secs"<<std::endl;
return 0;
}
my SafeQueue.h :
#pragma once
#include <mutex>
#include <queue>
// Thread safe implementation of a Queue using an std::queue
template <typename T>
class SafeQueue {
private:
std::queue<T> m_queue;
std::mutex m_mutex;
public:
SafeQueue() {
}
SafeQueue(SafeQueue& other) {
//TODO:
}
~SafeQueue() {
}
bool empty() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_queue.empty();
}
int size() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_queue.size();
}
void enqueue(T& t) {
std::unique_lock<std::mutex> lock(m_mutex);
m_queue.push(t);
}
bool dequeue(T& t) {
std::unique_lock<std::mutex> lock(m_mutex);
if (m_queue.empty()) {
return false;
}
t = std::move(m_queue.front());
m_queue.pop();
return true;
}
};
and my ThreadPool.h :
#pragma once
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
#include "SafeQueue.h"
class ThreadPool {
private:
class ThreadWorker {
private:
int m_id;
ThreadPool * m_pool;
public:
ThreadWorker(ThreadPool * pool, const int id)
: m_pool(pool), m_id(id) {
}
void operator()() {
std::function<void()> func;
bool dequeued;
while (!m_pool->m_shutdown) {
{
std::unique_lock<std::mutex> lock(m_pool->m_conditional_mutex);
if (m_pool->m_queue.empty()) {
m_pool->m_conditional_lock.wait(lock);
}
dequeued = m_pool->m_queue.dequeue(func);
}
if (dequeued) {
func();
}
}
}
};
bool m_shutdown;
SafeQueue<std::function<void()>> m_queue;
std::vector<std::thread> m_threads;
std::mutex m_conditional_mutex;
std::condition_variable m_conditional_lock;
public:
ThreadPool(const int n_threads)
: m_threads(std::vector<std::thread>(n_threads)), m_shutdown(false) {
}
ThreadPool(const ThreadPool &) = delete;
ThreadPool(ThreadPool &&) = delete;
ThreadPool & operator=(const ThreadPool &) = delete;
ThreadPool & operator=(ThreadPool &&) = delete;
// Inits thread pool
void init() {
for (int i = 0; i < m_threads.size(); ++i) {
m_threads[i] = std::thread(ThreadWorker(this, i));
}
}
// Waits until threads finish their current task and shutdowns the pool
void shutdown() {
m_shutdown = true;
m_conditional_lock.notify_all();
for (int i = 0; i < m_threads.size(); ++i) {
if(m_threads[i].joinable()) {
m_threads[i].join();
}
}
}
// Submit a function to be executed asynchronously by the pool
template<typename F, typename...Args>
auto submit(F&& f, Args&&... args) -> std::future<decltype(f(args...))> {
// Create a function with bounded parameters ready to execute
std::function<decltype(f(args...))()> func = std::bind(std::forward<F>(f), std::forward<Args>(args)...);
// Encapsulate it into a shared ptr in order to be able to copy construct / assign
auto task_ptr = std::make_shared<std::packaged_task<decltype(f(args...))()>>(func);
// Wrap packaged task into void function
std::function<void()> wrapper_func = [task_ptr]() {
(*task_ptr)();
};
// Enqueue generic wrapper function
m_queue.enqueue(wrapper_func);
// Wake up one thread if its waiting
m_conditional_lock.notify_one();
// Return future from promise
return task_ptr->get_future();
}
};
I have asked a simpler version of this question before and got the correct answer: Thread pools not working with large number of tasks
Now I am trying to run tasks from an object of a class in parallel using a thread pool. My task is simple and only prints a number for that instance of the class. I am expecting the numbers 0 to 9 to be printed, but instead some numbers get printed more than once and some are not printed at all. Can anyone see what I am doing wrong when creating tasks in my loop?
#include "iostream"
#include "ThreadPool.h"
#include <chrono>
#include <thread>
using namespace std;
using namespace dynamicThreadPool;
class test {
int x;
public:
test(int x_in) : x(x_in) {}
void task()
{
cout << x << endl;
}
};
int main(void)
{
thread_pool pool;
for (int i = 0; i < 10; i++)
{
test* myTest = new test(i);
std::function<void()> myFunction = [&] {myTest->task(); };
pool.submit(myFunction);
}
while (!pool.isQueueEmpty())
{
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
cout << "waiting for tasks to complete" << endl;
}
return 0;
}
And here is my thread pool; I took this definition from the book "C++ Concurrency in Action":
#pragma once
#include <queue>
#include <future>
#include <list>
#include <functional>
#include <memory>
template<typename T>
class threadsafe_queue
{
private:
mutable std::mutex mut;
std::queue<T> data_queue;
std::condition_variable data_cond;
public:
threadsafe_queue() {}
void push(T new_value)
{
std::lock_guard<std::mutex> lk(mut);
data_queue.push(std::move(new_value));
data_cond.notify_one();
}
void wait_and_pop(T& value)
{
std::unique_lock<std::mutex> lk(mut);
data_cond.wait(lk, [this] {return !data_queue.empty(); });
value = std::move(data_queue.front());
data_queue.pop();
}
bool try_pop(T& value)
{
std::lock_guard<std::mutex> lk(mut);
if (data_queue.empty())
return false;
value = std::move(data_queue.front());
data_queue.pop();
return true;
}
bool empty() const
{
std::lock_guard<std::mutex> lk(mut);
return data_queue.empty();
}
};
class join_threads
{
std::vector<std::thread>& threads;
public:
explicit join_threads(std::vector<std::thread>& threads_) : threads(threads_) {}
~join_threads()
{
for (unsigned long i = 0; i < threads.size(); i++)
{
if (threads[i].joinable())
{
threads[i].join();
}
}
}
};
class thread_pool
{
std::atomic_bool done;
threadsafe_queue<std::function<void()> > work_queue;
std::vector<std::thread> threads;
join_threads joiner;
void worker_thread()
{
while (!done)
{
std::function<void()> task;
if (work_queue.try_pop(task))
{
task();
}
else
{
std::this_thread::yield();
}
}
}
public:
thread_pool() : done(false), joiner(threads)
{
unsigned const thread_count = std::thread::hardware_concurrency();
try
{
for (unsigned i = 0; i < thread_count; i++)
{
threads.push_back(std::thread(&thread_pool::worker_thread, this));
}
}
catch (...)
{
done = true;
throw;
}
}
~thread_pool()
{
done = true;
}
template<typename FunctionType>
void submit(FunctionType f)
{
work_queue.push(std::function<void()>(f));
}
bool isQueueEmpty()
{
return work_queue.empty();
}
};
There's too much code to analyse all of it, but you capture a pointer by reference here:
{
test* myTest = new test(i);
std::function<void()> myFunction = [&] {myTest->task(); };
pool.submit(myFunction);
} // pointer goes out of scope
After that pointer has gone out of scope you will have undefined behavior if you later do myTest->task();.
To solve that immediate problem, copy the pointer and delete the object afterwards to not leak memory:
{
test* myTest = new test(i);
std::function<void()> myFunction = [=] {myTest->task(); delete myTest; };
pool.submit(myFunction);
}
I suspect this could be solved without using new at all, but I'll leave that up to you.
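As a minimal sketch of one way to do that, assuming the same test class and thread_pool from the question: capture the test object by value, so each closure owns its own copy and there is nothing to delete (task() is non-const, so the lambda has to be mutable):
thread_pool pool;
for (int i = 0; i < 10; i++)
{
    test myTest(i);                   // plain automatic object, no new/delete
    pool.submit([myTest]() mutable {  // the closure stores its own copy of myTest
        myTest.task();
    });
}
The pool copies the resulting std::function into its queue, so every task carries its own test object and the lifetime problem disappears.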
I would like to write a class that recursively stores instances of the same class in a map.
I have the following source codes:
/* A.hpp */
#include <iostream>
#include <string>
#include <map>
class A
{
private:
int a;
std::map<int, A> data;
bool finishCycle;
public:
A(const int& input ) : a(input), finishCycle(false)
{
func1();
}
void func1();
};
/* A.cpp */
#include "A.hpp"
void A::func1()
{
A tmp(*this);
while(!finishCycle)
{
a--;
if (a == 0)
finishCycle=true;
else
func1();
}
data.emplace(tmp.a, tmp);
}
/* main.cpp*/
#include "A.hpp"
int main(int argCount, char *args[])
{
A myObj1 (3);
std::cout << "tst" << std::endl;
return 0;
}
This puts all the entries into the top-level map. I would like to have them nested instead:
first entry: <1, A>
inside the first entry: <2, A>
inside that entry: <3, A>
How can I change the code to do this? The nested recursion still confuses me.
Maybe this is what you want?
#include <iostream>
#include <map>
class A
{
private:
std::map<int, A> data;
public:
A(int n, int i = 0) {
if (n == i) {
return;
}
data.emplace( i+1, A(n, i + 1) );
}
void test() {
if (!data.empty()) {
std::cout << data.begin()->first << "\n";
data.begin()->second.test();
}
}
};
int main()
{
A a(4);
a.test();
}
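For A a(4) this builds a chain of four nested maps, each holding a single entry, so a.test() walks down the chain and prints 1, 2, 3 and 4, one nesting level per line.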
I came up with:
#include <iostream>
#include <map>
class A
{
private:
int a;
std::map<int, A> data;
bool finishCycle;
public:
A(const int& input ):a(input), finishCycle(false)
{
func1(*this);
}
void func1(A& tmp);
};
void A::func1(A& tmp)
{
A tmp2(tmp);
tmp2.a--;
while(!finishCycle)
{
if (tmp2.a == 0)
{
(*this).finishCycle=true;
}
else
{
func1(tmp2);
}
}
tmp.data.emplace(tmp2.a, tmp2);
}
int main(int argCount, char *args[])
{
A myObj1 (4);
std::cout << "tst" << std::endl;
return 0;
}
struct taskinfo
{
long int id;
bool cancel;
std::function<void()> func;
std::chrono::system_clock::time_point time;
std::chrono::system_clock::duration interval;
taskinfo(){ }
bool operator<(const taskinfo& task) const {
return time > task.time;
}
public:
taskinfo(long int id, std::function<void()>&& f, const std::chrono::system_clock::time_point& t)
: id(id), func(f),
time(t)
{
cancel = false;
}
};
....
std::priority_queue<taskinfo, std::vector<taskinfo>> tasks;
void at(taskinfo** task){
std::function<void()> threadFunc = [task]() { std::thread((*task)->func).detach(); };
(*task)->func = threadFunc;
tasks.push(**task);
}
In main()..
std::vector<taskinfo*> requests;
for(int i=1; i <=5; i++ )
{
taskinfo* t = new taskinfo(i, [i]{ timeoutFunc(i); }, std::chrono::system_clock::now() + std::chrono::milliseconds(timeout));
tT.at(&t);
requests.push_back(t);
std::cout << "Request " << i << " Registered.... Time:" << std::chrono::system_clock::now().time_since_epoch().count() << std::endl;
}
I think I am missing something here: when I pop the function out of the queue to execute it, the function may be empty, and nothing gets executed.
If I copy the taskinfo to a local variable,
void at(taskinfo** task){
taskinfo t = **task;
//Replace everything else with t and the function works fine, but
//I need to modify the original object through the same reference
}
How can I work with the pointer reference here, inside the lambda?
I have added the complete code of what I am trying to do below.
Complete Code:
#include <functional>
#include <chrono>
#include <future>
#include <queue>
#include <thread>
#include <memory>
#include <sstream>
#include <assert.h>
#include <iostream>
#include <ctime>
#include <sys/time.h>
#include <unistd.h>
#include <limits.h>
#define TIMER_NO_TASK_SLEEP_TIME 100
struct taskinfo
{
long int id;
bool cancel;
std::function<void()> func;
std::chrono::system_clock::time_point time;
std::chrono::system_clock::duration interval;
taskinfo(){ }
bool operator<(const taskinfo& task) const {
return time > task.time;
}
public:
taskinfo(long int id, std::function<void()>&& f, const std::chrono::system_clock::time_point& t)
: id(id), func(f),
time(t)
{
cancel = false;
}
};
class TimerTask
{
private:
std::priority_queue<taskinfo, std::vector<taskinfo>> tasks;
std::unique_ptr<std::thread> thread;
bool keepRunning;
public:
TimerTask()
:keepRunning(true),
thread(new std::thread([this]() {
while(keepRunning)
{
auto now = std::chrono::system_clock::now();
while(!tasks.empty() && tasks.top().time <= now) {
if(!tasks.top().cancel)
{
tasks.top().func();
}
tasks.pop();
}
if(tasks.empty()) {
std::this_thread::sleep_for(std::chrono::milliseconds(TIMER_NO_TASK_SLEEP_TIME));
} else {
std::this_thread::sleep_for(tasks.top().time - std::chrono::system_clock::now());
}
}
})){ }
~TimerTask()
{
keepRunning = false;
thread->join();
}
//Execute a task when the timer times out
void at(taskinfo** task){
std::function<void()> threadFunc = [task]() { std::thread((*task)->func).detach(); };
(*task)->func = threadFunc;
tasks.push(**task);
}
//Cancel the particular task with a flag
void cancel(taskinfo** task){
(* task)->cancel = true;
}
};
//The return type of the task must be void
void timeoutFunc(int id)
{
std::cout << "Request " << id << " Timeout.... Executed Timeout Function Time:" << std::chrono::system_clock::now().time_since_epoch().count() << std::endl;
}
int main(int argc, char* argv[])
{
if(argc != 2)
{
std::cout << "\n Usage <Process> <Timeout>" << std::endl;
return 0;
}
int timeout = atoi(argv[1]);
TimerTask tT;
std::vector<taskinfo*> requests;
requests.reserve(1000);
for(int i=1; i <=5; i++ )
{
taskinfo* t = new taskinfo(i, [i]{ timeoutFunc(i); }, std::chrono::system_clock::now() + std::chrono::milliseconds(timeout));
tT.at(&t);
requests.push_back(t);
std::cout << "Request " << i << " Registered.... Time:" << std::chrono::system_clock::now().time_since_epoch().count() << std::endl;
}
while(1) sleep(60);
return 0;
}
You are passing the address of a local pointer variable which no longer exists by the time the task runs:
taskinfo* t = new taskinfo(i, [i]{ timeoutFunc(i); }, std::chrono::system_clock::now() + std::chrono::milliseconds(timeout));
tT.at(&t);
requests.push_back(t);
In the above code t is a local variable instantiated anew for each iteration of the loop; you get a new t every time.
The call tT.at(&t); takes the address of this short-lived local.
The fix is to pass the pointer itself at the call site: tT.at(t); - notice how this is just like requests.push_back(t);.
Also:
//Execute a task when the timer times out
void TimerTask::at(taskinfo* task){
std::function<void()> threadFunc = [task]() { std::thread(task->func).detach(); };
task->func = threadFunc;
...
}
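With at() taking a plain taskinfo*, the registration loop from the question would call it like this (just a sketch of the changed call site; everything else in the loop stays as it was):
taskinfo* t = new taskinfo(i, [i]{ timeoutFunc(i); },
        std::chrono::system_clock::now() + std::chrono::milliseconds(timeout));
tT.at(t);                // pass the pointer itself, not the address of the short-lived local
requests.push_back(t);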
This is a simple program with a function start() that waits (in an infinite loop) for the user to enter something and stores it in a queue. start() runs in a separate thread. After the user enters some value, the size of the queue still remains zero in main. How can the queue be synchronized?
code: source.cpp
#include <iostream>
#include "kl.h"
using namespace std;
int main()
{
std::thread t1(start);
while (1)
{
if (q.size() > 0)
{
std::cout << "never gets inside this if\n";
std::string first = q.front();
q.pop();
}
}
t1.join();
}
code: kl.h
#include <queue>
#include <iostream>
#include <string>
void start();
static std::queue<std::string> q;
code: kl.cpp
#include "kl.h"
using namespace std;
void start()
{
char i;
string str;
while (1)
{
for (i = 0; i <= 1000; i++)
{
//other stuff and str input
q.push(str);
}
}
}
Your code contains a data race - it crashed for me; both threads are potentially modifying the shared queue at the same time. (Also, you're looping with a char i for values up to 1000, which a char typically cannot even represent - not a good idea.)
You should protect your shared queue with a std::mutex, and use a std::condition_variable to notify that there is a reason to check the queue.
Specifically, you should consider the following (a very common pattern for your producer-consumer case):
Access the queue only when holding the mutex.
Use the condition variable to notify that you've pushed something into it.
Use the condition variable to specify a condition on when there's a point to continue processing.
Here is a rewrite of your code:
#include <iostream>
#include <queue>
#include <thread>
#include <condition_variable>
#include <mutex>
using namespace std;
std::queue<std::string> q;
std::mutex m;
std::condition_variable cv;
void start()
{
string str;
for (std::size_t i = 0; i <= 1000; i++) {
//other stuff and str input
std::cout << "here" << std::endl;
std::unique_lock<std::mutex> lk(m);
q.push(str);
lk.unlock();
cv.notify_one();
}
}
int main()
{
std::thread t1(start);
for (std::size_t i = 0; i <= 1000; i++)
{
std::unique_lock<std::mutex> lk(m);
cv.wait(lk, []{return !q.empty();});
std::string first = q.front();
q.pop();
}
t1.join();
}
My synced queue class example and its usage:
template<typename T>
class SyncQueue
{
std::queue<T> m_Que;
std::mutex m_Lock;
std::condition_variable m_ConVar;
public:
void enque(T item)
{
std::unique_lock<std::mutex> lock(m_Lock);
m_Que.push(item);
lock.unlock();
m_ConVar.notify_all();
}
T deque()
{
std::unique_lock<std::mutex> lock(m_Lock);
do
{
m_ConVar.wait(lock);
} while(m_Que.size() == 0); // loop again on spurious wakeups
auto ret = m_Que.front();
m_Que.pop();
return ret;
}
};
int main()
{
using namespace std::chrono_literals;
SyncQueue<int> sq;
std::thread consumer([&sq]()
{
std::cout << "consumer" << std::endl;
for(;;)
{
std::cout << sq.deque() << std::endl;
}
});
std::thread provider([&sq]()
{
std::this_thread::sleep_for(1s);
sq.enque(1);
std::this_thread::sleep_for(3s);
sq.enque(2);
std::this_thread::sleep_for(5s);
sq.enque(3);
});
consumer.join();
return 0;
}
/* Here is a code snippet with separate Producer and Consumer
   classes along with a buffer (queue) class */
#include <iostream>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <deque>
#include <vector>
using namespace std;
mutex _mutex_1,_mutex_2;
condition_variable cv;
template <typename T>
class Queue
{
deque<T> _buffer;
const unsigned int max_size = 10;
public:
Queue() = default;
void push(const T& item)
{
while(1)
{
unique_lock<mutex> locker(_mutex_1);
cv.wait(locker,[this](){ return _buffer.size() < max_size; });
_buffer.push_back(item);
locker.unlock();
cv.notify_all();
return;
}
}
T pop()
{
while(1)
{
unique_lock<mutex> locker(_mutex_1);
cv.wait(locker,[this](){ return _buffer.size() > 0; });
T item = _buffer.front(); // T, not int (this is a template); take from the front for FIFO order
_buffer.pop_front();
locker.unlock();
cv.notify_all();
return item;
}
}
};
class Producer
{
Queue<int>* _buffer;
public:
Producer(Queue<int>* _buf)
{
this->_buffer = _buf;
}
void run()
{
while(1)
{
auto num = rand()%100;
_buffer->push(num);
_mutex_2.lock();
cout<<"Produced:"<<num<<endl;
this_thread::sleep_for(std::chrono::milliseconds(50));
_mutex_2.unlock();
}
}
};
class Consumer
{
Queue<int>* _buffer;
public:
Consumer(Queue<int>* _buf)
{
this->_buffer = _buf;
}
void run()
{
while(1)
{
auto num = _buffer->pop();
_mutex_2.lock();
cout<<"Consumed:"<<num<<endl;
this_thread::sleep_for(chrono::milliseconds(50));
_mutex_2.unlock();
}
}
};
void client()
{
Queue<int> b;
Producer p(&b);
Consumer c(&b);
thread producer_thread(&Producer::run, &p);
thread consumer_thread(&Consumer::run, &c);
producer_thread.join();
consumer_thread.join();
}
int main()
{
client();
return 0;
}