Error when I push back a pair in C++

Error when I push back a pair in C++ - c++

I've been trying to compile my program which should push a string and a float pair back on a vector:
typedef std::pair<string, float> Prediction;
std::vector<Prediction> predictions;
for ( int i = 0 ; i < output.size(); i++ ) {
std::vector<int> maxN = Argmax(output[i], 1);
int idx = maxN[0];
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
}
return predictions;
However, every time I try to compile this, I get this error:
error: no matching member function for call to 'push_back'
predictions.push_back(std::make_pair(labels_[idx], output[idx]));
I also get a few other warnings saying things like
candidate function not viable: no known conversion from 'pair<[...],
typename __make_pair_return >
&>::type>' to 'const pair<[...], float>' for 1st argument
_LIBCPP_INLINE_VISIBILITY void push_back(const_reference __x);
and
candidate function not viable: no known conversion from 'pair<[...],
typename __make_pair_return >
&>::type>' to 'pair<[...], float>' for 1st argument
_LIBCPP_INLINE_VISIBILITY void push_back(value_type&& __x);
I've been trying to rewrite things and modify my functions but I can't work out why this error remains, does anyone know what I can do to fix this?
Here is the code in context if that helps, the header file:
/**
* Classification System
*/
#ifndef __CLASSIFIER_H__
#define __CLASSIFIER_H__
#include <caffe/caffe.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
using namespace caffe; // NOLINT(build/namespaces)
using std::string;
/* Pair (label, confidence) representing a prediction. */
typedef std::pair<string, float> Prediction;
/* Batch image classifier built around a single caffe Net<float>.
 * The member functions are defined in the class file. */
class Classifier {
public:
/* Loads the network description (model_file), its trained weights
 * (trained_file) and one label per line from label_file. */
Classifier(const string& model_file,
const string& trained_file,
const string& label_file);
/* Runs the network on all images in img and returns one Prediction
 * per row of network output. */
std::vector< Prediction > Classify(const std::vector<cv::Mat>& img);
private:
/* Forward pass over nImages images; returns one score vector per image. */
std::vector< std::vector<float> > Predict(const std::vector<cv::Mat>& img, int nImages);
/* Appends cv::Mat headers that alias the network's input blob to
 * input_channels, one per channel per image. */
void WrapInputLayer(std::vector<cv::Mat>* input_channels, int nImages);
/* Splits each image into channels and copies them into the wrapped
 * input planes. */
void Preprocess(const std::vector<cv::Mat>& img,
std::vector<cv::Mat>* input_channels, int nImages);
private:
shared_ptr<Net<float> > net_;    /* the loaded network */
cv::Size input_geometry_;        /* width/height of the input blob */
int num_channels_;               /* channel count of the input blob (1 or 3) */
std::vector<string> labels_;     /* label text, indexed by output channel */
};
#endif /* __CLASSIFIER_H__ */
Class File:
#define CPU_ONLY
#include "Classifier.h"
using namespace caffe; // NOLINT(build/namespaces)
using std::string;
/* Builds a classifier: selects the compute mode, loads the network and its
 * weights, records the input geometry, and reads one label per line.
 * Every CHECK aborts with the quoted message when its condition fails. */
Classifier::Classifier(const string& model_file,
                       const string& trained_file,
                       const string& label_file) {
  /* The compute device is chosen at compile time. */
#ifndef CPU_ONLY
  Caffe::set_mode(Caffe::GPU);
#else
  Caffe::set_mode(Caffe::CPU);
#endif

  /* Load the network. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(trained_file);

  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  /* Remember the shape of the single input blob. */
  Blob<float>* const in_blob = net_->input_blobs()[0];
  num_channels_ = in_blob->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
      << "Input layer should have 1 or 3 channels.";
  input_geometry_ = cv::Size(in_blob->width(), in_blob->height());

  /* Load labels, one per line. */
  std::ifstream label_stream(label_file.c_str());
  CHECK(label_stream) << "Unable to open labels file " << label_file;
  for (string text; std::getline(label_stream, text); )
    labels_.push_back(text);

  /* There must be exactly one label per output channel. */
  Blob<float>* const out_blob = net_->output_blobs()[0];
  CHECK_EQ(labels_.size(), out_blob->channels())
      << "Number of labels is different from the output layer dimension.";
}
/* Orders (score, index) pairs so that the highest score comes first. */
static bool PairCompare(const std::pair<float, int>& lhs,
                        const std::pair<float, int>& rhs) {
  return lhs.first > rhs.first;
}

/* Return the indices of the top N values of vector v, best first.
 *
 * N is clamped to the range [0, v.size()], so asking for more entries than
 * exist (or for a negative count) returns what is available instead of
 * handing std::partial_sort an iterator outside the range. */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
  const size_t count =
      N <= 0 ? 0 : std::min(static_cast<size_t>(N), v.size());

  std::vector<std::pair<float, int> > pairs;
  pairs.reserve(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

  /* Only the first `count` entries need to be in order. */
  std::partial_sort(pairs.begin(),
                    pairs.begin() + static_cast<std::ptrdiff_t>(count),
                    pairs.end(), PairCompare);

  std::vector<int> result;
  result.reserve(count);
  for (size_t i = 0; i < count; ++i)
    result.push_back(pairs[i].second);
  return result;
}
/* Classifies every image in img and returns, per image, the best label
 * together with its score.
 *
 * output[i] is the score vector of image i, so the score that belongs to
 * label idx is output[i][idx]. Passing output[idx] (a whole vector) to
 * make_pair does not convert to Prediction and fails to compile. */
std::vector<Prediction> Classifier::Classify(const std::vector<cv::Mat>& img) {
  std::vector< std::vector<float> > output = Predict(img, img.size());

  std::vector<Prediction> predictions;
  predictions.reserve(output.size());
  for (size_t i = 0; i < output.size(); ++i) {
    std::vector<int> maxN = Argmax(output[i], 1);
    int idx = maxN[0];
    predictions.push_back(std::make_pair(labels_[idx], output[i][idx]));
  }
  return predictions;
}
/* Runs one forward pass over nImages images and returns the output layer
 * as one score vector per image. */
std::vector< std::vector<float> > Classifier::Predict(const std::vector<cv::Mat>& img, int nImages) {
  /* Resize the input blob for this batch and propagate the new shape. */
  Blob<float>* const in_blob = net_->input_blobs()[0];
  in_blob->Reshape(nImages, num_channels_,
                   input_geometry_.height, input_geometry_.width);
  net_->Reshape();

  /* Fill the input blob through cv::Mat headers that alias it. */
  std::vector<cv::Mat> planes;
  WrapInputLayer(&planes, nImages);
  Preprocess(img, &planes, nImages);

  net_->ForwardPrefilled();

  /* Slice the flat output into one vector per image. */
  Blob<float>* const out_blob = net_->output_blobs()[0];
  std::vector< std::vector<float> > scores;
  for (int image = 0; image < nImages; ++image) {
    const float* const first = out_blob->cpu_data() + image * out_blob->channels();
    const float* const last = first + out_blob->channels();
    scores.push_back(std::vector<float>(first, last));
  }
  return scores;
}
/* Appends to input_channels one single-channel float cv::Mat header per
 * channel per image. Each header points straight into the network's input
 * blob, so writing to those Mats writes the network input without an
 * extra copy. */
void Classifier::WrapInputLayer(std::vector<cv::Mat>* input_channels, int nImages) {
  Blob<float>* const blob = net_->input_blobs()[0];
  const int cols = blob->width();
  const int rows = blob->height();
  float* plane = blob->mutable_cpu_data();

  const int plane_count = blob->channels() * nImages;
  for (int p = 0; p < plane_count; ++p, plane += cols * rows)
    input_channels->push_back(cv::Mat(rows, cols, CV_32FC1, plane));
}
/* Splits each of the first nImages images into its channels and copies
 * channel j of image i into input plane i * num_channels_ + j.
 *
 * num_channels_ is a plain int, so the plane index is computed with it
 * directly (indexing it with [0] does not compile).
 *
 * NOTE(review): cv::Mat::copyTo reallocates the destination when size or
 * type differ, which would detach it from the network blob. This presumably
 * relies on the images already being float and input_geometry_ sized;
 * confirm against the callers. */
void Classifier::Preprocess(const std::vector<cv::Mat>& img,
                            std::vector<cv::Mat>* input_channels, int nImages) {
  for (int i = 0; i < nImages; i++) {
    std::vector<cv::Mat> channels;
    cv::split(img[i], channels);
    for (size_t j = 0; j < channels.size(); j++) {
      channels[j].copyTo((*input_channels)[i * num_channels_ + j]);
    }
  }
}
Thanks so much!

typedef std::pair<string, float> Prediction;
std::vector<Prediction> predictions;
std::vector< std::vector<float> > output = Predict(img, img.size());
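Prediction is a std::pair<string, float>, so the second argument of make_pair must be a single float. output is a vector of vectors, so output[idx] is a whole std::vector<float>; use output[i][idx] to get the single float.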

Related

How to use a constructor variable in the operator() function?

I'm trying to call a variable which is declared in the constructor in the operator() function. Variable is declared of type boost::multi_array<float, 2>. But still it throws the error:
error: no match for ‘operator /’
I guess boost library has these predefined operators! Can anyone see what I'm doing wrong here?
#ifndef CORRELATOR_CHARACTERISTIC_FUNCTION_HPP
#define CORRELATOR_CHARACTERISTIC_FUNCTION_HPP
#include <halmd/numeric/blas/fixed_vector.hpp>
#include <cmath>
#include <boost/multi_array.hpp>
#include "read_box.hpp"
namespace correlator {
/* Functor that evaluates exp(k * dr) between two samples, where
 * k = 2*pi / w and w is read once from the box file at construction.
 *
 * w is stored as a data member so operator() can use it. A variable
 * declared inside the constructor is gone when the constructor returns,
 * and creating a new Characteristic_function inside operator() yields an
 * object, not a number, so "2 * M_PI / w" had no matching operator/. */
class Characteristic_function
{
public:
    typedef std::shared_ptr<boost::multi_array<float, 2>> sample_type;
    typedef halmd::fixed_vector<double, 3> result_type;
    using k_type = boost::multi_array<float, 2>;

    /* Reads frame 1 through read_box and keeps its [0][0] entry as w. */
    Characteristic_function()
    {
        read_box read_box_file;
        k_type frame_b = read_box_file.read_frame(1);
        w = frame_b[0][0];
    }

    /* Uses at most the first 100 rows of the two samples. */
    result_type operator()(sample_type const &first, sample_type const &second) const
    {
        result_type c_func = 0;
        size_t N = first->size();
        // explicit template argument: the literal and N may differ in type
        N = std::min<size_t>(100, N);
        double k = 2 * M_PI / w;
        for (unsigned int i = 0; i < N; ++i) {
            // only component 0 is evaluated
            for (unsigned int j = 0; j <= 0; ++j) {
                double dr = (*first)[i][j] - (*second)[i][j];
                // NOTE(review): each row overwrites c_func[j]; if an average
                // over rows is intended this should be "+=" - confirm.
                c_func[j] = exp(k*dr);
            }
        }
        return c_func / static_cast<double>(N);
    }

private:
    double w; // value read from the box file by the constructor
};
}
#endif /* ! CORRELATOR_CHARACTERISTIC_FUNCTION_HPP */
w is just a float number, and I want to use this number in the operator() function.

You can do something like this:
/* Characteristic function */
#ifndef CORRELATOR_CHARACTERISTIC_FUNCTION_HPP
#define CORRELATOR_CHARACTERISTIC_FUNCTION_HPP
#include <halmd/numeric/blas/fixed_vector.hpp>
#include <cmath>
#include <boost/multi_array.hpp>
#include <complex>
#include "read_box.hpp"
namespace correlator {
/* Functor that evaluates exp(k * dr) between two samples, where
 * k = 2*pi / w. The constructor reads w from the box file once and stores
 * it in a data member, which is what makes it visible in operator(). */
class Characteristic_function
{
private:
    double w; // value read from the box file by the constructor
public:
    typedef std::shared_ptr<boost::multi_array<float, 2>> sample_type;
    typedef halmd::fixed_vector<double, 3> result_type;
    typedef boost::multi_array<float, 2> k_type;

    /* Reads frame 1 through read_box and keeps its [0][0] entry as w. */
    Characteristic_function()
    {
        read_box read_box_file;
        k_type frame_b = read_box_file.read_frame(1);
        w = frame_b[0][0];
    }

    /* Uses at most the first 100000 rows of the two samples. */
    result_type operator()(sample_type const &first, sample_type const &second) const
    {
        result_type c_func = 0;
        size_t N = first->size();
        // explicit template argument: std::min(100000UL, N) does not compile
        // on platforms where size_t is not unsigned long
        N = std::min<size_t>(100000, N);
        double k = 2 * M_PI / w;
        for (unsigned int i = 0; i < N; ++i) {
            // only component 0 is evaluated
            for (unsigned int j = 0; j <= 0; ++j) {
                double dr = exp( k*((*first)[i][j] - (*second)[i][j]) );
                // NOTE(review): each row overwrites c_func[j]; if an average
                // over rows is intended this should be "+=" - confirm.
                c_func[j] = dr;
            }
        }
        return c_func / static_cast<double>(N);
    }
};
}
#endif /* ! CORRELATOR_CHARACTERISTIC_FUNCTION_HPP */
This way, operator() automatically reads the value of w that the constructor stored in the data member.

Nanoflann findNeighbors raise segfault

So I have the following class KdTree:
#include <nanoflann.hpp>
#include <ctime>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <cstdlib>
#include <iostream>
/* Minimal 3-D point container exposing the three member functions
 * (point count, coordinate access, bounding box) that the kd-tree
 * adaptor calls on its dataset. */
struct PointCloud
{
    struct Point
    {
        double x,y,z;
    };

    std::vector<Point> pts;

    /* Number of points in the cloud. */
    inline size_t kdtree_get_point_count() const { return pts.size(); }

    /* Coordinate dim of point idx: 0 -> x, 1 -> y, anything else -> z.
     * Every path returns a value; previously control could fall off the
     * end of this non-void function. */
    inline double kdtree_get_pt(const size_t idx, const size_t dim) const
    {
        if (dim == 0) return pts[idx].x;
        if (dim == 1) return pts[idx].y;
        return pts[idx].z;
    }

    /* Always returns false and leaves the argument untouched. */
    template <class BBOX>
    bool kdtree_get_bbox(BBOX& /* bb */) const { return false; }
};
/* Replaces the contents of point with one Point per row of given, taking
 * the first three entries of each row as x, y and z. */
void generatePointCloud(PointCloud &point, const std::vector<std::vector<double>>& given)
{
    const size_t count = given.size();
    point.pts.resize(count);
    for (size_t row = 0; row < count; ++row) {
        const std::vector<double>& src = given[row];
        PointCloud::Point& dst = point.pts[row];
        dst.x = src[0];
        dst.y = src[1];
        dst.z = src[2];
    }
}
using namespace nanoflann;
using Points = std::vector<std::vector<double>>;
class KdTree {
public:
KdTree(const Points& points) {
PointCloud cloud;
generatePointCloud(cloud, points); // just puts points into cloud format
index = std::make_shared<my_kd_tree>(3 /*dim*/, cloud, KDTreeSingleIndexAdaptorParams(10 /* max leaf */) );
index->buildIndex();
}
size_t GetNearest(const std::vector<double>& pt) const {
double query_pt[3] = { pt[0], pt[1], pt[2] };
const size_t num_results = 1;
size_t ret_index;
double out_dist_sqr;
nanoflann::KNNResultSet<double > resultSet(num_results);
resultSet.init(&ret_index, &out_dist_sqr );
index->findNeighbors(resultSet, &query_pt[0], nanoflann::SearchParams(10));
std::cout << "knnSearch(nn="<<num_results<<"): \n";
std::cout << "ret_index=" << ret_index << " out_dist_sqr=" << out_dist_sqr << endl;
return ret_index;
}
typedef KDTreeSingleIndexAdaptor<
L2_Simple_Adaptor<double , PointCloud > ,
PointCloud,
3 /* dim */
> my_kd_tree;
std::shared_ptr<my_kd_tree> index = nullptr;
};
The problem is that the following code raises segfault:
int main()
{
srand(static_cast<unsigned int>(time(nullptr)));
Points points{{1, 5, 8}, {3, 3, 3}, {1, 1, 0}};
KdTree tree(points);
tree.GetNearest({1, 1, 1});
return 0;
}
But if I put GetNearest code in constructor (so that I construct index and find pt's neighbor in constructor itself), or just write constructor and GetNearest code in main, then everything works just fine.
I am new to nanoflann, can't figure what's wrong. Thanks for the help in advance!

I had to dig into the source nanoflann.hpp to find that the second parameter to the constructor for KDTreeSingleIndexAdaptor (cloud in your KdTree constructor) is passed by reference and stored as a reference. This means that the cloud data you pass to nanoflann needs to stay around until you delete the KDTreeSingleIndexAdaptor object.
Because you declare PointCloud cloud as a local variable within your KdTree constructor, it is destroyed when the constructor ends, leaving the kd-tree pointed to by index with a dangling internal reference.
One solution is to make cloud a member of KdTree.

Efficiently process each unique permutation of a vector when number of unique elements in vector is much smaller than vector size

In a program I need to apply a function in parallel to each unique permutation of a vector. The size of the vector is around N=15
I already have a function void parallel_for_each_permutation which I can use in combination with a std::set to only process each unique permutation exactly once.
This all works well for the general case. However, in my use case the number of unique elements k per vector is very limited, usually around k=4. This means that I'm currently wasting time constructing the same unique permutation over and over again, just to throw it away because it has already been processed.
Is it possible to process all unique permutations in this special case, without constructing all N! permutations?
Example use-case:
#include <algorithm>
#include <thread>
#include <vector>
#include <mutex>
#include <numeric>
#include <set>
#include <iostream>
// Strict "less than" between two containers, compared element by element
// (lexicographic order). For containers of equal length this is the same
// ordering as before; for different lengths a proper prefix now compares
// less than the longer container, instead of reading past the end of the
// shorter one or dereferencing end().
template<class Container1, class Container2>
struct Comp{
    bool operator()(const Container1& l, const Container2& r) const{
        return std::lexicographical_compare(l.begin(), l.end(),
                                            r.begin(), r.end());
    }
};
// Calls func(threadId, permutation) exactly once for each of the N!
// index permutations of container, split into contiguous ranges over
// num_threads worker threads. func is invoked concurrently and must be
// safe to call from several threads.
//
// The ranges are computed with integer arithmetic (base share plus one
// extra for the first `remainder` threads), so they tile [0, N!) exactly.
// The previous float arithmetic cannot represent large factorials exactly.
// A thread whose range is empty does nothing; previously it still
// processed one permutation, which produced duplicates when there were
// more threads than permutations.
//
// num_threads <= 0 processes nothing. N! must fit in size_t.
template<class Container, class Func>
void parallel_for_each_permutation(const Container& container, int num_threads, Func func){
    if(num_threads <= 0)
        return;

    auto ithPermutation = [](int n, size_t i) -> std::vector<size_t>{
        // https://stackoverflow.com/questions/7918806/finding-n-th-permutation-without-computing-others
        std::vector<size_t> perm(n);
        if(n == 0)
            return perm; // the single permutation of nothing
        std::vector<size_t> fact(n);
        fact[0] = 1;
        for(int k = 1; k < n; k++)
            fact[k] = fact[k-1] * k;
        // factorial-base digits of i
        for(int k = 0; k < n; k++){
            perm[k] = i / fact[n-1-k];
            i = i % fact[n-1-k];
        }
        // turn the digits into actual element indices
        for(int k = n-1; k > 0; k--){
            for(int j = k-1; j >= 0; j--){
                if(perm[j] <= perm[k])
                    perm[k]++;
            }
        }
        return perm;
    };

    const size_t numElements = container.size();
    size_t totalNumPermutations = 1;
    for(size_t i = 1; i <= numElements; i++)
        totalNumPermutations *= i;

    const size_t workers = static_cast<size_t>(num_threads);
    const size_t baseShare = totalNumPermutations / workers;
    const size_t remainder = totalNumPermutations % workers;

    std::vector<std::thread> threads;
    threads.reserve(workers);
    for(int threadId = 0; threadId < num_threads; threadId++){
        threads.emplace_back([&, threadId](){
            const size_t t = static_cast<size_t>(threadId);
            const size_t firstPerm = t * baseShare + std::min(t, remainder);
            const size_t last_excl = firstPerm + baseShare + (t < remainder ? 1 : 0);
            if(firstPerm >= last_excl)
                return; // nothing assigned to this thread

            Container permutation(container);
            auto permIndices = ithPermutation(static_cast<int>(numElements), firstPerm);
            for(size_t count = firstPerm; count < last_excl; ++count){
                for(size_t i = 0; i < permIndices.size(); i++){
                    permutation[i] = container[permIndices[i]];
                }
                func(threadId, permutation);
                std::next_permutation(permIndices.begin(), permIndices.end());
            }
        });
    }
    for(auto& thread : threads)
        thread.join();
}
// Calls func(permutation) once for every distinct permutation of
// container. All permutations are generated on four threads; a
// mutex-protected set remembers the ones already handed to func, so
// repeats are dropped.
template<class Container, class Func>
void parallel_for_each_unique_permutation(const Container& container, Func func){
    using Comparator = Comp<Container, Container>;
    constexpr int numThreads = 4;

    std::set<Container, Comparator> uniqueProcessedPermutations(Comparator{});
    std::mutex m;

    auto processOnce = [&](int /*threadId*/, const auto& permutation){
        bool firstSighting = false;
        {
            // the lock covers only the bookkeeping, not the call to func
            std::lock_guard<std::mutex> lg(m);
            firstSighting = uniqueProcessedPermutations.insert(permutation).second;
        }
        if(firstSighting)
            func(permutation);
    };

    parallel_for_each_permutation(container, numThreads, processOnce);
}
// Example driver: visits each distinct permutation of an 11-element vector
// with a callback that does nothing.
int main(){
    const std::vector<int> vector1{1,1,1,1,2,3,2,2,3,3,1};
    auto func = [](const auto& /*vec*/){};
    parallel_for_each_unique_permutation(vector1, func);
}

The permutations you have to work with are known in the field of combinatorics as multiset permutations.
They are described for example on The Combinatorial Object Server
with more detailed explanations in this paper by professor Tadao Takaoka.
You have some related Python code and some C++ code in the FXT open source library.
You might consider adding the "multiset" and "combinatorics" tags to your question.
One possibility is to borrow the (header-only) algorithmic code from the FXT library, which provides a simple generator class for those multiset permutations.
Performance level:
Using the FXT algorithm on a test vector of 15 objects, {1,1,1, 2,2,2, 3,3,3,3, 4,4,4,4,4}, one can generate all associated 12,612,600 "permutations" in less than 2 seconds on a plain vanilla Intel x86-64 machine; this is without diagnostics text I/O and without any attempt at optimization.
The algorithm generates exactly those "permutations" that are required, nothing more. So there is no longer a need to generate all 15! "raw" permutations nor to use mutual exclusion to update a shared data structure for filtering purposes.
An adaptor class for generating the permutations:
I will try below to provide code for an adaptor class, which allows your application to use the FXT algorithm while containing the dependency into a single implementation file. That way, the code will hopefully fit better into your application. Think FXT's ulong type and use of raw pointers, versus std::vector<std::size_t> in your code. Besides, FXT is a very extensive library.
Header file for the "adaptor" class:
// File: MSetPermGen.h
#ifndef MSET_PERM_GEN_H
#define MSET_PERM_GEN_H
#include <iostream>
#include <vector>
class MSetPermGenImpl; // from algorithmic backend
using IntVec = std::vector<int>;
using SizeVec = std::vector<std::size_t>;
// Generator class for multiset permutations:
// Adaptor that walks through the multiset permutations of a vector of ints.
// The algorithmic work is delegated to MSetPermGenImpl, which is only
// forward-declared here so that the backend stays out of this header.
class MSetPermGen {
public:
// Builds a generator positioned at the first permutation of vec.
MSetPermGen(const IntVec& vec);
// Number of distinct permutations of the multiset.
std::size_t getCycleLength() const;
// Advances by incr permutations; false if the last step hit the end.
bool forward(size_t incr);
// Same as forward(1).
bool next();
// Current permutation, as indices into getItemValues().
const SizeVec& getPermIndices() const;
// The vector the generator was constructed from.
const IntVec& getItems() const;
// The distinct values of the input, each listed once.
const IntVec& getItemValues() const;
private:
std::size_t cycleLength_;
// NOTE(review): allocated with new in the constructor; no destructor is
// declared here, so nothing visible releases it - confirm ownership.
MSetPermGenImpl* genImpl_; // implementation generator
IntVec itemValues_; // only once each
IntVec items_; // copy of ctor argument
SizeVec freqs_; // repetition counts
SizeVec state_; // array of indices in 0..n-1
};
#endif
The class constructor takes exactly the argument type provided in your main program. Of course, the key method is next(). You can also move the automaton forward by several steps at once using the forward(incr) method.
Example client program:
// File: test_main.cpp
#include <cassert>
#include "MSetPermGen.h"
using std::cout;
using std::cerr;
using std::endl;
// utility functions:
std::vector<int> getMSPermutation(const MSetPermGen& mspg)
{
std::vector<int> res;
auto indices = mspg.getPermIndices(); // always between 0 and n-1
auto values = mspg.getItemValues(); // whatever the user put in
std::size_t n = indices.size();
assert( n == items.size() );
res.reserve(n);
for (std::size_t i=0; i < n; i++) {
auto xi = indices[i];
res.push_back(values[xi]);
}
return res;
}
// Writes the elements of p to fh, each followed by one space, then a newline.
void printPermutation(const std::vector<int>& p, std::ostream& fh)
{
    for (int value : p)
        fh << value << " ";
    fh << '\n';
}
int main(int argc, const char* argv[])
{
std::vector<int> vec0{1,1, 2,2,2}; // N=5
std::vector<int> vec1{1,1, 1,1, 2, 3, 2,2, 3,3, 1}; // N=11
std::vector<int> vec2{1,1,1, 2,2,2, 3,3,3,3, 4,4,4,4,4}; // N=15
MSetPermGen pg0{vec0};
MSetPermGen pg1{vec1};
MSetPermGen pg2{vec2};
auto pg = &pg0; // choice of 0, 1, 2 for sizing
auto cl = pg->getCycleLength();
auto permA = getMSPermutation(*pg);
printPermutation(permA, cout);
for (std::size_t pi=0; pi < (cl-1); pi++) {
pg->next();
auto permB = getMSPermutation(*pg);
printPermutation(permB, cout);
}
return EXIT_SUCCESS;
}
Text output from the above small program:
1 1 2 2 2
1 2 1 2 2
1 2 2 1 2
1 2 2 2 1
2 1 1 2 2
2 1 2 1 2
2 1 2 2 1
2 2 1 1 2
2 2 1 2 1
2 2 2 1 1
You get only 10 items from vector {1,1, 2,2,2}, because 5! / (2! * 3!) = 120/(2*6) = 10.
The implementation file for the adaptor class, MSetPermGen.cpp, consists of two parts. The first part is FXT code with minimal adaptations. The second part is the MSetPermGen class proper.
First part of implementation file:
// File: MSetPermGen.cpp - part 1 of 2 - FXT code
// -------------- Beginning of header-only FXT combinatorics code -----------
// This file is part of the FXT library.
// Copyright (C) 2010, 2012, 2014 Joerg Arndt
// License: GNU General Public License version 3 or later,
// see the file COPYING.txt in the main directory.
//-- https://www.jjj.de/fxt/
//-- https://fossies.org/dox/fxt-2018.07.03/mset-perm-lex_8h_source.html
#include <cstddef>
using ulong = std::size_t;
// Exchanges the values of xa and xb.
inline void swap2(ulong& xa, ulong& xb)
{
    const ulong old_xa = xa;
    xa = xb;
    xb = old_xa;
}
class mset_perm_lex
// Multiset permutations in lexicographic order, iterative algorithm.
// The object owns two heap arrays (r_ and ms_); copying is disabled.
{
public:
ulong k_; // number of different sorts of objects
ulong *r_; // number of elements '0' in r[0], '1' in r[1], ..., 'k-1' in r[k-1]
ulong n_; // number of objects
ulong *ms_; // multiset data in ms[0], ..., ms[n-1], sentinels at [-1] and [-2]
private: // have pointer data
// declared but never defined, so any copy fails to build
mset_perm_lex(const mset_perm_lex&); // forbidden
mset_perm_lex & operator = (const mset_perm_lex&); // forbidden
public:
// r points to k repetition counts; they are copied, r is not retained.
explicit mset_perm_lex(const ulong *r, ulong k)
{
k_ = k;
r_ = new ulong[k];
for (ulong j=0; j<k_; ++j) r_[j] = r[j]; // get buckets
n_ = 0;
for (ulong j=0; j<k_; ++j) n_ += r_[j];
// two extra slots in front of the data hold the sentinels
ms_ = new ulong[n_+2];
ms_[0] = 0; ms_[1] = 1; // sentinels: ms[0] < ms[1]
ms_ += 2; // nota bene
first();
}
// Resets to the first permutation: r_[0] zeros, then r_[1] ones, and so on.
void first()
{
for (ulong j=0, i=0; j<k_; ++j)
for (ulong h=r_[j]; h!=0; --h, ++i)
ms_[i] = j;
}
~mset_perm_lex()
{
// undo the +2 from the constructor before releasing the array
ms_ -= 2;
delete [] ms_;
delete [] r_;
}
// Current permutation; n_ entries, each a sort number in 0..k_-1.
const ulong * data() const { return ms_; }
ulong next()
// Return position of leftmost change,
// return n with last permutation.
{
// find rightmost pair with ms[i] < ms[i+1]:
const ulong n1 = n_ - 1;
ulong i = n1;
// i is unsigned: stepping below 0 wraps around, which is how ms_[i]
// reaches the sentinel slots stored in front of the data
do { --i; } while ( ms_[i] >= ms_[i+1] ); // can read sentinel
// the scan ended on a sentinel, i.e. no rising pair exists in the data
if ( (long)i < 0 ) return n_; // last sequence is falling seq.
// find rightmost element ms[j] less than ms[i]:
ulong j = n1;
while ( ms_[i] >= ms_[j] ) { --j; }
swap2(ms_[i], ms_[j]);
// Here the elements ms[i+1], ..., ms[n-1] are a falling sequence.
// Reverse order to the right:
ulong r = n1;
ulong s = i + 1;
while ( r > s ) { swap2(ms_[r], ms_[s]); --r; ++s; }
return i;
}
};
// -------------- End of header-only FXT combinatorics code -----------
Second part of the class implementation file:
// Second part of file MSetPermGen.cpp: non-FXT code
#include <cassert>
#include <tuple>
#include <map>
#include <iostream>
#include <cstdio>
#include "MSetPermGen.h"
using std::cout;
using std::cerr;
using std::endl;
// Thin holder for the FXT generator. Its only purpose is to give the
// forward declaration in the header something to point at; MSetPermGen
// reaches the wrapped generator directly through friendship.
class MSetPermGenImpl {
    friend class MSetPermGen;

    mset_perm_lex fg; // built from the repetition counts

public:
    MSetPermGenImpl(const SizeVec& freqs)
        : fg(freqs.data(), freqs.size())
    {}
};
// n! computed in std::size_t arithmetic; fact(0) and fact(1) are both 1.
static std::size_t fact(size_t n)
{
    std::size_t product = 1;
    while (n > 1) {
        product *= n;
        --n;
    }
    return product;
}
// Counts how often each value occurs in vec, derives the number of
// distinct permutations (n! divided by the factorial of every repetition
// count), creates the backend generator and copies its first permutation
// into state_.
// NOTE(review): the count goes through fact(n), which presumably exceeds a
// 64-bit size_t from n = 21 on - confirm the intended input sizes.
MSetPermGen::MSetPermGen(const IntVec& vec) : items_(vec)
{
    // value -> number of occurrences, iterated in ascending value order
    std::map<int,int> histogram;
    for (int value : vec)
        ++histogram[value];

    for (const auto& entry : histogram) {
        itemValues_.push_back(entry.first);
        freqs_.push_back(entry.second);
    }

    std::size_t arrangements = fact(items_.size());
    for (auto repeat : freqs_)
        arrangements /= fact(repeat);
    cycleLength_ = arrangements;

    // create FXT-level generator:
    genImpl_ = new MSetPermGenImpl(freqs_);

    const auto* firstPerm = genImpl_->fg.ms_;
    state_.assign(firstPerm, firstPerm + items_.size());
}
// Number of distinct permutations, as computed by the constructor.
std::size_t MSetPermGen::getCycleLength() const
{
    return cycleLength_;
}

// Steps the backend generator incr times, then refreshes state_ from it.
// The result reflects the last step only: false when that step reported
// the end of the sequence (the backend returned n).
bool MSetPermGen::forward(size_t incr)
{
    std::size_t rc = 0;
    // move forward state by brute force, could be improved:
    while (incr-- > 0)
        rc = genImpl_->fg.next();

    const std::size_t n = items_.size();
    const auto* current = genImpl_->fg.ms_;
    state_.assign(current, current + n);
    return rc != n;
}

// Single step; see forward().
bool MSetPermGen::next()
{
    return forward(1);
}

// Current permutation as indices into getItemValues().
const SizeVec& MSetPermGen::getPermIndices() const
{
    return state_;
}

// Copy of the vector passed to the constructor.
const IntVec& MSetPermGen::getItems() const
{
    return items_;
}

// Distinct input values, in ascending order.
const IntVec& MSetPermGen::getItemValues() const
{
    return itemValues_;
}
Adapting the parallel application:
Regarding your multithreaded application, given that generating the "permutations" is cheap, you can afford to create one generator object per thread.
Before launching the actual computation, you forward each generator to its appropriate initial position, that is at step thread_id * (cycleLength / num_threads).
I have tried to adapt your code to this MSetPermGen class along these lines. See code below.
With 3 threads, an input vector {1,1,1, 2,2,2, 3,3,3,3, 4,4,4,4,4} of size 15 (giving 12,612,600 permutations) and all diagnostics enabled, your modified parallel program runs in less than 10 seconds; less than 2 seconds with all diagnostics switched off.
Modified parallel program:
#include <algorithm>
#include <thread>
#include <vector>
#include <atomic>
#include <mutex>
#include <numeric>
#include <set>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include "MSetPermGen.h"
using std::cout;
using std::endl;
// debug and instrumentation:
static std::atomic<size_t> permCounter;
static bool doManagePermCounter = true;
static bool doThreadLogfiles = true;
static bool doLogfileHeaders = true;
// Calls mfunc(threadId, permutation) once for every multiset permutation
// of container, split into contiguous ranges over numThreads threads.
// Each thread owns its own MSetPermGen, forwarded to the start of its
// range. mfunc is invoked concurrently.
//
// Fixes relative to the earlier version:
//  * numThreads <= 0 returns immediately instead of dividing by zero;
//  * a thread whose range is empty (more threads than permutations) does
//    nothing instead of processing the last permutation a second time;
//  * the index vector is read through a reference instead of being copied
//    after every step.
// Each thread still creates its thrlog_<id>.txt file, as before.
template<class Container, class Func>
void parallel_for_each_permutation(const Container& container, int numThreads, Func mfunc) {
    if (numThreads <= 0)
        return;

    MSetPermGen gen0(container);
    const std::size_t totalNumPermutations = gen0.getCycleLength();
    const std::size_t workers = static_cast<std::size_t>(numThreads);

    // share per thread, rounded up
    std::size_t permShare = totalNumPermutations / workers;
    if ((totalNumPermutations % workers) != 0)
        permShare++;
    std::cout << "totalNumPermutations: " << totalNumPermutations << std::endl;

    std::vector<std::thread> threads;
    threads.reserve(workers);
    for (int threadId = 0; threadId < numThreads; threadId++) {
        threads.emplace_back([&, threadId]() {
            // generate some per-thread logfile name
            std::ostringstream fnss;
            fnss << "thrlog_" << threadId << ".txt";
            std::string fileName = fnss.str();
            std::ofstream fh(fileName);

            // both bounds are clamped, so firstPerm <= last_excl always holds
            const std::size_t firstPerm = std::min(totalNumPermutations,
                                                   permShare * threadId);
            const std::size_t last_excl = std::min(totalNumPermutations,
                                                   firstPerm + permShare);
            if (doLogfileHeaders) {
                fh << "MSG threadId: " << threadId << '\n';
                fh << "MSG firstPerm: " << firstPerm << '\n';
                fh << "MSG lastExcl : " << last_excl << '\n';
            }
            if (firstPerm >= last_excl)
                return; // nothing assigned to this thread

            MSetPermGen thrGen(container);
            thrGen.forward(firstPerm);

            Container permutation(container);
            // references into thrGen: next() updates the indices in place
            const auto& values = thrGen.getItemValues();
            const auto& permIndices = thrGen.getPermIndices();
            const auto nsz = permIndices.size();

            for (std::size_t count = firstPerm; count < last_excl; ++count) {
                for (std::size_t i = 0; i < nsz; i++) {
                    permutation[i] = values[permIndices[i]];
                }
                mfunc(threadId, permutation);
                if (doThreadLogfiles) {
                    for (std::size_t i = 0; i < nsz; i++)
                        fh << permutation[i] << ' ';
                    fh << '\n';
                }
                thrGen.next();
                if (doManagePermCounter) {
                    permCounter++;
                }
            }
            fh.close();
        });
    }
    for(auto& thread : threads)
        thread.join();
}
// Calls func(permutation) for every permutation produced by
// parallel_for_each_permutation on three threads, dropping the thread id.
template<class Container, class Func>
void parallel_for_each_unique_permutation(const Container& container, Func func) {
    constexpr int numThreads = 3;

    // no longer need any mutual exclusion
    auto dropThreadId = [&func](int /*threadId*/, const auto& permutation) {
        func(permutation);
    };

    parallel_for_each_permutation(container, numThreads, dropThreadId);
}
// Runs the generator over the 15-element test vector with a callback that
// does nothing, then prints how many permutations were counted.
int main()
{
    std::vector<int> vector0{1,1, 2,2,2};                           // N=5
    std::vector<int> vector1{1,1,1,1,2,3,2,2,3,3,1};                // N=11
    std::vector<int> vector2{1,1,1, 2,2,2, 3,3,3,3, 4,4,4,4,4};     // N=15

    auto func = [](const auto& /*vec*/) {};

    permCounter = 0;
    parallel_for_each_unique_permutation(vector2, func);

    const auto finalPermCounter = permCounter.load();
    cout << "FinalPermCounter = " << finalPermCounter << endl;
}

#define function with variable name

I am continually writing something akin to
std::vector< std::vector< double > > A(N, std::vector< double >(M));
and I would like to replace this with something like
matrix A(N,M);
by using a #define directive. I've looked at #define directives and think I can create a function like matrix(A,N,M) that would declare a vector of vectors as follows:
#define matrix(A, N, M) std::vector< std::vector< double > > A(N, std::vector< double >(M))
but I would rather not declare my matrices as matrix(A,N,M), but rather matrix A(N,M). My question is - how do I use the #define directives to account for changing a variable name?

You can use a typedef to define a type, something like this:
#include <vector>
using namespace std;
// Declares "matrix" as a type name, so an N x N matrix can be written as
// "matrix A(...)".
int main()
{
    int N = 10;
    // the closing '>' of the outer vector was missing
    typedef std::vector< std::vector<double> > matrix;
    matrix A(N, std::vector< double >(N));
    return 0;
}
Or, for more safety (when every row has the same length, known at compile time), use std::array for the rows; this needs #include <array>:
// Same idea with fixed-length rows: N rows of five doubles each.
int main()
{
    const int N = 10;
    typedef std::array<double, 5> row;
    typedef std::vector< row > matrix;
    matrix A(N, row());
    return 0;
}
my wrapper for matrix with vectors
#include <iostream>
#include <vector>
#include <exception>
#include <algorithm>
// Row-major matrix stored as a vector of rows. Every operation keeps all
// rows at the same length.
template< typename T >
class WrapperMatrix
{
public:
    // weight rows of length value-initialised elements each.
    WrapperMatrix(const int& weight, const int& length);

    // Appends a row. Throws std::invalid_argument if its length differs
    // from the existing rows; any length is accepted while there are no rows.
    void pushLine(const std::vector<T>&&);

    // Appends one element to every row. Throws std::invalid_argument unless
    // the argument holds exactly one element per row.
    void pushColumn(const std::vector<T>&&);

    // Prints the elements of each row back to back, one row per line.
    void display();

private:
    std::vector<std::vector<T>> matrix;
};

template<typename T>
WrapperMatrix<T>::WrapperMatrix(const int& weight, const int& length)
    : matrix(weight, std::vector<T>(length))
{
}

template <typename T>
void WrapperMatrix<T>::pushLine(const std::vector<T>&& newLine)
{
    // An empty matrix has no row to compare with; the first row sets the width.
    const std::size_t expected = matrix.empty() ? newLine.size()
                                                : matrix.front().size();
    if (newLine.size() != expected)
        throw std::invalid_argument("Invalid syntax");
    // The parameter is const, so this is a copy (std::move could not move it).
    matrix.push_back(newLine);
}

template <typename T>
void WrapperMatrix<T>::pushColumn(const std::vector<T>&& newColumn)
{
    if (newColumn.size() != matrix.size())
        throw std::invalid_argument("Invalid syntax");
    for (std::size_t i = 0; i < matrix.size(); ++i)
        matrix[i].push_back(newColumn[i]);
}

template<typename T>
void WrapperMatrix<T>::display()
{
    for (const auto& row : matrix)
    {
        for (const auto& element : row)
            std::cout << element;
        std::cout << '\n';
    }
}
// Demo: appends one row to a 5x5 matrix, then tries to append a column.
// Any exception is reported on stdout and the matrix is printed either way.
int main()
{
    std::vector<int> v1{ 1,2,3,4,5 };
    std::vector<int> v2{ 1,2,3,4,5,6 };
    std::vector<int> v3{ 2,3,4,5,6 };

    WrapperMatrix<int> vw(5,5);
    try
    {
        vw.pushLine(std::move(v1));
        vw.pushColumn(std::move(v2));
        //vw.pushLine(std::move(v3));
    }
    catch (const std::exception& e)
    {
        std::cout << e.what() << std::endl;
    }

    vw.display();
    return 0;
}

Alternative answer to typedef
using matrix = std::vector< std::vector<double>>;
This form can be more readable, especially with function and array types. E.g. using arr10 = Foo[10] is clearer than typedef Foo arr10[10]. The = sign clearly separates what's being defined and how it's defined.
(Ignoring the whole "matrix is not a vector of vectors" discussion)

Slow performance using STL in NEH algorithm

I've got problem with NEH algorithm. I implemented it but it's veeeery slow and I can't figure out what the problem is. Could You verify what I did wrong, what causes bad performance? Here's my code:
#include <algorithm>
#include <fstream>
#include <iostream>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <numeric>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <vector>
// Dense rows x columns matrix stored row by row in one heap array.
// Elements are value-initialised on construction. The class is move-only
// because it owns its storage through std::unique_ptr.
template <typename T>
class Matrix
{
public:
    Matrix(unsigned int rowsNum, unsigned int columnsNum) :m_rows(rowsNum), m_columns(columnsNum){ resize(rowsNum, columnsNum); };

    // Element access without bounds checking.
    T& operator()(unsigned int r, unsigned int c) { return m_data[offset(r, c)]; }
    const T& operator()(unsigned int r, unsigned int c) const { return m_data[offset(r, c)]; }

    unsigned int rows() const { return m_rows; }
    unsigned int columns() const { return m_columns; }

    // Element-wise sum. Throws std::invalid_argument if the shapes differ.
    Matrix<T> operator+(const Matrix<T>& matrix) const;

private:
    // Position of (r, c) in the flat array; computed in size_t so the
    // product cannot wrap in unsigned int.
    std::size_t offset(unsigned int r, unsigned int c) const
    {
        return static_cast<std::size_t>(r) * m_columns + c;
    }

    void resize(unsigned int rowsNum, unsigned int colsNum);

    std::unique_ptr<T[]> m_data;
    unsigned int m_rows;
    unsigned int m_columns;
};

template <typename T>
Matrix<T> Matrix<T>::operator+(const Matrix& matrix) const
{
    // The body used to be empty, i.e. a non-void function returned nothing.
    if (matrix.m_rows != m_rows || matrix.m_columns != m_columns)
        throw std::invalid_argument("Matrix::operator+: dimension mismatch");

    Matrix<T> sum(m_rows, m_columns);
    const std::size_t count = static_cast<std::size_t>(m_rows) * m_columns;
    for (std::size_t i = 0; i < count; ++i)
        sum.m_data[i] = m_data[i] + matrix.m_data[i];
    return sum;
}

// Replaces the storage with a fresh rowsNum x colsNum array; old contents
// are discarded and the new elements are value-initialised.
template <typename T>
void Matrix<T>::resize(unsigned int rowsNum, unsigned int colsNum)
{
    m_rows = rowsNum;
    m_columns = colsNum;
    m_data.reset(new T[static_cast<std::size_t>(rowsNum) * colsNum]());
}
// One job of the scheduling instance: an ordinal number plus one processing
// time per machine, parsed from a single text line.
class Task
{
public:
    // ordNum is the task's ordinal; a fresh task has no machine times and a
    // total time of 0.
    Task(unsigned int ordNum = 0) :m_ordNum(ordNum), m_totalTasksTime(0) { }

    // Parses the next non-empty line of `is`; returns the number of machine
    // times read.
    unsigned int readFromFile(std::istream& is);

    // Sum of all machine times as computed by the last readFromFile().
    unsigned int totalTasksTime() const;

    unsigned int ordNum() const { return m_ordNum; }
    unsigned int machinesNum() const { return static_cast<unsigned int>(m_params.size()); }

    // Processing time on machine `machineNum`, counted from 1. Not
    // bounds-checked.
    unsigned int machineTaskTime(unsigned int machineNum) const { return m_params[machineNum - 1]; }

protected:
    std::vector<unsigned int> m_params;   // processing time per machine, in input order
    unsigned int m_ordNum;
    unsigned int m_totalTasksTime;        // cached sum of m_params
};

unsigned int Task::totalTasksTime() const
{
    return m_totalTasksTime;
}

// Reads one task line made of "<index> <time>" integer pairs and replaces the
// stored machine times with the times found on it. Leading empty lines are
// skipped. Returns how many pairs were read (0 if the line could not be read
// or held no complete pair).
unsigned int Task::readFromFile(std::istream& is)
{
    std::string line;
    while (is.peek() == '\n') // skip empty lines
        is.get();
    std::getline(is, line);

    m_params.clear();
    std::istringstream istr(line);
    unsigned int i = 1;
    int taskNum, taskTime;
    while (istr >> taskNum >> taskTime)
    {
        // NOTE(review): `i` always equals m_params.size() + 1 here, so this
        // check cannot fail and the index read into taskNum is never
        // validated. Presumably the intent was to verify that machine indices
        // appear in order - confirm the input format (0- or 1-based indices)
        // before tightening it.
        if (i == m_params.size() + 1)
            m_params.push_back(taskTime);
        else
            throw "Zly numer indesku maszyny - musza być po kolei";
        i++;
    }

    // Accumulate in unsigned int (0u) so the sum uses the element type rather
    // than int.
    m_totalTasksTime = std::accumulate(m_params.begin(), m_params.end(), 0u);
    return m_params.size();
}
// A named scheduling problem: a list of tasks plus the NEH ordering routine
// that works on them.
class Instance
{
public:
    Instance() { }
    Instance(const std::string& name) :m_name(name) { }

    // Reads the instance header and its task lines from `is`, appending the
    // tasks that were read successfully.
    void readFromFile(std::istream& is);

    const std::string& name() const { return m_name; }

    // Setter takes a const reference: the argument is only copied, so
    // temporaries and const strings are accepted as well.
    void name(const std::string& newName) { m_name = newName; }

    // Inserts this instance's tasks into `permutation` (task ordinals) and
    // writes the best total time found into `totalTime`.
    void neh(std::list<unsigned int>& permutation, unsigned int &totalTime) const;

    // Pointer to the task stored at zero-based position `taskNum`. Not
    // bounds-checked.
    const Task* getTask(unsigned int taskNum) const { return &m_tasks[taskNum]; }

private:
    // Total time of the schedule that runs the tasks in the given order.
    unsigned int calculateTotalTime(const std::list<unsigned int>& permutationList, unsigned int bestTimeFound) const;

    std::vector<Task> m_tasks;
    std::string m_name;
};
// Ordered map from unsigned int keys to unsigned int values (a container
// type, despite the "Iterator" in its name).
using MapIterator = std::map<unsigned int, unsigned int>;
// Read-only iterator over a std::vector<Task>.
using TaskVecIterator = std::vector<Task>::const_iterator;
bool compareTasksPtrBySumTime(const Task* t1, const Task* t2)
{
unsigned int t1TotalTime = t1->totalTasksTime(), t2TotalTime = t2->totalTasksTime();
bool w1 = t1TotalTime < t2TotalTime, w2 = t1TotalTime == t2TotalTime && t1->ordNum() > t2->ordNum();
return w1 || w2;
}
// Builds a task order for this instance by inserting the tasks one at a time,
// each at the position that yields the smallest calculateTotalTime() result.
//
// permutation  in/out: list of task ordinals (Task::ordNum()); every task of
//              this instance is inserted into it.
// totalTime    out: after each insertion, set to the best value found for
//              that insertion. Not written at all when m_tasks is empty.
//
// Prints "Ukonczono:" followed by the instance name to std::cout when done.
void Instance::neh(std::list<unsigned int>& permutation, unsigned int &totalTime) const
{
// sort the tasks by total execution time
std::list<const Task*> sortedTaskList;
for (unsigned int i = 0; i < m_tasks.size(); i++)
sortedTaskList.push_back(&m_tasks[i]);
sortedTaskList.sort(compareTasksPtrBySumTime);
while (!sortedTaskList.empty()) // tasks are taken from the back of the sorted list, i.e. largest total time first
{
const Task* taskPtr = sortedTaskList.back(); sortedTaskList.pop_back();
unsigned int taskNum = taskPtr->ordNum();
// Placeholder only: bestTotalTime starts at the maximum value, so the first
// trial below normally overwrites bestPosition.
std::list<unsigned int>::iterator bestPosition = permutation.begin();
unsigned int bestTotalTime = std::numeric_limits<unsigned int>::max();
// The new task starts at the front and is moved one slot towards the back
// per iteration by swapping values, so every position gets evaluated.
permutation.push_front(taskNum);
for (std::list<unsigned int>::iterator it = permutation.begin(); // search for the best position
it != permutation.end(); it++)
{
// At this point the new task's value is stored in the node `it` refers to.
unsigned int currentTotalTime = calculateTotalTime(permutation, bestTotalTime);
if (bestTotalTime > currentTotalTime)
{
bestTotalTime = currentTotalTime;
bestPosition = it;
}
// Shift the new task one slot to the right for the next trial.
auto nextIt = it; nextIt++;
if (nextIt != permutation.end())
std::swap(*it, *nextIt);
}
totalTime = bestTotalTime;
// After the loop the trial copy of the new task sits in the last node and the
// other values are back in their previous order. Insert the task in front of
// the best node, then drop the trial copy at the back.
permutation.insert(bestPosition, taskNum);
permutation.pop_back();
}
std::cout << "Ukonczono:" << name() << std::endl;
}
unsigned int Instance::calculateTotalTime(const std::list<unsigned int>& permutationList, unsigned int bestTimeFound) const
{
unsigned int rows = m_tasks[*permutationList.begin() - 1].machinesNum() + 1, columns = permutationList.size() + 1;
Matrix<unsigned int> matrix(rows, columns);
unsigned int totalTime = 0;
for (unsigned int c = 0; c < columns; c++)
matrix(0, c) = 0;
for (unsigned int r = 0; r < rows; r++)
matrix(r, 0) = 0;
std::list<unsigned int>::const_iterator it = permutationList.begin();
for (unsigned int c = 1; c < columns; c++)
{
unsigned int taskNum = *it;
for (unsigned int r = 1; r < rows; r++)
(matrix(r, c) = std::max(matrix(r, c - 1), matrix(r - 1, c)) + m_tasks[taskNum - 1].machineTaskTime(r));// >bestTimeFound;
// return std::numeric_limits<unsigned int>::max();
it++;
}
return matrix(rows - 1, columns - 1);
}
void Instance::readFromFile(std::istream& is)
{
int taskNum, machineNum;
is >> taskNum >> machineNum;
for (int i = 0; i < taskNum; i++)
{
Task task(i + 1);
task.readFromFile(is);
if (is)
m_tasks.push_back(task);
}
}
// A collection of Instance objects read from one stream, with helpers that
// run neh() on each of them.
class InstanceVector
{
    std::vector<Instance> m_instances;

public:
    // Reads instances from `is` until the stream stops being good.
    void readFromFile(std::istream& is);

    // Runs neh() on every instance and appends each resulting ordering to
    // `results`.
    void neh(std::list< std::list<unsigned int> >& results) const;

    // Runs neh() on every instance and writes one ordering per line to `os`.
    void neh(std::ostream& os) const;
};
// Reads instances from `is` until a name can no longer be extracted. Each
// instance starts with a whitespace-delimited name, followed by the data
// consumed by Instance::readFromFile(). An instance is stored only if the
// stream is still good after it has been read.
void InstanceVector::readFromFile(std::istream& is)
{
    std::string name;
    while (is >> name)
    {
        Instance instance(name);
        instance.readFromFile(is);
        if (is)
            m_instances.push_back(instance);
    }
}
void InstanceVector::neh(std::list< std::list<unsigned int> >& results) const
{
std::vector<Instance>::const_iterator it;
for (it = m_instances.begin(); it != m_instances.end(); it++)
{
std::list<unsigned int> resultInstance;
unsigned int totalTimeInstance;
it->neh(resultInstance, totalTimeInstance);
results.push_back(resultInstance);
}
}
void InstanceVector::neh(std::ostream& os) const
{
std::list< std::list<unsigned int> > results;
for (std::vector<Instance>::const_iterator it = m_instances.begin();
it != m_instances.end(); it++)
{
std::list<unsigned int> resultInstance;
unsigned int totalTimeInstance;
it->neh(resultInstance, totalTimeInstance);
results.push_back(std::move(resultInstance));
}
for (std::list< std::list<unsigned int> >::const_iterator it = results.begin();
it != results.end(); it++)
{
for (std::list<unsigned int>::const_iterator itPermutation = it->begin(); itPermutation != it->end(); itPermutation++)
os << *itPermutation << " ";
os << std::endl;
}
}
// Reads the instances from "bench_fs.txt", runs NEH on each of them and
// writes the resulting orderings to "output.txt". Waits for a character on
// std::cin before exiting. Returns 0 on success and 1 if either file could
// not be opened.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    int exitCode = 0;
    InstanceVector instanceVec;

    std::ifstream input("bench_fs.txt");
    if (input.is_open())
    {
        instanceVec.readFromFile(input);
    }
    else
    {
        // Previously a missing input file went unnoticed and only produced an
        // empty output file.
        std::cerr << "Cannot open input file: bench_fs.txt" << std::endl;
        exitCode = 1;
    }

    std::ofstream output("output.txt");
    if (!output.is_open())
    {
        std::cerr << "Cannot open output file: output.txt" << std::endl;
        exitCode = 1;
    }
    instanceVec.neh(output);

    std::cin.get();
    return exitCode;
}

The time taken by your program depends both on the code and on the data. Without the data we can only guess at why your program is taking more time than you expected. I will hazard some guesses as to where time is being lost, but first I strongly encourage you to measure how your program is spending its time. On Linux you can use perf to measure where your program is spending its time, but there are plenty of tools to choose from. This will help in two ways. First, the following guesses may be completely wrong. Second, you will know how to find the performance problems in your next program.
In the absence of any hard information my gut is wary of two elements of your program.
You are storing your permutation as a std::list and I suspect that traversal and modification of this list is taking more time than you are saving by having an efficient push_front(). Try switching to std::vector. You can avoid push_front() by doing a push_back() and reversing the direction you move the new task through the permutation and keeping a best permutation rather than best position.
Inside calculateTotalTime() the matrix object is allocating and deallocating memory each time. This is potentially adding up to a significant amount of time. You could pass the matrix as an argument to calculateTotalTime() and store the matrix object at a level where it will be allocated/deallocated far less frequently. You probably need to do as @PaulMcKenzie suggests and use std::vector in your matrix class, rather than unique_ptr, so that you can reserve capacity ahead of time.
Hope that helps.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js