C++ Find and save duplicates in vector

C++ Find and save duplicates in vector - c++

I have a vector of my user-defined type (vector<Agressor>).
The vector is filled with elements read from stdin; I then sort it and try to find the duplicates in it and save them.
I've managed to find all unique elements, but I need to find the duplicates and collect them in a separate vector.
I need a hint or a simple solution for this problem.
Here's my code:
Agressor.h
#ifndef Agressor_h
#define Agressor_h
#include <string>
#include <vector>
using namespace std;
/// One order entered by the user: trader id, side, quantity and price.
///
/// The class also carries a vector of further Agressor objects (`v`), which
/// existing callers use as their order container.
class Agressor{
public:
    std::string traderIdentifier;
    std::string side;          // operator<< renders "B" as '+' and "S" as '-'
    int quantity = 0;          // zero-initialised so a fresh object is never indeterminate
    int price = 0;
    std::vector<Agressor> v;

    /// Prints the four order fields, space separated, on one line of std::cout.
    void display() const {
        std::cout << traderIdentifier << " " << side << " " << quantity << " " << price << std::endl;
    }

    // No user-declared destructor: the implicit copy/move operations stay
    // available, so sorting a vector of orders can move instead of copy.
    explicit Agressor(){
    }

    friend std::ostream &operator<<(std::ostream& stream, const Agressor& item);
    const friend bool operator > (const Agressor &a1, const Agressor &a2);
};
// Writes one order as "<trader><sign><quantity>#<price>" followed by a newline.
// The sign is "+" for side "B", "-" for side "S" and empty for any other side.
ostream &operator<<(ostream& stream, const Agressor& item) {
    string sign;
    if (item.side == "B") {
        sign = "+";
    } else if (item.side == "S") {
        sign = "-";
    }
    return stream << item.traderIdentifier << sign << item.quantity << "#" << item.price << "\n";
}
// Two orders compare equal when the product price*quantity, the trader id and
// the side all match (checked in that order).
const bool operator == (const Agressor &a1, const Agressor &a2){
    if (a1.price * a1.quantity != a2.price * a2.quantity) {
        return false;
    }
    if (!(a1.traderIdentifier == a2.traderIdentifier)) {
        return false;
    }
    return a1.side == a2.side;
}
// Orders are ranked by the product price*quantity only.
const bool operator > (const Agressor &a1, const Agressor &a2){
    const int lhsVolume = a1.price * a1.quantity;
    const int rhsVolume = a2.price * a2.quantity;
    return lhsVolume > rhsVolume;
}
#endif /* Agressor_h */
main.cpp
#include <algorithm>
#include <exception>
#include <functional>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>
#include "Agressor.h"
using namespace std;
// Splits `s` at every `delim` and appends the pieces to `elems`.
// Returns a reference to `elems` so calls can be chained.
vector<string> &split(const string &s, char delim, vector<string> &elems)
{
    stringstream source(s);
    for (string piece; getline(source, piece, delim); )
    {
        elems.push_back(piece);
    }
    return elems;
}
// Convenience overload: splits `s` at every `delim` into a fresh vector.
vector<string> split(const string &s, char delim)
{
    vector<string> pieces;
    split(s, delim, pieces);
    return pieces;
}
// Predicate for std::unique: true when both orders have the same
// price*quantity product, the same trader id and the same side.
bool equal_comp(const Agressor& a1, const Agressor& a2){
    const bool sameVolume = (a1.price * a1.quantity == a2.price * a2.quantity);
    return sameVolume && (a1.traderIdentifier == a2.traderIdentifier) && (a1.side == a2.side);
}
int main(int argc, const char * argv[]) {
Agressor agr;
while (true) {
std::string sText;
cout << "enter query:" << endl;
std::getline(std::cin, sText);
if(sText == "q"){
cout << "Program terminated by user" << endl;
break;
}else{
std::vector<std::string> sWords = split(sText, ' ');
agr.traderIdentifier = sWords[0];
agr.side = sWords[1];
agr.quantity = stoi(sWords[2]);
agr.price = stoi(sWords[3]);
agr.v.push_back(agr);
vector<Agressor>::iterator it;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
//unique(agr.v.begin(), agr.v.end(), equal_comp);
for (vector<Agressor>::const_iterator i = agr.v.begin(); i != agr.v.end(); ++i)
cout << *i << ' ';
}
}
cout << "here we go..." << endl;
vector<Agressor>::iterator it;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
//it = unique(agr.v.begin(),agr.v.end(), equal_comp);
//agr.v.resize( distance(agr.v.begin(),it) );
agr.v.erase(unique(agr.v.begin(),agr.v.end(), equal_comp), agr.v.end());
copy(agr.v.begin(), agr.v.end(), ostream_iterator<Agressor>(cout, "\n"));
return 0;
}

You might use something like:
// Returns every element of `v` that equals its predecessor.
// Only neighbouring elements are compared, so `v` is expected to be sorted.
// A run of k equal elements contributes k-1 entries to the result.
template <typename T>
std::vector<T> get_duplicates(const std::vector<T>& v)
{
    std::vector<T> repeated;
    const auto stop = v.end();
    for (auto pos = std::adjacent_find(v.begin(), stop);
         pos != stop;
         pos = std::adjacent_find(pos, stop)) {
        // adjacent_find points at the first element of an equal pair; step to
        // the second one, record it, and resume the search from there.
        ++pos;
        repeated.push_back(*pos);
    }
    return repeated;
}

std::unique overwrites duplicate values with later non-duplicate values. You can implement a similar algorithm that moves the values to somewhere.
// Like std::unique, but the elements that std::unique would discard are moved
// to `d_first` instead of being lost.
//
// [first, last) is compacted so that no two neighbouring elements satisfy `p`;
// every element that `p` reports equal to the last kept element is moved to
// the output iterator, in its original order. Returns the new logical end of
// the compacted range; elements behind it are in a moved-from state and should
// be erased by the caller.
template<class ForwardIt, class OutputIt, class BinaryPredicate>
ForwardIt unique_retain(ForwardIt first, ForwardIt last, OutputIt d_first, BinaryPredicate p)
{
    if (first == last)
        return last;
    ForwardIt result = first;
    while (++first != last) {
        if (p(*result, *first)) {
            // *first repeats the last kept element: hand it to the caller.
            // Doing it here (not when the slot is overwritten later) keeps the
            // value intact and also covers duplicates at the very end.
            *d_first++ = std::move(*first);
        } else if (++result != first) {
            // New distinct element; close the gap left by earlier duplicates.
            *result = std::move(*first);
        }
    }
    return ++result;
}
(adapted from this possible implementation of std::unique)
You would then use it like
// Receives whatever unique_retain writes to its output iterator.
vector<Agressor> dups;
// Sort first: unique_retain only compares an element with its neighbours.
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
auto it = unique_retain(agr.v.begin(),agr.v.end(), std::back_inserter(dups), equal_comp);
// Everything from the returned iterator onwards is leftover and is dropped.
agr.v.erase(it, agr.v.end());

Related

Creating std::set copies only one element, how to fix this?

v_map holds the correct amount of information; however, when I try to build a std::set from it, only one element is copied (I assume the first one). This is my first time using std::set, so maybe I am missing something here. Thanks for your help!
typedef std::map<std::string,std::pair<int,int>> points_map;
// Prints one line per map entry: "<name> <(second - first) / first>", using
// integer division on the entry's pair of ints.
//
// Entries are printed in ascending order of pair.first. Ties are broken by
// name: a std::set keeps only one of several elements its comparator considers
// equivalent, so comparing pair.first alone silently dropped every entry that
// shared its pair.first with an earlier one.
void list_average(points_map &v_map)
{
    typedef std::pair<std::string,std::pair<int,int>> entry;
    auto byFirstThenName = [](const entry &lhs, const entry &rhs) -> bool
    {
        if (lhs.second.first != rhs.second.first)
            return lhs.second.first < rhs.second.first;
        // Map keys are unique, so the name makes every entry distinct.
        return lhs.first < rhs.first;
    };
    std::set<entry, decltype(byFirstThenName)> v_set(v_map.begin(), v_map.end(), byFirstThenName);
    for (const entry &item : v_set)
    {
        const std::pair<int,int> &stats = item.second;
        std::cout << item.first << " " << (stats.second - stats.first) / stats.first << std::endl;
    }
}
Note: the following is the full program. I apologize in advance for the ugly and lengthy code. I also renamed the function in the excerpt above; in the full code this particular function is called list_atlag.
#include <iostream>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <codecvt>
#include <iterator>
#include <numeric>
#include <functional>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/program_options.hpp>
#include <boost/tokenizer.hpp>
// One record: a name, a path, a date and a points value.
// All accessors are const so records can be read through const references.
class Adatok
{
  public:
    Adatok(std::string name, std::string path, std::string date, int points) : _name(name), _path(path), _date(date), _points(points) {}
    Adatok(const Adatok &other) = default;
    Adatok &operator=(const Adatok &other) = default;
    std::string get_name() const { return _name; }
    std::string get_path() const { return _path; }
    std::string get_date() const { return _date; }
    int get_points() const { return _points; }

  private:
    std::string _name;
    std::string _path;
    std::string _date;
    int _points;
};
// One ranking row: a name and its points.
class Ranglista
{
  public:
    Ranglista(std::string name, int points) : _name(name), _points(points) {}
    Ranglista(const Ranglista &other) = default;
    Ranglista &operator=(const Ranglista &other) = default;
    std::string get_name() const { return _name; }
    int get_points() const { return _points; }
    // Deliberately inverted: "less" means MORE points, so an ascending
    // std::sort lists the highest score first. const so it also works on
    // const objects.
    bool operator<(const Ranglista &other) const
    {
        return _points > other._points;
    }

  private:
    std::string _name;
    int _points;
};
// Accumulates points for one name together with the number of contributions.
class Vedes
{
  public:
    // The constructor counts as the first contribution, so _count starts at 1.
    Vedes(std::string name, int point) : _name(name), _point(point) { _count++; }
    Vedes(const Vedes &other) = default;
    Vedes &operator=(const Vedes &other) = default;
    std::string get_name() const { return _name; }
    int get_point() const { return _point; }
    int get_count() const { return _count; }
    // Adds `points` to the total and bumps the contribution counter.
    // Taken by const reference so temporaries can be passed as well.
    void set_stuff(const int &points)
    {
        _point += points;
        _count++;
    }
    // Inverted on purpose: "less" means a HIGHER count, so an ascending sort
    // puts the largest count first.
    bool operator<(const Vedes &other) const
    {
        return _count > other._count;
    }

  private:
    std::string _name;
    int _point;
    int _count = 0;
};
typedef std::map<std::string, int> path_value; // every path + its value
typedef std::vector<Adatok> name_path_date; // entries
typedef std::vector<Ranglista> ranglista; // ranking list
typedef std::map<std::string,std::pair<int,int>> vedes_vec; // defenses: name -> (count, points)
typedef std::function<bool(std::pair<std::string,std::pair<int,int>>,std::pair<std::string,std::pair<int,int>>)> Comparator;
void create_pv(path_value &, boost::filesystem::path); // fills the path + value map
void create_npd(name_path_date &, path_value &, std::string input); // fills the vector of entries, looking each entry's points up in the path + value map
void create_np(name_path_date &, path_value &); // builds the name -> summed points map
void list_np(path_value &name_point); // prints the name -> points map
void list_bejegyzesek(name_path_date &bejegyzesek); // prints the vector of entries
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path); // is there already such an entry?
void create_rl(ranglista &rl_vec, path_value &name_point); // fills the ranking list
void list_rl(ranglista &rl_vec); // prints the ranking list
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec); // builds the defense-average map
void list_atlag(vedes_vec &v_vec); // prints the defense averages
bool check_vedes(vedes_vec &v_vec, std::string name); // is `name` already a key of the defense map?
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points); // updates the existing defense entry of `name`
//void accumulate_pv(path_value&);
// Command-line driver.
// Options: --help, --root/-r (also positional, repeatable), --csv <file>, --rank, --vedes.
// --root fills the path -> value map and prints the sum of its values;
// --csv reads the entries file and, with --rank / --vedes, prints the ranking
// list / the defense averages. Returns 1 after printing the help text, else 0.
int main(int argc, char **argv)
{
// Defaults; `roots` is replaced when --root is given.
std::vector<std::string> roots = {"City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/", "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/"};
// NOTE(review): input_file_name is never read below; the file name comes from --csv.
std::string input_file_name = "db-2018-05-06.csv";
/* OPTIONS */
boost::program_options::options_description desc("ALLOWED OPTIONS");
desc.add_options()("help", "help msg")("root,r", boost::program_options::value<std::vector<std::string>>())("csv", boost::program_options::value<std::string>(), "comma separated values")("rank", "rang lista")("vedes", "labor vedesek");
// Every positional argument is treated as another --root value.
boost::program_options::positional_options_description pdesc;
pdesc.add("root", -1);
boost::program_options::variables_map vm;
boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).positional(pdesc).run(), vm);
boost::program_options::notify(vm);
int sum = 0;
path_value pv_map;
// Show the help text when asked for it or when no argument was passed at all.
if (vm.count("help") || argc == 1)
{
std::cout << desc << std::endl;
return 1;
}
if (vm.count("root"))
{
roots = vm["root"].as<std::vector<std::string>>();
// Walk every root directory and collect path -> value pairs.
for (auto &i : roots)
{
boost::filesystem::path path(i);
create_pv(pv_map, path);
}
// `sum` is computed here but only the std::accumulate result below is printed.
for (path_value::iterator it{pv_map.begin()}; it != pv_map.end(); it++)
sum += it->second;
//std::cout << sum << std::endl;create_npd
std::cout << std::accumulate(pv_map.begin(), pv_map.end(), 0, [](int value, const std::map<std::string, int>::value_type &p) { return value + p.second; });
std::cout << std::endl;
}
if (vm.count("csv"))
{
//input_file_name = vm["csv"].as<std::string>();
std::ifstream input_file{vm["csv"].as<std::string>()};
name_path_date bejegyzesek;
std::string temp;
path_value name_point;
// One entry per input line; points are looked up in pv_map, so without
// --root every lookup finds nothing and yields 0.
while (getline(input_file, temp))
create_npd(bejegyzesek, pv_map, temp);
create_np(bejegyzesek, name_point);
//list_bejegyzesek(bejegyzesek);
//list_np(name_point);
// --rank and --vedes only take effect together with --csv.
if (vm.count("rank"))
{
ranglista rl_vec;
create_rl(rl_vec, name_point);
list_rl(rl_vec);
}
if (vm.count("vedes"))
{
vedes_vec v_vec;
vedes_atlag(bejegyzesek, v_vec);
list_atlag(v_vec);
}
return 0;
}
return 0;
}
// Recursively walks `path` and fills `pv_map`.
//
// For every regular file the numbers found on its lines are summed and stored
// under the file's directory (the file path up to its last '/'). Anything that
// is not a regular file is descended into as a directory.
void create_pv(path_value &pv_map, boost::filesystem::path path)
{
    boost::filesystem::directory_iterator it{path}, eod;
    BOOST_FOREACH (boost::filesystem::path const &p, std::make_pair(it, eod))
    {
        if (boost::filesystem::is_regular_file(p))
        {
            boost::filesystem::ifstream regular_file{p};
            std::string temp;
            int sum = 0; // value of the current file
            while (getline(regular_file, temp))
            {
                // Keep only what follows the last '/' and then the first ' '.
                temp.erase(0, temp.find_last_of('/'));
                temp.erase(0, temp.find_first_of(' '));
                const std::string::size_type first_digit = temp.find_first_of("0123456789");
                if (first_digit == std::string::npos)
                    continue; // no number on this line; substr(npos, ...) would throw
                // atoi stops at the first non-digit, so no end position is needed.
                sum += std::atoi(temp.c_str() + first_digit);
            }
            std::string result = p.string();
            std::string result_path = result.substr(0, result.find_last_of('/'));
            // A later file in the same directory overwrites the earlier value.
            pv_map[result_path] = sum;
        }
        else
            create_pv(pv_map, p);
    }
}
//void accumulate_pv(path_value& pv_map)
//{
// std::cout<<std::accumulate(pv_map.begin(),pv_map.end(),0,[](int value,const path_value::int& p){return value+p.second;});
//}
// Parses one CSV line (name, path, date, ...) and appends it to `bejegyzesek`
// unless an entry with the same name and path already exists. The entry's
// points are looked up in `pv_map`; an unknown path yields 0 and, because
// operator[] is used, is inserted into the map.
// Lines with fewer than three fields, or with a path shorter than the
// two-character prefix that gets stripped, are skipped.
void create_npd(name_path_date &bejegyzesek, path_value &pv_map, std::string input)
{
    typedef boost::tokenizer<boost::escaped_list_separator<char>> csv_tokenizer;
    csv_tokenizer tokenizer{input};
    csv_tokenizer::iterator it{tokenizer.begin()};
    const csv_tokenizer::iterator last{tokenizer.end()};
    if (it == last)
        return;
    std::string name = *it;
    if (++it == last)
        return;
    std::string path = *it;
    if (++it == last)
        return;
    std::string date = *it;
    if (path.size() < 2)
        return; // substr(2) would throw std::out_of_range
    path = path.substr(2);
    if (!check_bejegyzesek(bejegyzesek, name, path))
        bejegyzesek.push_back(Adatok(name, path, date, pv_map["/home/erik/Documents/Programs/"+path]));
}
// Returns true when `bejegyzesek` already holds an entry with this name and path.
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path)
{
    for (auto &entry : bejegyzesek)
    {
        const bool same_name = entry.get_name() == name;
        if (same_name && entry.get_path() == path)
            return true;
    }
    return false;
}
// Returns true when `name` is a key of `v_vec`.
bool check_vedes(vedes_vec &v_vec, std::string name)
{
    return v_vec.find(name) != v_vec.end();
}
// Registers one more result for `name`: increments the entry's counter (first)
// and adds `points` to its total (second). Does nothing when `name` has no
// entry in `v_vec`.
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points)
{
    vedes_vec::iterator i = v_vec.find(name);
    if (i == v_vec.end())
        return; // dereferencing end() would be undefined behaviour
    // Bind by reference: working on a copy of the pair would lose the update.
    std::pair<int,int> &stats = i->second;
    stats.first++;
    stats.second += points;
}
// Sums the points of all entries per name into `name_point`.
void create_np(name_path_date &bejegyzesek, path_value &name_point)
{
    for (auto &entry : bejegyzesek)
    {
        // operator[] creates a missing name with value 0, so the first
        // contribution and every later one can both simply be added.
        name_point[entry.get_name()] += entry.get_points();
    }
}
// Prints "<key> <value>" for every map entry whose value is non-zero.
void list_np(path_value &name_point)
{
    for (auto &entry : name_point)
    {
        if (entry.second == 0)
            continue;
        std::cout << entry.first << " " << entry.second << std::endl;
    }
}
// Prints name, path and points of every entry whose name is "Varga Erik".
void list_bejegyzesek(name_path_date &bejegyzesek)
{
    for (auto &entry : bejegyzesek)
    {
        if (!(entry.get_name() == "Varga Erik"))
            continue;
        std::cout << entry.get_name() << " " << entry.get_path() << " " << entry.get_points() << std::endl;
    }
}
// Appends one ranking row per name with a positive score, then sorts the
// whole list with Ranglista::operator<.
void create_rl(ranglista &rl_vec, path_value &name_point)
{
    for (path_value::iterator row = name_point.begin(); row != name_point.end(); ++row)
    {
        if (row->second <= 0)
            continue;
        rl_vec.push_back(Ranglista(row->first, row->second));
    }
    std::sort(rl_vec.begin(), rl_vec.end());
}
// Prints "<name> <points>" for every ranking row, in stored order.
void list_rl(ranglista &rl_vec)
{
    for (ranglista::iterator row = rl_vec.begin(); row != rl_vec.end(); ++row)
    {
        std::cout << row->get_name() << " " << row->get_points() << std::endl;
    }
}
// Builds the per-name defense statistics: every entry whose path contains the
// defense directory and whose points are non-zero either creates the name's
// record as (1, points) or is added to the existing record via vedes_elem.
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec)
{
    std::string key = "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/";
    for (auto &entry : bejegyzesek)
    {
        const bool is_defense = entry.get_path().find(key) != std::string::npos;
        if (!is_defense || !entry.get_points())
            continue;
        if (!check_vedes(v_vec, entry.get_name()))
            v_vec.insert(std::make_pair(entry.get_name(), std::make_pair(1, entry.get_points())));
        else
            vedes_elem(v_vec, entry.get_name(), entry.get_points());
    }
}
// Prints one line per entry of `v_vec`: "<name> <(second - first) / first>",
// using integer division on the entry's (count, points) pair.
//
// Entries are printed in ascending order of the count. Ties are broken by
// name: a std::set keeps only one of several elements its comparator considers
// equivalent, so comparing the count alone dropped every name that shared its
// count with an earlier one.
void list_atlag(vedes_vec &v_vec)
{
    typedef std::pair<std::string,std::pair<int,int>> entry;
    auto byCountThenName = [](const entry &lhs, const entry &rhs) -> bool
    {
        if (lhs.second.first != rhs.second.first)
            return lhs.second.first < rhs.second.first;
        // Map keys are unique, so the name makes every entry distinct.
        return lhs.first < rhs.first;
    };
    std::set<entry, decltype(byCountThenName)> v_set(v_vec.begin(), v_vec.end(), byCountThenName);
    for (const entry &item : v_set)
    {
        const std::pair<int,int> &stats = item.second;
        std::cout << item.first << " " << (stats.second - stats.first) / stats.first << std::endl;
    }
}

so, as described here
template<
class Key,
class Compare = std::less<Key>,
class Allocator = std::allocator<Key>
> class set;
std::set is an associative container that contains a sorted set of unique objects of type Key.
I cleaned up your code, and made a Minimal, Complete, and Verifiable example,
#include <iostream>
#include <map>
#include <set>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using points_set_pair = std::pair<std::string, point_pair>;
// Orders entries by the first int of their point_pair ONLY. Two entries with
// the same .second.first are therefore equivalent as far as a std::set using
// this comparator is concerned.
auto compFunctor = [](const points_set_pair &elem1, const points_set_pair &elem2)
{
return elem1.second.first < elem2.second.first;
};
using points_set = std::set<points_set_pair, decltype(compFunctor)>;
// Copies the map into a set ordered by compFunctor and prints, per element,
// "<name> <(second - first) / first>" (integer division).
void list_average(const points_map &v_map)
{
// Elements equivalent to one already in the set are not inserted.
points_set v_set(v_map.begin(),v_map.end(),compFunctor);
for (auto &elem : v_set)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
Now consider the first version of main
// Two entries whose point_pair.first values differ (1 and 3).
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 3, 4}}};
list_average(v_map);
}
output:
foo 1
bar 0
Now consider the second version of main:
// Two entries that share the same point_pair.first value (1).
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}};
list_average(v_map);
}
output:
bar 3
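See the problem? Because .second.first is 1 for both elements, the comparator treats them as equivalent, so the set keeps only the first one inserted ("bar", since the map is iterated in key order) and silently rejects the second. A std::set stores unique elements only, and "unique" is decided by the comparator. That's the downside of std::set here.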
So, what then?
Don't use std::set, but use std::vector and std::sort. Example:
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using string_point_pair = std::pair<std::string, point_pair>;
// Orders entries by their point_pair: first int, then second int.
// std::pair's operator< performs exactly this lexicographic comparison.
auto compFunctor = [](string_point_pair const &elem1, string_point_pair const &elem2)
{
    return elem1.second < elem2.second;
};
// Copies the map into a vector, sorts it with compFunctor and prints, per
// entry, "<name> <(second - first) / first>" (integer division).
void list_average(points_map const &v_map)
{
    std::vector<string_point_pair> entries(v_map.begin(), v_map.end());
    std::sort(entries.begin(), entries.end(), compFunctor);
    for (auto it = entries.begin(); it != entries.end(); ++it)
    {
        const int lower = it->second.first;
        const int upper = it->second.second;
        std::cout << it->first << " " << (upper - lower) / lower << "\n";
    }
}
// "foo" and "bar" share point_pair.first == 1; "baz" has 2.
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}, {"baz", { 2, 4}}};
list_average(v_map);
}
Output:
foo 1
bar 3
baz 1
live demo

Find the largest / smallest key in a multimap of pairs <Class object, enum>, using multimap algorithms (std::minmax_element)?

I have this program, where I put pairs of Class Object + enums in a multimap. The class object has a member type int filesize. I want to find the largest and smallest key in my multimap.
I have done this by making 3 iterators, comparing each object with the next one and if it's smaller(or larger, depending in what I search for) it gets assigned to the 3rd iterator. After that, I just print out the 3rd iterator. Is there any other elegant way to do this? I know this works, but I'm sure there is another way of doing this - I just can't seem to find it.
Here is my function for max file:
void getMaxFile() {
multimap<CFile, Filetype>::iterator p = m_DirectoryMap.begin();
multimap<CFile, Filetype>::iterator t = m_DirectoryMap.begin();
multimap<CFile, Filetype>::iterator x = m_DirectoryMap.begin();
t++;
while (p != m_DirectoryMap.end()) {
if (p->first.getFileSize() > t->first.getFileSize())
x = p;
++p, ++t;
}
cout << "The largest file is: " << endl << x->first.getFileName()
<< '\t' << x->first.getFileSize() << '\t' << x->second << endl;
}
The constructor from the second class, where I make the multimap and fill it with pairs of another class objects + enums (read from a file):
// Opens the file `n`, reads the directory name followed by
// "<file name> <size> <type>" records, stores each record in m_DirectoryMap
// and finally prints every stored record.
CDirectory (string n) {
    fp.open (n, ios::in);
    string dirName;
    fp >> dirName;
    m_strDirectory = dirName;
    string fileName, fType;
    int fileSize;
    while (fp >> fileName >> fileSize >> fType) {
        // Translate the textual type; anything unknown is "not supported".
        if (fType == "Archive") {
            filetype = Filetype::Archive;
        } else if (fType == "Hidden") {
            filetype = Filetype::Hidden;
        } else if (fType == "ReadOnly") {
            filetype = Filetype::ReadOnly;
        } else if (fType == "System") {
            filetype = Filetype::System;
        } else {
            filetype = Filetype::FileNotSupported;
        }
        const CFile file (fileName, fileSize);
        m_DirectoryMap.insert(pair<CFile, Filetype>(CFile(file.getFileName(), file.getFileSize()), Filetype(filetype)));
    }
    for (multimap<CFile, Filetype>::iterator entry = m_DirectoryMap.begin(); entry != m_DirectoryMap.end(); ++entry) {
        cout << endl << entry->first.getFileName() << '\t' << entry->first.getFileSize() << '\t' << entry->second << endl;
    }
}
And the first class (which objects are the key in my multimap):
// A file entry: a name plus a size. Ordering and equality look at the size
// only, never at the name.
class CFile {
    std::string m_strFile;
    unsigned int m_size;
public:
    CFile () : m_strFile(""), m_size(0) {}
    CFile (std::string name, int size ) : m_strFile(name), m_size(size) {}
    std::string getFileName () const { return m_strFile; }
    int getFileSize () const { return m_size; }
    void setFileSize ( int size ) { m_size = size; }
    // Both operands are const: ordered containers such as std::multimap
    // compare their (const) keys and cannot call a non-const operator<.
    bool operator< (const CFile& obj) const {
        return ( m_size < obj.m_size );
    }
    bool operator== (const CFile& obj) const {
        return ( m_size == obj.m_size );
    }
    // Writes name and size back to back, without a separator.
    friend std::ostream& operator<< ( std::ostream& ost, const CFile& obj ) {
        return ost << obj.m_strFile << obj.m_size;
    }
    // Reads a whitespace-delimited name followed by a size.
    friend std::istream& operator>> ( std::istream& ist, CFile& obj ) {
        return ist >> obj.m_strFile >> obj.m_size;
    }
    // Strict "larger size" predicate.
    static bool Greater(const CFile& obj1, const CFile& obj2) {
        return obj1.m_size > obj2.m_size;
    }
};

std::minmax_element allows you to pass a comparator to use when looking at the objects, so you can do something like this:
auto p = std::minmax_element(m_directoryMap.begin(), m_directoryMap.end(),
[](CFile const &a, CFile const &b) { return a.getFileSize() < b.getFileSize(); });
p will then be a pair of iterators into your collection, so you can (for example) print them out with:
// p.first / p.second are iterators to key/value pairs; the CFile is the key (->first).
std::cout << "Smallest: " << p.first->first.getFileSize() << " bytes\n";
std::cout << "Largest: " << p.second->first.getFileSize() << " bytes\n";
Looking more carefully, however, it appears that you're using the size member as the ordering for the files anyway. That being the case, you can use the fact that the map is already ordered based on the data you care about, so you can just use:
// With keys ordered by size, the first element is the smallest file and the
// last one the largest. The CFile is the key of each element (->first).
// Both lines require a non-empty map.
std::cout << "Smallest: " << m_DirectoryMap.begin()->first.getFileSize() << " bytes\n";
std::cout << "Largest: " << m_DirectoryMap.rbegin()->first.getFileSize() << " bytes\n";
Looking more at your code, however, you have a few other problems that are likely to affect being able to do what you're trying to do here. Here's a slightly simplified (and rewritten) version of your code, along with some code to find the minimum and maximum (and print them out):
#include <string>
#include <iostream>
#include <map>
#include <algorithm>
using std::string;
using std::istream;
using std::ostream;
// A file entry: a name plus a size. Comparison operators and Greater() look
// at the size only.
class CFile {
    string m_strFile;
    unsigned int m_size;
public:
    CFile() : m_strFile(""), m_size(0) {}
    CFile(string name, int size) : m_strFile(name), m_size(size) {}

    string getFileName() const { return m_strFile; }
    int getFileSize() const { return m_size; }
    void setFileSize(int size) { m_size = size; }

    bool operator< (CFile const& obj) const {
        return obj.m_size > m_size;
    }
    bool operator== (const CFile& obj) const {
        return !(m_size != obj.m_size);
    }

    // Writes name and size back to back, without a separator.
    friend ostream& operator<< (ostream& ost, const CFile& obj) {
        ost << obj.m_strFile;
        ost << obj.m_size;
        return ost;
    }
    // Reads a whitespace-delimited name followed by a size.
    friend istream& operator>> (istream& ist, CFile& obj) {
        ist >> obj.m_strFile;
        ist >> obj.m_size;
        return ist;
    }

    // Strict "larger size" predicate.
    static bool Greater(const CFile& obj1, const CFile& obj2) {
        return obj2.m_size < obj1.m_size;
    }
};
// Orders CFile keys by file name instead of by size.
struct cmp {
    // const: associative containers invoke their comparator through a const
    // object, and standard libraries may reject a non-const call operator.
    bool operator()(CFile const &a, CFile const &b) const {
        return a.getFileName() < b.getFileName();
    }
};
// Builds a multimap keyed by CFile (ordered by name through cmp) and prints
// the smallest and the largest file size found by a linear scan.
int main() {
    std::multimap<CFile, int, cmp> files {
        { CFile { "abc", 123 }, 1 },
        { CFile { "cde", 234 }, 2 },
        { CFile { "def", 345 }, 3 }
    };
    // The container is ordered by name, so the sizes have to be compared explicitly.
    const auto smaller_size = [](auto const &a, auto const &b) {
        return a.first.getFileSize() < b.first.getFileSize();
    };
    const auto extremes = std::minmax_element(files.begin(), files.end(), smaller_size);
    const auto smallest = extremes.first;
    const auto largest = extremes.second;
    std::cout << smallest->first.getFileSize() << "\n";
    std::cout << largest->first.getFileSize() << "\n";
}

Merge two std::sets

I need to merge two sets into a resultant set on basis of one member variable qty if the prices are same. In the below example my resultant set s3 should contain:
Price : 100
Qty : 40
Price : 200
Qty : 60
Please note qty above is a sum of qty in both the sets respective when the price is same.
My question is how do I construct the set s3 below:
Please guide me with the same.
#include <set>
#include <iostream>
using namespace std;
// A price together with the quantity available at that price.
// Ordering (operator<, defined outside the class) looks at the price only.
class PriceLevel
{
public:
    int price;
    int qty;

    PriceLevel(int _price, int _qty)
        : price(_price), qty(_qty)
    {
    }

    friend bool operator<(const PriceLevel &p, const PriceLevel &q);
};
// Price levels are ordered by price alone; the quantity is ignored.
bool operator<(const PriceLevel &p, const PriceLevel &q)
{
    return p.price < q.price;
}
// Sets up two sets with the same prices (100, 200) but different quantities
// and prints s3. s3 is never filled here, so the loop prints nothing: how to
// build it is the question being asked.
int main()
{
std::set<PriceLevel> s1;
std::set<PriceLevel> s2;
PriceLevel p1(100,10);
PriceLevel p2(200,20);
PriceLevel p3(100,30);
PriceLevel p4(200,40);
s1.insert(p1);
s1.insert(p2);
s2.insert(p3);
s2.insert(p4);
std::set<PriceLevel> s3;
// Taken while s3 is still empty, so it already equals s3.end().
set<PriceLevel>::iterator it = s3.begin();
// How should I Initialize s3
for(; it != s3.end(); it++)
{
cout << "Price: " << it->price << endl;
cout << "Qty : " << it->qty << endl;
}
}

If you are absolutely sure that both source sets contain exactly the same prices, you can use the binary version of std::transform.
If they might contain unequal data, you'll have to do it manually, like this:
// Two-pointer merge of s1 and s2 (both iterated in ascending price order):
// prices present in only one set are copied, prices present in both are
// inserted once with the summed quantity.
std::set<PriceLevel> s3;
// How should I Initialize s3
std::set<PriceLevel>::iterator
first1 = s1.begin(),
last1 = s1.end(),
first2 = s2.begin(),
last2 = s2.end();
while (first1 != last1 && first2 != last2) {
if (first1->price < first2->price) {
// Price exists only in s1 (so far): copy it and advance s1.
s3.insert(*first1++);
}
else if (first1->price > first2->price) {
// Price exists only in s2 (so far): copy it and advance s2.
s3.insert(*first2++);
}
else {
// Same price in both sets: merge the quantities.
s3.insert(PriceLevel(first1->price, first1->qty + first2->qty));
++first1;
++first2;
}
}
// At most one of the two ranges still has elements; copy the remainder.
while (first1 != last1) {
s3.insert(*first1++);
}
while (first2 != last2) {
s3.insert(*first2++);
}
This is best put in an extra function.
View on IdeOne
If you only need those prices in the result set which existed in both source sets, it is a bit simpler:
// Intersection variant: only prices found in BOTH ranges reach s3, each with
// the summed quantity. Uses the same four iterators as the full merge.
while (first1 != last1 && first2 != last2) {
if (first1->price < first2->price) {
// Price missing from the second range: skip it.
++first1;
}
else if (first1->price > first2->price) {
// Price missing from the first range: skip it.
++first2;
}
else {
s3.insert(PriceLevel(first1->price, first1->qty + first2->qty));
++first1;
++first2;
}
}

You can merge two sets with just two lines
#include <set>
// Returns the union of `x` and `y`.
// When both sets hold an equivalent element, the one from `x` is kept;
// nothing is combined or summed.
template <typename T>
std::set<T> merge(const std::set<T> &x, const std::set<T> &y)
{
    std::set<T> merged = x;             // start from a copy of x
    merged.insert(y.begin(), y.end());  // add what y has and x lacks
    return merged;                      // plain return keeps copy elision possible
}

set is not an appropriate data structure for your application here. Consider using a map<int, int> instead:
map<int, int> p1, p2, p3; // map price -> quantity
p1[100] = 10;
p1[200] = 20;
p2[100] = 30;
p2[200] = 40;
// Start from p1, then add every quantity of p2 onto the same price.
p3 = p1;
for(auto &i : p2) {
// operator[] creates a missing price with quantity 0 before adding.
p3[i.first] += i.second;
}
// Now p3[100]=40 and p3[200]=60.
You can also use a set kind of like a map using set::find:
// Start from s1, then fold every element of s2 into it.
s3 = s1;
for(auto &i : s2) {
// find() uses the set's ordering to locate an equivalent element.
auto it = s3.find(i);
if(it == s3.end()) {
s3.insert(i);
} else {
// Modifies an element in place; set elements are const, hence the
// `mutable` requirement on qty.
it->qty += i.qty;
}
}
For this to work, you will have to declare qty as a mutable int, so that it can be modified even if the PriceLevel struct is const (since elements of a set are const).
If you can't make the variable mutable, then you can try removing the existing set element and then adding a new, merged element.

You are essentially trying to use a set as a map AND merge values with equal keys. You will need to roll your own result (not to mention that it really isn't advisable...). Here is something to get you started.
#include <iostream>
#include <set>
using namespace std;
// A price together with the quantity available at that price.
class PriceLevel
{
public:
    int price;
    int qty;

    PriceLevel() : price(0), qty(0) {}
    PriceLevel(int _price, int _qty) : price(_price), qty(_qty) {}

    friend bool operator<(const PriceLevel &p, const PriceLevel &q);

    // Merges two levels that share the same price.
    // Returns {true, level with that price and the summed quantities} when the
    // prices match, otherwise {false, default-constructed level}; the second
    // member is only meaningful when the first one is true.
    static std::pair<bool, PriceLevel> merge_equal(const PriceLevel& p, const PriceLevel& q) {
        if (p.price != q.price) {
            return std::pair<bool, PriceLevel>(false, PriceLevel());
        }
        return std::pair<bool, PriceLevel>(true, PriceLevel(p.price, p.qty + q.qty));
    }
};
// Price levels are ordered by price alone; the quantity is ignored.
bool operator<(const PriceLevel &p, const PriceLevel &q)
{
    const bool cheaper = p.price < q.price;
    return cheaper;
}
int main()
{
std::set<PriceLevel> s1;
std::set<PriceLevel> s2;
PriceLevel p1(100,10);
PriceLevel p2(200,20);
PriceLevel p3(100,30);
PriceLevel p4(200,40);
s1.insert(p1);
s1.insert(p2);
s2.insert(p3);
s2.insert(p4);
std::set<PriceLevel> s3;
//Just in case...the world may explode otherwise.
if(s1.size() == s2.size()) {
for(const auto& pl1 : s1) {
for(const auto& pl2 : s2) {
//Only insert valid values.
auto r = PriceLevel::merge_equal(pl1, pl2);
if(r.first) s3.insert(r.second);
}
}
for(auto it = s3.begin(); it != s3.end(); it++) {
cout << "Price: " << it->price << endl;
cout << "Qty : " << it->qty << endl;
}
}
}

Iterating over mmaped gzip file with boost

I am trying to learn boost and some template programming in C++ but I am really having such an hard time to implement a simple class for iterating over Gzip files using mapped_file_source. I essentially have an edge list in TSV format such that each line in the gzip file is of the format: <src:int><tab><dst:int>. What I want is to implement a gz_file class that exposes a begin and end iterator over which I can get an edge (std::pair<int,int>) each time I query the iterator.
The problem is the copy constructor, which is broken because I cannot know where I am positioned in the gzip file.
Here is the code I have so far:
// Read-only view over a gzip-compressed, tab-separated edge list that is
// memory-mapped from disk. begin()/end() return iterators that decompress the
// mapped bytes line by line and yield one Edge per line.
class gz_graph {
public:
    // Maps `filename` and remembers the mapped bytes.
    // Throws std::runtime_error when the file cannot be opened.
    gz_graph(const char * filename)
    {
        m_file.open(filename);
        if (!m_file.is_open()) {
            throw std::runtime_error("Error opening file");
        }
        m_data = m_file.data();
        m_data_size = m_file.size() / sizeof(m_data[0]);
        // Access-pattern hint only; the result is not acted upon.
        auto ret = posix_madvise((void*)m_data, m_data_size, POSIX_MADV_SEQUENTIAL);
    }
    class iterator;
    // Iterator positioned on the first edge of the file.
    iterator begin() const
    {
        return iterator(this, false);
    }
    // Past-the-end iterator; its current edge is the sentinel Edge(-1, -1).
    iterator end() const
    {
        return iterator(this, true);
    }
    class iterator : public std::iterator<std::forward_iterator_tag, Edge> {
    public:
        // `consumed == false` opens the decompression stream and reads the
        // first edge; `consumed == true` builds the end iterator.
        iterator(gz_graph const * ref, bool consumed)
            : m_ref(ref),
              m_cur_edge(-1, -1),
              m_consumed(consumed)
        {
            if (!consumed) {
                initialize();
                advance();
            }
        }
        // The decompression stream itself is not copied. A copy of a live
        // iterator opens a NEW stream at the start of the data and reads its
        // first line, so it always points at the first edge.
        // NOTE(review): that restart is the limitation described in the
        // question; the position of `x` inside the stream is not preserved.
        iterator(const iterator& x)
            : m_ref(x.m_ref),
              m_cur_edge(x.m_cur_edge),
              m_consumed(x.m_consumed)  // previously left uninitialised in copies
        {
            if (!x.m_consumed) {
                initialize();
                advance();
            }
            std::cout << "Copy constructor" << std::endl;
        }
        value_type const& operator*() const
        {
            return m_cur_edge;
        }
        value_type const* operator->() const
        {
            return &m_cur_edge;
        }
        // Pre-increment: reads the next line into the current edge.
        iterator& operator++()
        {
            advance();
            return *this;
        }
        // Iterators are equal when their current edges are equal; the end
        // iterator is recognised by the sentinel Edge(-1, -1).
        bool operator==(iterator const& other) const
        {
            assert(m_ref == other.m_ref);
            return m_cur_edge == other.m_cur_edge;
        }
        bool operator!=(iterator const& other) const
        {
            return !(*this == other);
        }
    private:
        // Sets up the filter chain: gzip decompressor on top of the mapped bytes.
        void initialize()
        {
            boost::iostreams::array_source source(m_ref->m_data, m_ref->m_data_size);
            m_in.push(boost::iostreams::gzip_decompressor());
            m_in.push(source);
        }
        // Reads one line and parses it as "<src>\t<dst>". At end of input the
        // iterator is marked consumed and the current edge becomes the
        // sentinel. Throws std::runtime_error when a line does not have
        // exactly two tab-separated fields.
        void advance()
        {
            std::string line_str;
            if (!getline(m_in, line_str)) {
                m_consumed = true;
                m_cur_edge = Edge(-1, -1);
                return;
            }
            std::vector<std::string> strs;
            boost::split(strs, line_str, boost::is_any_of("\t"));
            if (strs.size() != 2)
                throw std::runtime_error("Required 2 fields per line");
            int src = boost::lexical_cast<int>(strs.at(0));
            int dst = boost::lexical_cast<int>(strs.at(1));
            m_cur_edge = Edge(src, dst);
            // std::cout << "Read line " << line_str << std::endl;
        }
        gz_graph const * m_ref;                      // graph whose mapped bytes are read
        Edge m_cur_edge;                             // edge returned by operator* / operator->
        boost::iostreams::filtering_istream m_in;    // decompressing input stream
        bool m_consumed;                             // true once the input is exhausted
    };
private:
    boost::iostreams::mapped_file_source m_file;
    char const* m_data;
    size_t m_data_size;
};

I would just use a std::istream_iterator here. I'm not sure how exactly to interpret your "input pseudo-code", so let me humor you and do the "complicated" parsing:
// An edge is a (source, destination) pair of ints.
struct Edge : std::pair<int, int> { };
// Reads one edge from `is`: the literal "src:", an int, a tab, the literal
// "dst:", an int and an end of line. The two ints are stored in edge.first
// and edge.second.
std::istream& operator>>(std::istream& is, Edge& edge)
{
using namespace boost::spirit::qi;
return is >> match("src:" > int_ > '\t' > "dst:" > int_ >> eol, edge.first, edge.second);
}
I expect you would be happy to have it much simpler, but simpler is easier, right?
Now the main program looks like
for (
std::istream_iterator<Edge> it(fs >> std::noskipws), end;
it != end;
++it)
{
std::cout << it->first << " to " << it->second << "\n";
}
Where fs is the filtering_istream that has the gzip_decompressor. See it Live On Coliru
Full Code
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>
#include <iterator>
// A graph edge as a pair of ints; the operator>> below stores the parsed
// "src:" value in first and the "dst:" value in second.
struct Edge : std::pair<int, int> {
};
// Stream extraction for Edge: matches "src:", an int, a tab, "dst:", an int
// and an end-of-line, storing the two ints in edge.first and edge.second.
std::istream& operator>>(std::istream& is, Edge& edge)
{
    using namespace boost::spirit::qi;

    // The grammar is written inline: it is passed straight to match() rather
    // than being stored in a local variable first.
    is >> match("src:" > int_ > '\t' > "dst:" > int_ >> eol, edge.first, edge.second);
    return is;
}
namespace io = boost::iostreams;
int main()
{
io::mapped_file_source csv("csv.txt.gz");
io::stream<io::mapped_file_source> textstream(csv);
io::filtering_istream fs;
fs.push(io::gzip_decompressor{});
fs.push(textstream);
for (
std::istream_iterator<Edge> it(fs >> std::noskipws), last;
it != last;
++it)
{
std::cout << it->first << " to " << it->second << "\n";
}
}

Binary Cosine Coefficient

I was given the following formula for calculating this:
sim = |Q∩D| / (√|Q| · √|D|)
I went ahead and implemented a class to compare strings consisting of a series of words:
#pragma once
#include <vector>
#include <string>
#include <iostream>
#include <vector>
using namespace std;
// A collection of strings kept in a vector, with set-style operations
// (union, intersection) and a similarity measure between two collections.
class StringSet
{
public:
// Creates an empty set.
StringSet(void);
// Creates a set from the first no_of_strings elements of the_strings.
StringSet( const string the_strings[], const int no_of_strings);
~StringSet(void);
// Creates a set holding a copy of the_strings; the contents are taken as
// given (no duplicate check is performed here).
StringSet( const vector<string> the_strings);
// Appends the_string. If it is already present, prints
// "String is already in the set" and terminates the program with exit(1).
void add_string( const string the_string);
// Erases the first occurrence of the_string; returns true if one was found.
bool remove_string( const string the_string);
// Removes every string.
void clear_set(void);
// Number of strings currently stored.
int no_of_strings(void) const;
// Stream output of all strings, each followed by a space, then a line break.
friend ostream& operator <<(ostream& outs, StringSet& the_strings);
// Union: every string of first, plus the strings of second not already there.
friend StringSet operator *(const StringSet& first, const StringSet& second);
// Intersection: the strings of first that also occur in second.
friend StringSet operator +(const StringSet& first, const StringSet& second);
// Similarity between this set and the_second_set, computed from the size of
// their intersection and the square roots of the two set sizes.
double binary_coefficient( const StringSet& the_second_set);
private:
// The stored strings, in insertion order.
vector<string> set;
};
#include "StdAfx.h"
#include "StringSet.h"
#include <iterator>
#include <algorithm>
#include <stdexcept>
#include <iostream>
#include <cmath>
// Creates an empty string set.
StringSet::StringSet()
    : set()
{
}
// Nothing to release by hand: the vector member is destroyed automatically.
StringSet::~StringSet()
{
}
// Creates a set holding a copy of the_strings, in the same order. The input
// is stored exactly as given; duplicates are not filtered out.
StringSet::StringSet( const vector<string> the_strings)
    : set(the_strings)
{
}
// Creates a set from the first no_of_strings elements of the array
// the_strings, in array order. Duplicates are not filtered out.
StringSet::StringSet( const string the_strings[], const int no_of_strings)
    : set(the_strings, the_strings + no_of_strings)
{
}
// Appends the_string to the set. Adding a string that is already present is
// treated as fatal: the message below is printed and the program exits with
// status 1.
void StringSet::add_string( const string the_string)
{
    const bool already_present =
        find( set.begin(), set.end(), the_string) != set.end();

    if( already_present)
    {
        cout << "String is already in the set";
        exit(1);
    }

    set.push_back(the_string);
}
// Erases the first occurrence of the_string.
// Returns true if a string was removed, false if it was not in the set.
bool StringSet::remove_string( const string the_string)
{
    const vector<string>::iterator position =
        find( set.begin(), set.end(), the_string);

    if( position == set.end())
    {
        return false;
    }

    set.erase(position);
    return true;
}
// Removes every string, leaving the set empty.
void StringSet::clear_set()
{
    set.clear();
}
// Number of strings currently stored, converted to int.
int StringSet::no_of_strings() const
{
    const vector<string>::size_type count = set.size();
    return static_cast<int>(count);
}
// Writes every string of the_strings to outs, each followed by a single
// space, then ends the line (and flushes). Returns outs so calls can be
// chained.
//
// Fix: the output now goes to the stream that was passed in; previously it
// was always written to cout, so printing to any other stream did nothing.
ostream& operator <<(ostream& outs, StringSet& the_strings)
{
    for( vector<string>::const_iterator it = the_strings.set.begin();
         it != the_strings.set.end(); ++it)
    {
        outs << *it << " ";
    }
    outs << endl;
    return outs;
}
// Union of two string sets: starts from a copy of first's strings and
// appends each string of second that is not yet in the result.
StringSet operator *(const StringSet& first, const StringSet& second)
{
    vector<string> merged(first.set);

    for( vector<string>::const_iterator candidate = second.set.begin();
         candidate != second.set.end(); ++candidate)
    {
        const bool already_present =
            find(merged.begin(), merged.end(), *candidate) != merged.end();

        if( !already_present)
        {
            merged.push_back(*candidate);
        }
    }

    return StringSet(merged);
}
// Intersection of two string sets: walks first in order and, for every
// string of second equal to the current one, appends that string to the
// result (one entry per matching pair).
StringSet operator +(const StringSet& first, const StringSet& second)
{
    vector<string> common;

    for( vector<string>::const_iterator lhs = first.set.begin();
         lhs != first.set.end(); ++lhs)
    {
        for( vector<string>::const_iterator rhs = second.set.begin();
             rhs != second.set.end(); ++rhs)
        {
            if( *rhs == *lhs)
            {
                common.push_back(*lhs);
            }
        }
    }

    return StringSet(common);
}
// Binary cosine coefficient between this set (Q) and the_second_set (D):
//
//     sim = |Q n D| / ( sqrt(|Q|) * sqrt(|D|) )
//
// Returns a value in [0, 1] for duplicate-free sets. If either set is empty
// the quotient would be 0/0, so 0.0 is returned instead.
//
// Fix: the denominator is now parenthesized. `a / b * c` is evaluated left to
// right as `(a / b) * c`, which multiplied by sqrt(|D|) instead of dividing
// by it and produced results above 1.
double StringSet::binary_coefficient( const StringSet& the_second_set)
{
    const int own_size = no_of_strings();
    const int other_size = the_second_set.no_of_strings();
    if( own_size == 0 || other_size == 0)
    {
        return 0.0;
    }

    // operator+ is this class's intersection.
    const StringSet intersection = the_second_set + *this;

    const double denominator =
        sqrt(static_cast<double>(own_size)) * sqrt(static_cast<double>(other_size));
    return intersection.no_of_strings() / denominator;
}
However when I try and calculate the coefficient using the following main function:
// Exercise13.cpp : main project file.
#include "stdafx.h"
#include <boost/regex.hpp>
#include "StringSet.h"
using namespace System;
using namespace System::Runtime::InteropServices;
using namespace boost;
// Splits the_string into words (runs of \w characters between word
// boundaries, so punctuation is dropped), lower-cases each word and returns
// them as a StringSet.
//
// Words are added with StringSet::add_string, so a word occurring twice in
// the input ends the program there. An invalid search pattern is reported on
// cout and also ends the program with exit(1).
//
// Fix: the unmanaged buffer returned by Marshal::StringToHGlobalAnsi is now
// released with Marshal::FreeHGlobal; previously one buffer leaked per word.
// The unused cmatch local was removed.
StringSet break_string( const string the_string)
{
    const string search_pattern = "\\b(\\w)+\\b";
    regex re;
    try
    {
        // Assign the regular expression for parsing.
        re = search_pattern;
    }
    catch( const regex_error& e)
    {
        cout << search_pattern << " is not a valid regular expression: \""
             << e.what() << "\"" << endl;
        exit(1);
    }

    StringSet words;
    sregex_token_iterator end;
    for( sregex_token_iterator p(the_string.begin(), the_string.end(), re, 0); p != end; ++p)
    {
        const string word(p->first, p->second);

        // Lower-case through a managed String, then marshal back to ANSI.
        String^ lowered = (gcnew String(word.c_str()))->ToLower();
        IntPtr ansi_copy = Marshal::StringToHGlobalAnsi(lowered);

        string lowered_word;
        try
        {
            lowered_word = static_cast<const char*>(ansi_copy.ToPointer());
        }
        finally
        {
            // The buffer is owned by us and must be freed explicitly.
            Marshal::FreeHGlobal(ansi_copy);
        }

        words.add_string(lowered_word);
    }
    return words;
}
// Breaks two sample sentences into word sets and prints the binary cosine
// coefficient of the first set against the second.
int main(array<System::String ^> ^args)
{
    StringSet words = break_string("Here is a string, with some; words");
    const StringSet words2 = break_string("There is another string,");

    const double similarity = words.binary_coefficient(words2);
    cout << similarity;

    return 0;
}
I get an index which is 1.5116 rather than a value from 0 to 1.
Does anybody have a clue why this is the case?
Any help would be appreciated.

You need more parentheses in the final calculation. a / b * c is parsed as (a / b) * c, but you want a / (b * c).

Maybe it's just a precedence matter
coefficient = intersection.no_of_strings() / sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings());
doesn't say that the multiplication must happen before the division. `/` and `*` have the same precedence and are evaluated left to right, so the expression is computed as (a / b) * c. Did you try grouping the denominator explicitly:
coefficient = intersection.no_of_strings() / (sqrt((double) no_of_strings()) * sqrt((double)the_second_set.no_of_strings()));

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

C++ Find and save duplicates in vector - c++

Related

Creating std::set copies only one element, how to fix this?

Find the largest / smallest key in a multimap of pairs <Class object, enum>, using multimap algorithms (std::minmax_element)?

Merge two std::sets

Iterating over mmaped gzip file with boost

Binary Cosine Coefficient

Categories

Resources