I have the following class declaration:
class Dictionnary{
private:
map< int,list<string> > data;
public:
bool isPrime();
class prime_iterator{
private:
map< int,list<string> >::iterator it;
public:
iterator(){}
prime_iterator & operator++(){
++it;
while(it != data.end() && !isPrime(it->first)){
++it;
}
return it;
}
...
};
which is intended to provide an iterator over prime keys of a map<int,list<string>>. I'm not sure operator++ is well implemented.
First, is it a good design to do it != data.end()accessing the outer class? Second, is operator++ returning the right thing or should return only prime_iterator?. Also, can you think about any better solution?
My advice is to always try to find solution elsewhere - if not found - then only create by yourself:
You can use boost::fiter_iterator
For your case it would look in this way:
#include <boost/iterator/filter_iterator.hpp>
using DataMap = std::map<int, std::list<std::string>>;
struct is_prime_number {
bool operator()(const DataMap::value_type& x) { return x.first % 2 == 0; }
}; // I know this is just is_even - not is_prime :D
using DataMapPrimeIter = boost::filter_iterator<is_prime_number, DataMap::iterator>;
inline DataMapPrimeIter only_prime_begin(DataMap& dataMap)
{
return boost::make_filter_iterator<is_prime_number>(dataMap.begin(), dataMap.end());
}
inline DataMapPrimeIter only_prime_end(DataMap& dataMap)
{
return boost::make_filter_iterator<is_prime_number>(dataMap.end(), dataMap.end());
}
And usage:
int main()
{
DataMap dataMap{{1,{"A","B"}}, {2,{"C", "D", "E"}}};
for (auto i = only_prime_begin(dataMap), end = only_prime_end(dataMap); i != end; ++i)
{
std::cout << i->first << i->second.front() << std::endl;
}
}
If you want to have your own implementation, or you cannot use boost in your project - then look at boost implementation - it is for free to look at...
My humble solution. The typedef is just for convenience.
#include <iterator>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <algorithm>
typedef std::map<int, std::string> map_t;
class Dictionary {
private:
map_t& m_map;
public:
class prime_iterator {
public:
prime_iterator( map_t::iterator begin, map_t::iterator end )
: m_current(begin), m_end(end) {
runUntilPrime();
}
prime_iterator& operator++() {
m_current++;
runUntilPrime();
return *this;
}
bool operator != (prime_iterator other) {
return other.m_current != m_current;
}
map_t::value_type& operator* () {
return *m_current;
}
private:
map_t::iterator m_current;
map_t::iterator m_end;
bool isPrime(int x) {
std::vector<int> primes = { 1, 2, 3, 5, 7, 11, 13, 17 };
return std::find( primes.begin(), primes.end(), x ) != primes.end();
}
void runUntilPrime() {
while( m_current != m_end && !isPrime(m_current->first) ) {
m_current++;
}
}
};
Dictionary( map_t& tmap )
: m_map(tmap) {}
prime_iterator begin() {
return prime_iterator( m_map.begin(), m_map.end() );
}
prime_iterator end () {
return prime_iterator(m_map.end(), m_map.end());
}
};
int main( int argc, char** argv ) {
map_t map;
map.emplace(0, "zero");
map.emplace(1, "one");
map.emplace(2, "two");
map.emplace(3, "three");
map.emplace(4, "four");
map.emplace(5, "five");
map.emplace(6, "six");
map.emplace(7, "seven");
map.emplace(8, "eight");
map.emplace(9, "nine");
map.emplace(10, "ten");
map.emplace(13, "thirteen");
Dictionary dict( map );
for( auto p : dict ) {
std::cout << p.first << "\t" << p.second << std::endl;
}
return 0;
}
Related
v_map has the correct amount of information stored, however when i try to use std::set it only copies one element ,I assume the first one. This is my first time using std::set , maybe I miss something here...Thanks for your help !
typedef std::map<std::string,std::pair<int,int>> points_map;
void list_average(points_map &v_map)
{
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_map.begin(),v_map.end(),compFunctor);
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
}
}
Note the following is the full program, I apologize in advance for the ugly code , and length of the code ,also I rewrote the name in the upper part of my code, in the full code , this particular function is called list_atlag
#include <iostream>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <codecvt>
#include <iterator>
#include <numeric>
#include <functional>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/program_options.hpp>
#include <boost/tokenizer.hpp>
class Adatok
{
public:
Adatok(std::string name, std::string path, std::string date, int points) : _name(name), _path(path), _date(date), _points(points) {}
Adatok(const Adatok &other) = default;
Adatok &operator=(const Adatok &other) = default;
std::string get_name() { return _name; }
std::string get_path() { return _path; }
std::string get_date() { return _date; }
int get_points() { return _points; }
private:
std::string _name;
std::string _path;
std::string _date;
int _points;
};
class Ranglista
{
public:
Ranglista(std::string name, int points) : _name(name), _points(points) {}
Ranglista(const Ranglista &other) = default;
Ranglista &operator=(const Ranglista &other) = default;
std::string get_name() { return _name; }
int get_points() { return _points; }
bool operator<(const Ranglista &other)
{
return _points > other._points;
}
private:
std::string _name;
int _points;
};
class Vedes
{
public:
Vedes(std::string name, int point) : _name(name), _point(point) { _count++; }
Vedes(const Vedes &other) = default;
Vedes &operator=(const Vedes &other) = default;
std::string get_name() { return _name; }
int get_point() { return _point; }
int get_count() { return _count; }
void set_stuff(int &points)
{
_point += points;
_count++;
}
bool operator<(const Vedes &other)
{
return _count > other._count;
}
private:
std::string _name;
int _point;
int _count = 0;
};
typedef std::map<std::string, int> path_value; //minden path + az erteke
typedef std::vector<Adatok> name_path_date; //bejegyzesek
typedef std::vector<Ranglista> ranglista; //ranglista
typedef std::map<std::string,std::pair<int,int>> vedes_vec; //vedesek
typedef std::function<bool(std::pair<std::string,std::pair<int,int>>,std::pair<std::string,std::pair<int,int>>)> Comparator;
void create_pv(path_value &, boost::filesystem::path); //feltolti a path+ertek map-ot
void create_npd(name_path_date &, path_value &, std::string input); //feltolti a bejegyzesek vektorat + mindenki pontszama map
void create_np(name_path_date &, path_value &); // name + path map
void list_np(path_value &name_point); // nam + path kiiratas
void list_bejegyzesek(name_path_date &bejegyzesek); // bejegyzesek vektora kiiratas
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path); //van-e mar ilyen bejegyzes
void create_rl(ranglista &rl_vec, path_value &name_point); //ranglista feltoltes
void list_rl(ranglista &rl_vec); //ranglista kiiratas
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec); //vedes atlag map
void list_atlag(vedes_vec &v_vec); //vedes atlag kiiratas
bool check_vedes(vedes_vec &v_vec, std::string name);
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points); //
//void accumulate_pv(path_value&);
int main(int argc, char **argv)
{
std::vector<std::string> roots = {"City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/", "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/"};
std::string input_file_name = "db-2018-05-06.csv";
/* OPTIONS */
boost::program_options::options_description desc("ALLOWED OPTIONS");
desc.add_options()("help", "help msg")("root,r", boost::program_options::value<std::vector<std::string>>())("csv", boost::program_options::value<std::string>(), "comma separated values")("rank", "rang lista")("vedes", "labor vedesek");
boost::program_options::positional_options_description pdesc;
pdesc.add("root", -1);
boost::program_options::variables_map vm;
boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).positional(pdesc).run(), vm);
boost::program_options::notify(vm);
int sum = 0;
path_value pv_map;
if (vm.count("help") || argc == 1)
{
std::cout << desc << std::endl;
return 1;
}
if (vm.count("root"))
{
roots = vm["root"].as<std::vector<std::string>>();
for (auto &i : roots)
{
boost::filesystem::path path(i);
create_pv(pv_map, path);
}
for (path_value::iterator it{pv_map.begin()}; it != pv_map.end(); it++)
sum += it->second;
//std::cout << sum << std::endl;create_npd
std::cout << std::accumulate(pv_map.begin(), pv_map.end(), 0, [](int value, const std::map<std::string, int>::value_type &p) { return value + p.second; });
std::cout << std::endl;
}
if (vm.count("csv"))
{
//input_file_name = vm["csv"].as<std::string>();
std::ifstream input_file{vm["csv"].as<std::string>()};
name_path_date bejegyzesek;
std::string temp;
path_value name_point;
while (getline(input_file, temp))
create_npd(bejegyzesek, pv_map, temp);
create_np(bejegyzesek, name_point);
//list_bejegyzesek(bejegyzesek);
//list_np(name_point);
if (vm.count("rank"))
{
ranglista rl_vec;
create_rl(rl_vec, name_point);
list_rl(rl_vec);
}
if (vm.count("vedes"))
{
vedes_vec v_vec;
vedes_atlag(bejegyzesek, v_vec);
list_atlag(v_vec);
}
return 0;
}
return 0;
}
void create_pv(path_value &pv_map, boost::filesystem::path path)
{
boost::filesystem::directory_iterator it{path}, eod;
BOOST_FOREACH (boost::filesystem::path const &p, std::make_pair(it, eod))
{
if (boost::filesystem::is_regular_file(p))
{
boost::filesystem::ifstream regular_file{p};
std::string temp;
int sum = 0; //aktualis .props erteke
while (getline(regular_file, temp))
{
temp.erase(0, temp.find_last_of('/'));
temp.erase(0, temp.find_first_of(' '));
sum += std::atoi((temp.substr(temp.find_first_of("0123456789"), temp.find_last_of("0123456789"))).c_str());
}
std::string result = p.string();
std::string result_path = result.substr(0, result.find_last_of('/'));
//std::cout << result_path << std::endl;
//pv_map.insert(std::make_pair(result, sum));
pv_map[result_path] = sum;
}
else
create_pv(pv_map, p);
}
}
//void accumulate_pv(path_value& pv_map)
//{
// std::cout<<std::accumulate(pv_map.begin(),pv_map.end(),0,[](int value,const path_value::int& p){return value+p.second;});
//}
void create_npd(name_path_date &bejegyzesek, path_value &pv_map, std::string input)
{
boost::tokenizer<boost::escaped_list_separator<char>> tokenizer{input};
boost::tokenizer<boost::escaped_list_separator<char>>::iterator it{tokenizer.begin()};
std::string name = *it;
std::string path = *(++it);
std::string date = *(++it);
path = path.substr(2);
if (!check_bejegyzesek(bejegyzesek, name, path))
bejegyzesek.push_back(Adatok(name, path, date, pv_map["/home/erik/Documents/Programs/"+path]));
}
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path)
{
bool ok = false;
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
{
if ((it->get_name() == name) && (it->get_path() == path))
ok = true;
}
return ok;
}
bool check_vedes(vedes_vec &v_vec, std::string name)
{
vedes_vec::iterator it = v_vec.find(name);
if (it != v_vec.end()) return true;
else return false;
}
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points)
{
/*for (auto &it : v_vec)
if (it.get_name() == name)
it.set_stuff(points);
*/
vedes_vec::iterator i = v_vec.find(name);
std::pair<int,int> it = i->second;
//auto& jt = it->second;
it.first++;
it.second += points;
}
void create_np(name_path_date &bejegyzesek, path_value &name_point)
{
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
if (name_point.count(it->get_name()) == 0)
name_point.insert(std::make_pair(it->get_name(), it->get_points()));
else
name_point[it->get_name()] += it->get_points();
}
void list_np(path_value &name_point)
{
for (path_value::iterator it{name_point.begin()}; it != name_point.end(); it++)
{
if (it->second)
std::cout << it->first << " " << it->second << std::endl;
}
}
void list_bejegyzesek(name_path_date &bejegyzesek)
{
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
if (it->get_name() == "Varga Erik")
std::cout << it->get_name() << " " << it->get_path() << " " << it->get_points() << std::endl;
}
void create_rl(ranglista &rl_vec, path_value &name_point)
{
for (auto &it : name_point)
{
if (it.second > 0)
rl_vec.push_back(Ranglista(it.first, it.second));
}
std::sort(rl_vec.begin(), rl_vec.end());
}
void list_rl(ranglista &rl_vec)
{
for (auto &it : rl_vec)
std::cout << it.get_name() << " " << it.get_points() << std::endl;
}
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec)
{
std::string key = "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/";
for (auto &it : bejegyzesek)
{
if ((it.get_path().find("City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/") != std::string::npos) && (it.get_points()) && (!check_vedes(v_vec, it.get_name())))
v_vec.insert(std::make_pair(it.get_name(),std::make_pair(1,it.get_points())));
else if ((check_vedes(v_vec, it.get_name())) && (it.get_path().find("City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/") != std::string::npos) && (it.get_points()))
vedes_elem(v_vec, it.get_name(), it.get_points());
}
}
void list_atlag(vedes_vec &v_vec)
{
//std::sort(v_vec.begin(), v_vec.end());
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_vec.begin(),v_vec.end(),compFunctor);
//int sum = 0;
//int csum = 0;
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
//sum += it.get_point();
//csum += it.get_count();
//sum = std::accumulate(v_vec.begin(), v_vec.end(), 0, [](int i, Vedes &o) { return i + o.get_point(); });
//csum = std::accumulate(v_vec.begin(), v_vec.end(), 0, [](int i, Vedes &o) { return i + o.get_count(); });
}
//std::cout << (sum - csum) / csum << std::endl;
}
so, as described here
template<
class Key,
class Compare = std::less<Key>,
class Allocator = std::allocator<Key>
> class set;
std::set is an associative container that contains a sorted set of unique objects of type Key.
I cleaned up your code, and made a Minimal, Complete, and Verifiable example,
#include <iostream>
#include <map>
#include <set>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using points_set_pair = std::pair<std::string, point_pair>;
auto compFunctor = [](const points_set_pair &elem1, const points_set_pair &elem2)
{
return elem1.second.first < elem2.second.first;
};
using points_set = std::set<points_set_pair, decltype(compFunctor)>;
void list_average(const points_map &v_map)
{
points_set v_set(v_map.begin(),v_map.end(),compFunctor);
for (auto &elem : v_set)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
Now consider the first version of main
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 3, 4}}};
list_average(v_map);
}
output:
foo 1
bar 0
Now consider the second version of main:
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}};
list_average(v_map);
}
output:
bar 3
See the problem? As .second.first of the elements are both 1, the latter replaces the first. It is not unique. That's the downside of std::set.
So, what then?
Don't use std::set, but use std::vector and std::sort. Example:
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using string_point_pair = std::pair<std::string, point_pair>;
auto compFunctor = [](string_point_pair const &elem1, string_point_pair const &elem2)
{
return
elem1.second.first != elem2.second.first?
elem1.second.first < elem2.second.first:
elem1.second.second < elem2.second.second;
};
void list_average(points_map const &v_map)
{
std::vector<string_point_pair> v_vec(v_map.begin(),v_map.end());
std::sort(v_vec.begin(), v_vec.end(), compFunctor);
for (auto &elem : v_vec)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}, {"baz", { 2, 4}}};
list_average(v_map);
}
Output:
foo 1
bar 3
baz 1
live demo
I implemented my first custom iterator, which is supposed to return an std::pair at each iteration. The only problem, is it loops infinitely and I do not know how to provide stop condition. The code looks like so:
#include <iostream>
#include <vector>
#include <utility>
class Simple
{
private:
std::vector<std::size_t> indices;
std::vector<int> values;
public:
void insert(std::size_t index, int value)
{
indices.push_back(index);
values.push_back(value);
}
int at(std::size_t index)
{
return values[indices[index]];
}
class Iterator
{
private:
const std::vector<std::size_t>* indices;
const std::vector<int>* values;
std::size_t pos = 0;
public:
Iterator(const std::vector<std::size_t>* indices_, const std::vector<int>* values_, const std::size_t &pos_ = 0):
values(values_), indices(indices_), pos(pos_){ }
bool operator==(const Iterator& other){
return this == &other;
}
bool operator!=(const Iterator& other){
return !operator==(other);
}
Iterator operator++() {
pos++;
Iterator i = *this;
return i;
}
std::pair<std::size_t, int> operator*()
{
if (pos < (*values).size())
{
return std::make_pair((*indices)[pos], (*values)[pos]);
}
std::cout << "EMPTY PAIR" << std::endl; //loops infinitely and prints this message
return std::pair<std::size_t,int>{};
}
std::pair<std::size_t, int>* operator->()
{
if (pos < (*values).size())
{
std::pair<std::size_t,int> *p;
*p = std::make_pair((*indices)[pos], (*values)[pos]);
return p;
}
return nullptr;
}
};
Iterator begin() const
{
return Iterator(&indices, &values, 0);
}
Iterator end() const
{
return Iterator(&indices, &values, values.size());
}
};
int main() {
Simple s;
s.insert(10, 100);
std::cout << s.at(10) << std::endl;
int i = 0;
for (const std::pair<std::size_t, int> &p : s)
{
std::cout << p.first << " " << p.second << std::endl;
if (i > 3) break; // otherwise it will loop infinitely
i++;
}
return 0;
}
I would like to stress, that this example is simplified and for demonstration purposes, so you may not ask me, why I'm not using map or unorded_map (just because what I'm implementing looks like a map). The question is solely about iterator and I need understanding what may be wrong with iterator itself and how to provide stop condition.
How to implement an iterator of just on values of a map/unordered_map using boost::iterator_adaptor? I've tried following code but it does not work because of the line with comment.
Is there a solution to avoid the problem?
The question here is slightly different from map_values adapter example shown in boost code as here the value field in map is another container like list or vector and the requirement here is to iterate over all elements of those lists for every key of the map.
The deref of iterator is of type of value_type of those list/vector.The end of iterator is the end of list of last key
#include <vector>
#include <boost/unordered_map.hpp>
#include <cassert>
#include <iostream>
#include <boost/iterator/iterator_adaptor.hpp>
class DS {
public:
DS() : _map() {}
~DS() {
for (Map::iterator it = _map.begin(); it != _map.end(); ++it) {
delete (it->second);
}
}
void add(int key_, const std::vector< int > &value_)
{
IntList *ptr = new IntList(value_);
assert(ptr);
_map.insert(Map::value_type(key_, ptr));
}
private:
typedef std::vector< int > IntList;
typedef boost::unordered_map< int, IntList* > Map;
Map _map;
public:
class KeyIter : public boost::iterator_adaptor< KeyIter,
Map::const_iterator,
int,
boost::forward_traversal_tag,
int>
{
public:
KeyIter() : KeyIter::iterator_adaptor_() {}
private:
friend class DS;
friend class boost::iterator_core_access;
explicit KeyIter(Map::const_iterator it) : KeyIter::iterator_adaptor_(it) {}
explicit KeyIter(Map::iterator it) : KeyIter::iterator_adaptor_(it) {}
int dereference() const { return this->base()->first; }
};
class ValueIter : public boost::iterator_adaptor< ValueIter,
Map::const_iterator,
int,
boost::forward_traversal_tag,
int>
{
public:
ValueIter()
: ValueIter::iterator_adaptor_()
, _lIt()
{}
private:
friend class DS;
friend class boost::iterator_core_access;
explicit ValueIter(Map::const_iterator it)
: ValueIter::iterator_adaptor_(it)
, _lIt()
, _mIt(it)
{
IntList *pt = it->second; // <<-- issue here is I can't find if I've already reached the end of the map
if (pt) {
_lIt = it->second->begin();
}
}
int dereference() const { return *_lIt; }
void increment()
{
if (_lIt == _mIt->second->end()) {
++_mIt;
_lIt = _mIt->second->begin();
} else {
++_lIt;
}
}
IntList::iterator _lIt;
Map::const_iterator _mIt;
};
KeyIter beginKey() const { return KeyIter(_map.begin()); }
KeyIter endKey() const { return KeyIter(_map.end()); }
ValueIter beginValue() const { return ValueIter(_map.begin()); }
ValueIter endValue() const { return ValueIter(_map.end()); }
};
int main(int argc, char** argv)
{
DS ds;
std::vector< int > v1;
v1.push_back(10);
v1.push_back(30);
v1.push_back(50);
ds.add(90, v1);
std::vector< int > v2;
v2.push_back(20);
v2.push_back(40);
v2.push_back(60);
ds.add(120, v2);
std::cout << "------------ keys ---------------" << std::endl;
for (DS::KeyIter it = ds.beginKey(); it != ds.endKey(); ++it) {
std::cout << (*it) << std::endl;
}
std::cout << "------------ values ---------------" << std::endl;
// std::cout << (*(ds.beginValue())) << std::endl;
for (DS::ValueIter it = ds.beginValue(); it != ds.endValue(); ++it) {
std::cout << (*it) << std::endl;
}
return 0;
}
Implemented in c++11. You should be able to do the conversion to boost/c++03 fairly simply.
This iterator is FORWARD ONLY and it's quite fragile (see the comparison operator).
user discretion advised.
#include <iostream>
#include <vector>
#include <unordered_map>
typedef std::vector< int > IntList;
typedef std::unordered_map< int, IntList* > Map;
struct whole_map_const_iterator
{
using C1 = IntList;
using C2 = Map;
using I1 = C1::const_iterator;
using I2 = C2::const_iterator;
using value_type = I1::value_type;
using reference = I1::reference;
whole_map_const_iterator(I2 i2) : _i2(i2) {}
bool operator==(const whole_map_const_iterator& r) const {
if (_i2 != r._i2)
return false;
if (deferred_i1 && r.deferred_i1)
return true;
if (deferred_i1 != r.deferred_i1)
return false;
return _i1 == r._i1;
}
bool operator!=(const whole_map_const_iterator& r) const { return !(*this == r); }
reference operator*() const {
check_deferred();
return *_i1;
}
void check_deferred() const {
if (deferred_i1) {
_i1 = _i2->second->begin();
_i1limit = _i2->second->end();
deferred_i1 = false;
}
}
void go_next()
{
check_deferred();
if (++_i1 == _i1limit) {
++_i2;
deferred_i1 = true;
}
}
whole_map_const_iterator& operator++() {
go_next();
return *this;
}
whole_map_const_iterator operator++(int) {
auto result = *this;
go_next();
return result;
}
I2 _i2;
mutable I1 _i1 = {}, _i1limit = {};
mutable bool deferred_i1 = true;
};
IntList a { 1, 2, 3, 4, 5 };
IntList b { 6, 7, 8, 9, 10 };
Map m { { 1, &a }, { 2, &b } };
int main()
{
using namespace std;
auto from = whole_map_const_iterator(m.begin());
auto to = whole_map_const_iterator(m.end());
for ( ; from != to ; ++from) {
std::cout << *from << std::endl;
}
return 0;
}
example output:
6
7
8
9
10
1
2
3
4
5
For bonus points, answer this question:
Q: Why all that damn complication over the deferred flag?
Are there any C++ transformations which are similar to itertools.groupby()?
Of course I could easily write my own, but I'd prefer to leverage the idiomatic behavior or compose one from the features provided by the STL or boost.
#include <cstdlib>
#include <map>
#include <algorithm>
#include <string>
#include <vector>
struct foo
{
int x;
std::string y;
float z;
};
bool lt_by_x(const foo &a, const foo &b)
{
return a.x < b.x;
}
void list_by_x(const std::vector<foo> &foos, std::map<int, std::vector<foo> > &foos_by_x)
{
/* ideas..? */
}
int main(int argc, const char *argv[])
{
std::vector<foo> foos;
std::map<int, std::vector<foo> > foos_by_x;
std::vector<foo> sorted_foos;
std::sort(foos.begin(), foos.end(), lt_by_x);
list_by_x(sorted_foos, foos_by_x);
return EXIT_SUCCESS;
}
This doesn't really answer your question, but for the fun of it, I implemented a group_by iterator. Maybe someone will find it useful:
#include <assert.h>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>
using std::cout;
using std::cerr;
using std::multiset;
using std::ostringstream;
using std::pair;
using std::vector;
struct Foo
{
int x;
std::string y;
float z;
};
struct FooX {
typedef int value_type;
value_type operator()(const Foo &f) const { return f.x; }
};
template <typename Iterator,typename KeyFunc>
struct GroupBy {
typedef typename KeyFunc::value_type KeyValue;
struct Range {
Range(Iterator begin,Iterator end)
: iter_pair(begin,end)
{
}
Iterator begin() const { return iter_pair.first; }
Iterator end() const { return iter_pair.second; }
private:
pair<Iterator,Iterator> iter_pair;
};
struct Group {
KeyValue value;
Range range;
Group(KeyValue value,Range range)
: value(value), range(range)
{
}
};
struct GroupIterator {
typedef Group value_type;
GroupIterator(Iterator iter,Iterator end,KeyFunc key_func)
: range_begin(iter), range_end(iter), end(end), key_func(key_func)
{
advance_range_end();
}
bool operator==(const GroupIterator &that) const
{
return range_begin==that.range_begin;
}
bool operator!=(const GroupIterator &that) const
{
return !(*this==that);
}
GroupIterator operator++()
{
range_begin = range_end;
advance_range_end();
return *this;
}
value_type operator*() const
{
return value_type(key_func(*range_begin),Range(range_begin,range_end));
}
private:
void advance_range_end()
{
if (range_end!=end) {
typename KeyFunc::value_type value = key_func(*range_end++);
while (range_end!=end && key_func(*range_end)==value) {
++range_end;
}
}
}
Iterator range_begin;
Iterator range_end;
Iterator end;
KeyFunc key_func;
};
GroupBy(Iterator begin_iter,Iterator end_iter,KeyFunc key_func)
: begin_iter(begin_iter),
end_iter(end_iter),
key_func(key_func)
{
}
GroupIterator begin() { return GroupIterator(begin_iter,end_iter,key_func); }
GroupIterator end() { return GroupIterator(end_iter,end_iter,key_func); }
private:
Iterator begin_iter;
Iterator end_iter;
KeyFunc key_func;
};
template <typename Iterator,typename KeyFunc>
inline GroupBy<Iterator,KeyFunc>
group_by(
Iterator begin,
Iterator end,
const KeyFunc &key_func = KeyFunc()
)
{
return GroupBy<Iterator,KeyFunc>(begin,end,key_func);
}
static void test()
{
vector<Foo> foos;
foos.push_back({5,"bill",2.1});
foos.push_back({5,"rick",3.7});
foos.push_back({3,"tom",2.5});
foos.push_back({7,"joe",3.4});
foos.push_back({5,"bob",7.2});
ostringstream out;
for (auto group : group_by(foos.begin(),foos.end(),FooX())) {
out << group.value << ":";
for (auto elem : group.range) {
out << " " << elem.y;
}
out << "\n";
}
assert(out.str()==
"5: bill rick\n"
"3: tom\n"
"7: joe\n"
"5: bob\n"
);
}
int main(int argc,char **argv)
{
test();
return 0;
}
Eric Niebler's ranges library provides a group_by view.
according to the docs it is a header only library and can be included easily.
It's supposed to go into the standard C++ space, but can be used with a recent C++11 compiler.
minimal working example:
#include <map>
#include <vector>
#include <range/v3/all.hpp>
using namespace std;
using namespace ranges;
int main(int argc, char **argv) {
vector<int> l { 0,1,2,3,6,5,4,7,8,9 };
ranges::v3::sort(l);
auto x = l | view::group_by([](int x, int y) { return x / 5 == y / 5; });
map<int, vector<int>> res;
auto i = x.begin();
auto e = x.end();
for (;i != e; ++i) {
auto first = *((*i).begin());
res[first / 5] = to_vector(*i);
}
// res = { 0 : [0,1,2,3,4], 1: [5,6,7,8,9] }
}
(I compiled this with clang 3.9.0. and --std=c++11)
I recently discovered cppitertools.
It fulfills this need exactly as described.
https://github.com/ryanhaining/cppitertools#groupby
What is the point of bloating standard C++ library with an algorithm that is one line of code?
for (const auto & foo : foos) foos_by_x[foo.x].push_back(foo);
Also, take a look at std::multimap, it might be just what you need.
UPDATE:
The one-liner I have provided is not well-optimized for the case when your vector is already sorted. A number of map lookups can be reduced if we remember the iterator of previously inserted object, so it the "key" of the next object and do a lookup only when the key is changing. For example:
#include <map>
#include <vector>
#include <string>
#include <algorithm>
#include <iostream>
struct foo {
int x;
std::string y;
float z;
};
class optimized_inserter {
public:
typedef std::map<int, std::vector<foo> > map_type;
optimized_inserter(map_type & map) : map(&map), it(map.end()) {}
void operator()(const foo & obj) {
typedef map_type::value_type value_type;
if (it != map->end() && last_x == obj.x) {
it->second.push_back(obj);
return;
}
last_x = obj.x;
it = map->insert(value_type(obj.x, std::vector<foo>({ obj }))).first;
}
private:
map_type *map;
map_type::iterator it;
int last_x;
};
int main()
{
std::vector<foo> foos;
std::map<int, std::vector<foo>> foos_by_x;
foos.push_back({ 1, "one", 1.0 });
foos.push_back({ 3, "third", 2.5 });
foos.push_back({ 1, "one.. but third", 1.5 });
foos.push_back({ 2, "second", 1.8 });
foos.push_back({ 1, "one.. but second", 1.5 });
std::sort(foos.begin(), foos.end(), [](const foo & lhs, const foo & rhs) {
return lhs.x < rhs.x;
});
std::for_each(foos.begin(), foos.end(), optimized_inserter(foos_by_x));
for (const auto & p : foos_by_x) {
std::cout << "--- " << p.first << "---\n";
for (auto & f : p.second) {
std::cout << '\t' << f.x << " '" << f.y << "' / " << f.z << '\n';
}
}
}
How about this?
template <typename StructType, typename FieldSelectorUnaryFn>
auto GroupBy(const std::vector<StructType>& instances, const FieldSelectorUnaryFn& fieldChooser)
{
StructType _;
using FieldType = decltype(fieldChooser(_));
std::map<FieldType, std::vector<StructType>> instancesByField;
for (auto& instance : instances)
{
instancesByField[fieldChooser(instance)].push_back(instance);
}
return instancesByField;
}
and use it like this:
auto itemsByX = GroupBy(items, [](const auto& item){ return item.x; });
I wrote a C++ library to address this problem in an elegant way. Given your struct
struct foo
{
int x;
std::string y;
float z;
};
To group by y you simply do:
std::vector<foo> dataframe;
...
auto groups = group_by(dataframe, &foo::y);
You can also group by more than one variable:
auto groups = group_by(dataframe, &foo::y, &foo::x);
And then iterate through the groups normally:
for(auto& [key, group]: groups)
{
// do something
}
It also has other operations such as: subset, concat, and others.
I would simply use boolinq.h, which includes all of LINQ. No documentation, but very simple to use.
I've modified James' flattening iterator to act as a bidirectional iterator if possible, but I don't think my changes are very elegant (particularly relying on a bool to see if the inner iterator has been set). However, I can't seem to come up with a nicer solution. Does anyone have any ideas?
#include <algorithm>
#include <iostream>
#include <set>
#include <vector>
#include <iterator>
#include <type_traits>
// An iterator that "flattens" a container of containers. For example,
// a vector<vector<int>> containing { { 1, 2, 3 }, { 4, 5, 6 } } is iterated as
// a single range, { 1, 2, 3, 4, 5, 6 }.
template <typename OuterIterator>
class flattening_iterator
{
public:
typedef OuterIterator outer_iterator;
typedef typename std::iterator_traits<outer_iterator>::value_type::iterator inner_iterator;
typedef typename std::iterator_traits<outer_iterator>::iterator_category outer_category;
typedef typename std::iterator_traits<inner_iterator>::iterator_category inner_category;
typedef typename std::common_type<outer_category, inner_category>::type common_category;
typedef typename std::conditional<std::is_same<common_category, std::random_access_iterator_tag>::value,
std::bidirectional_iterator_tag,
common_category>::type iterator_category;
typedef typename std::iterator_traits<inner_iterator>::value_type value_type;
typedef typename std::iterator_traits<inner_iterator>::difference_type difference_type;
typedef typename std::iterator_traits<inner_iterator>::pointer pointer;
typedef typename std::iterator_traits<inner_iterator>::reference reference;
flattening_iterator() { }
flattening_iterator(outer_iterator it, outer_iterator begin, outer_iterator end)
: outer_it_(it),
outer_begin_(begin),
outer_end_(end),
inner_it_assigned_(false)
{
if (outer_begin_ == outer_end_) { return; }
if (outer_it_ == outer_end_) { return; }
inner_it_ = outer_it_->begin();
inner_it_assigned_ = true;
advance_past_empty_inner_containers();
}
reference operator*() const { return *inner_it_; }
pointer operator->() const { return &*inner_it_; }
flattening_iterator& operator++()
{
++inner_it_;
if (inner_it_ == outer_it_->end())
advance_past_empty_inner_containers();
return *this;
}
flattening_iterator operator++(int)
{
flattening_iterator it(*this);
++*this;
return it;
}
flattening_iterator& operator--()
{
if(!inner_it_assigned_)
{
if(outer_begin_ != outer_end_)
{
decrement_through_empty_inner_containers();
}
return *this;
}
if(inner_it_ == outer_it_->begin())
{
decrement_through_empty_inner_containers();
}
else
{
--inner_it_;
}
return *this;
}
flattening_iterator operator--(int)
{
flattening_iterator it(*this);
--*this;
return it;
}
friend bool operator==(const flattening_iterator& a,
const flattening_iterator& b)
{
if (a.outer_it_ != b.outer_it_)
return false;
if(a.outer_it_ != a.outer_end_ &&
b.outer_it_ != b.outer_end_ &&
a.inner_it_assigned_ == false &&
b.inner_it_assigned_ == false)
return true;
if (a.outer_it_ != a.outer_end_ &&
b.outer_it_ != b.outer_end_ &&
a.inner_it_ != b.inner_it_)
return false;
return true;
}
friend bool operator!=(const flattening_iterator& a,
const flattening_iterator& b)
{
return !(a == b);
}
private:
void advance_past_empty_inner_containers()
{
while (outer_it_ != outer_end_ && inner_it_ == outer_it_->end())
{
++outer_it_;
if (outer_it_ != outer_end_)
inner_it_ = outer_it_->begin();
}
}
void decrement_through_empty_inner_containers()
{
--outer_it_;
while(outer_it_ != outer_begin_ && outer_it_->begin() == outer_it_->end())
{
--outer_it_;
}
if(outer_it_->begin() != outer_it_->end())
{
inner_it_ = --outer_it_->end();
inner_it_assigned_ = true;
}
}
outer_iterator outer_it_;
outer_iterator outer_begin_;
outer_iterator outer_end_;
inner_iterator inner_it_;
bool inner_it_assigned_;
};
template <typename Iterator>
flattening_iterator<Iterator> flatten(Iterator start, Iterator first, Iterator last)
{
return flattening_iterator<Iterator>(start, first, last);
}
template <typename Iterator>
std::reverse_iterator<flattening_iterator<Iterator>> flatten_reverse(Iterator start, Iterator first, Iterator last)
{
return std::reverse_iterator<flattening_iterator<Iterator>>(flatten(start, first, last));
}
int main()
{
std::vector<std::vector<int>> v(3);
int i(0);
for (auto it(v.begin()); it != v.end(); ++it)
{
it->push_back(i++); it->push_back(i++);
it->push_back(i++); it->push_back(i++);
}
v.insert(v.begin(), std::vector<int>());
v.insert(v.begin(), std::vector<int>());
v.insert(v.begin() + 4, std::vector<int>());
v.push_back(std::vector<int>());
v.push_back(std::vector<int>());
for (auto it(flatten(v.begin(), v.begin(), v.end())), end = flatten(v.end(), v.begin(), v.end());
it != end;
++it)
{
std::cout << *it << ", ";
}
std::cout << "\n";
for (auto it(flatten_reverse(v.end(), v.begin(), v.end())), end = flatten_reverse(v.begin(), v.begin(), v.end());
it != end;
++it)
{
std::cout << *it << ", ";
}
std::cout << "\n";
std::vector<std::vector<int>> v2;
for (auto it(flatten(v2.end(), v2.begin(), v2.end())), end = flatten(v2.begin(), v2.begin(), v2.end());
it != end;
--it)
{
std::cout << *it << ", ";
}
std::cout << "\n";
}
Great question, and great attempt.
An iterator should always refer to a valid value, or one-past-the-end. *iter should always be valid unless iter == end where end is one-past-the-end. That "one-past-the-end" iterator is the cause of your worries. Either inner_it_ refers to a valid value, or your iterator is one-past-the-end.
James's "one-past-the-end" iterator exists when outer_it_ == outer_end_, and this is the situation you need to check for. This is the only situation in which inner_it_ should have an invalid value. As a result, you can get rid of the bool and just directly check outer_it_ == outer_end_.
Also, I find this line suspect:
inner_it_ = --outer_it_->end();
Is it possible that outer_it_ is a typedef to a pointer? If so, you can't call -- on a pointer value. This will definitely work:
inner_it_ = outer_it_->end();
--inner_it_;
and moreover, it conveys intent better, because the first looks like you're decrementing the end() iterator itself!