Iterating over mmaped gzip file with boost - c++

I am trying to learn boost and some template programming in C++ but I am really having such an hard time to implement a simple class for iterating over Gzip files using mapped_file_source. I essentially have an edge list in TSV format such that each line in the gzip file is of the format: <src:int><tab><dst:int>. What I want is to implement a gz_file class that exposes a begin and end iterator over which I can get an edge (std::pair<int,int>) each time I query the iterator.
The problem is the copy constructor which is broken since I cannot known where I am positioned in the gzip file.
Here is the code I have so far:
class gz_graph {
public:
gz_graph(const char * filename)
{
m_file.open(filename);
if (!m_file.is_open()) {
throw std::runtime_error("Error opening file");
}
m_data = m_file.data();
m_data_size = m_file.size() / sizeof(m_data[0]);
auto ret = posix_madvise((void*)m_data, m_data_size, POSIX_MADV_SEQUENTIAL);
}
class iterator;
iterator begin() const
{
return iterator(this, false);
}
iterator end() const
{
return iterator(this, true);
}
class iterator : public std::iterator<std::forward_iterator_tag, Edge> {
public:
iterator(gz_graph const * ref, bool consumed)
: m_ref(ref),
m_cur_edge(-1, -1),
m_consumed(consumed)
{
if (!consumed) {
initialize();
advance();
}
}
iterator(const iterator& x)
: m_ref(x.m_ref),
m_cur_edge(x.m_cur_edge)
{
if (!x.m_consumed) {
initialize();
advance();
}
std::cout << "Copy constructor" << std::endl;
}
value_type const& operator*() const
{
return m_cur_edge;
}
value_type const* operator->() const
{
return &m_cur_edge;
}
iterator& operator++()
{
advance();
return *this;
}
bool operator==(iterator const& other) const
{
assert(m_ref == other.m_ref);
return m_cur_edge == other.m_cur_edge;
}
bool operator!=(iterator const& other) const
{
return !(*this == other);
}
private:
void initialize()
{
boost::iostreams::array_source source(m_ref->m_data, m_ref->m_data_size);
m_in.push(boost::iostreams::gzip_decompressor());
m_in.push(source);
}
void advance()
{
std::string line_str;
if (!getline(m_in, line_str)) {
m_consumed = true;
m_cur_edge = Edge(-1, -1);
return;
}
std::vector<std::string> strs;
boost::split(strs, line_str, boost::is_any_of("\t"));
if (strs.size() != 2)
throw std::runtime_error("Required 2 fields per line");
int src = boost::lexical_cast<int>(strs.at(0));
int dst = boost::lexical_cast<int>(strs.at(1));
m_cur_edge = Edge(src, dst);
// std::cout << "Read line " << line_str << std::endl;
}
gz_graph const * m_ref;
Edge m_cur_edge;
boost::iostreams::filtering_istream m_in;
bool m_consumed;
};
private:
boost::iostreams::mapped_file_source m_file;
char const* m_data;
size_t m_data_size;
};

I would just use a std::istream_iterator here. I'm not sure how exactly to interpret your "input pseudo-code", so let me humor you and do the "complicated" parsing:
struct Edge : std::pair<int, int> { };
std::istream& operator>>(std::istream& is, Edge& edge)
{
using namespace boost::spirit::qi;
return is >> match("src:" > int_ > '\t' > "dst:" > int_ >> eol, edge.first, edge.second);
}
I expect you would be happy to have it much simpler, but simpler is easier, right?
Now the main program looks like
for (
std::istream_iterator<Edge> it(fs >> std::noskipws), end;
it != end;
++it)
{
std::cout << it->first << " to " << it->second << "\n";
}
Where fs is the filtering_istream that has the gzip_decompressor. See it Live On Coliru
Full Code
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_match.hpp>
#include <iterator>
struct Edge : std::pair<int, int> {
};
std::istream& operator>>(std::istream& is, Edge& edge)
{
using namespace boost::spirit::qi;
return is >> match("src:" > int_ > '\t' > "dst:" > int_ >> eol, edge.first, edge.second);
}
namespace io = boost::iostreams;
int main()
{
io::mapped_file_source csv("csv.txt.gz");
io::stream<io::mapped_file_source> textstream(csv);
io::filtering_istream fs;
fs.push(io::gzip_decompressor{});
fs.push(textstream);
for (
std::istream_iterator<Edge> it(fs >> std::noskipws), last;
it != last;
++it)
{
std::cout << it->first << " to " << it->second << "\n";
}
}

Related

Creating std::set copies only one element, how to fix this?

v_map has the correct amount of information stored, however when i try to use std::set it only copies one element ,I assume the first one. This is my first time using std::set , maybe I miss something here...Thanks for your help !
typedef std::map<std::string,std::pair<int,int>> points_map;
void list_average(points_map &v_map)
{
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_map.begin(),v_map.end(),compFunctor);
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
}
}
Note the following is the full program, I apologize in advance for the ugly code , and length of the code ,also I rewrote the name in the upper part of my code, in the full code , this particular function is called list_atlag
#include <iostream>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <codecvt>
#include <iterator>
#include <numeric>
#include <functional>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/program_options.hpp>
#include <boost/tokenizer.hpp>
class Adatok
{
public:
Adatok(std::string name, std::string path, std::string date, int points) : _name(name), _path(path), _date(date), _points(points) {}
Adatok(const Adatok &other) = default;
Adatok &operator=(const Adatok &other) = default;
std::string get_name() { return _name; }
std::string get_path() { return _path; }
std::string get_date() { return _date; }
int get_points() { return _points; }
private:
std::string _name;
std::string _path;
std::string _date;
int _points;
};
class Ranglista
{
public:
Ranglista(std::string name, int points) : _name(name), _points(points) {}
Ranglista(const Ranglista &other) = default;
Ranglista &operator=(const Ranglista &other) = default;
std::string get_name() { return _name; }
int get_points() { return _points; }
bool operator<(const Ranglista &other)
{
return _points > other._points;
}
private:
std::string _name;
int _points;
};
class Vedes
{
public:
Vedes(std::string name, int point) : _name(name), _point(point) { _count++; }
Vedes(const Vedes &other) = default;
Vedes &operator=(const Vedes &other) = default;
std::string get_name() { return _name; }
int get_point() { return _point; }
int get_count() { return _count; }
void set_stuff(int &points)
{
_point += points;
_count++;
}
bool operator<(const Vedes &other)
{
return _count > other._count;
}
private:
std::string _name;
int _point;
int _count = 0;
};
typedef std::map<std::string, int> path_value; //minden path + az erteke
typedef std::vector<Adatok> name_path_date; //bejegyzesek
typedef std::vector<Ranglista> ranglista; //ranglista
typedef std::map<std::string,std::pair<int,int>> vedes_vec; //vedesek
typedef std::function<bool(std::pair<std::string,std::pair<int,int>>,std::pair<std::string,std::pair<int,int>>)> Comparator;
void create_pv(path_value &, boost::filesystem::path); //feltolti a path+ertek map-ot
void create_npd(name_path_date &, path_value &, std::string input); //feltolti a bejegyzesek vektorat + mindenki pontszama map
void create_np(name_path_date &, path_value &); // name + path map
void list_np(path_value &name_point); // nam + path kiiratas
void list_bejegyzesek(name_path_date &bejegyzesek); // bejegyzesek vektora kiiratas
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path); //van-e mar ilyen bejegyzes
void create_rl(ranglista &rl_vec, path_value &name_point); //ranglista feltoltes
void list_rl(ranglista &rl_vec); //ranglista kiiratas
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec); //vedes atlag map
void list_atlag(vedes_vec &v_vec); //vedes atlag kiiratas
bool check_vedes(vedes_vec &v_vec, std::string name);
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points); //
//void accumulate_pv(path_value&);
int main(int argc, char **argv)
{
std::vector<std::string> roots = {"City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/", "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/"};
std::string input_file_name = "db-2018-05-06.csv";
/* OPTIONS */
boost::program_options::options_description desc("ALLOWED OPTIONS");
desc.add_options()("help", "help msg")("root,r", boost::program_options::value<std::vector<std::string>>())("csv", boost::program_options::value<std::string>(), "comma separated values")("rank", "rang lista")("vedes", "labor vedesek");
boost::program_options::positional_options_description pdesc;
pdesc.add("root", -1);
boost::program_options::variables_map vm;
boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).positional(pdesc).run(), vm);
boost::program_options::notify(vm);
int sum = 0;
path_value pv_map;
if (vm.count("help") || argc == 1)
{
std::cout << desc << std::endl;
return 1;
}
if (vm.count("root"))
{
roots = vm["root"].as<std::vector<std::string>>();
for (auto &i : roots)
{
boost::filesystem::path path(i);
create_pv(pv_map, path);
}
for (path_value::iterator it{pv_map.begin()}; it != pv_map.end(); it++)
sum += it->second;
//std::cout << sum << std::endl;create_npd
std::cout << std::accumulate(pv_map.begin(), pv_map.end(), 0, [](int value, const std::map<std::string, int>::value_type &p) { return value + p.second; });
std::cout << std::endl;
}
if (vm.count("csv"))
{
//input_file_name = vm["csv"].as<std::string>();
std::ifstream input_file{vm["csv"].as<std::string>()};
name_path_date bejegyzesek;
std::string temp;
path_value name_point;
while (getline(input_file, temp))
create_npd(bejegyzesek, pv_map, temp);
create_np(bejegyzesek, name_point);
//list_bejegyzesek(bejegyzesek);
//list_np(name_point);
if (vm.count("rank"))
{
ranglista rl_vec;
create_rl(rl_vec, name_point);
list_rl(rl_vec);
}
if (vm.count("vedes"))
{
vedes_vec v_vec;
vedes_atlag(bejegyzesek, v_vec);
list_atlag(v_vec);
}
return 0;
}
return 0;
}
void create_pv(path_value &pv_map, boost::filesystem::path path)
{
boost::filesystem::directory_iterator it{path}, eod;
BOOST_FOREACH (boost::filesystem::path const &p, std::make_pair(it, eod))
{
if (boost::filesystem::is_regular_file(p))
{
boost::filesystem::ifstream regular_file{p};
std::string temp;
int sum = 0; //aktualis .props erteke
while (getline(regular_file, temp))
{
temp.erase(0, temp.find_last_of('/'));
temp.erase(0, temp.find_first_of(' '));
sum += std::atoi((temp.substr(temp.find_first_of("0123456789"), temp.find_last_of("0123456789"))).c_str());
}
std::string result = p.string();
std::string result_path = result.substr(0, result.find_last_of('/'));
//std::cout << result_path << std::endl;
//pv_map.insert(std::make_pair(result, sum));
pv_map[result_path] = sum;
}
else
create_pv(pv_map, p);
}
}
//void accumulate_pv(path_value& pv_map)
//{
// std::cout<<std::accumulate(pv_map.begin(),pv_map.end(),0,[](int value,const path_value::int& p){return value+p.second;});
//}
void create_npd(name_path_date &bejegyzesek, path_value &pv_map, std::string input)
{
boost::tokenizer<boost::escaped_list_separator<char>> tokenizer{input};
boost::tokenizer<boost::escaped_list_separator<char>>::iterator it{tokenizer.begin()};
std::string name = *it;
std::string path = *(++it);
std::string date = *(++it);
path = path.substr(2);
if (!check_bejegyzesek(bejegyzesek, name, path))
bejegyzesek.push_back(Adatok(name, path, date, pv_map["/home/erik/Documents/Programs/"+path]));
}
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path)
{
bool ok = false;
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
{
if ((it->get_name() == name) && (it->get_path() == path))
ok = true;
}
return ok;
}
bool check_vedes(vedes_vec &v_vec, std::string name)
{
vedes_vec::iterator it = v_vec.find(name);
if (it != v_vec.end()) return true;
else return false;
}
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points)
{
/*for (auto &it : v_vec)
if (it.get_name() == name)
it.set_stuff(points);
*/
vedes_vec::iterator i = v_vec.find(name);
std::pair<int,int> it = i->second;
//auto& jt = it->second;
it.first++;
it.second += points;
}
void create_np(name_path_date &bejegyzesek, path_value &name_point)
{
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
if (name_point.count(it->get_name()) == 0)
name_point.insert(std::make_pair(it->get_name(), it->get_points()));
else
name_point[it->get_name()] += it->get_points();
}
void list_np(path_value &name_point)
{
for (path_value::iterator it{name_point.begin()}; it != name_point.end(); it++)
{
if (it->second)
std::cout << it->first << " " << it->second << std::endl;
}
}
void list_bejegyzesek(name_path_date &bejegyzesek)
{
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
if (it->get_name() == "Varga Erik")
std::cout << it->get_name() << " " << it->get_path() << " " << it->get_points() << std::endl;
}
void create_rl(ranglista &rl_vec, path_value &name_point)
{
for (auto &it : name_point)
{
if (it.second > 0)
rl_vec.push_back(Ranglista(it.first, it.second));
}
std::sort(rl_vec.begin(), rl_vec.end());
}
void list_rl(ranglista &rl_vec)
{
for (auto &it : rl_vec)
std::cout << it.get_name() << " " << it.get_points() << std::endl;
}
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec)
{
std::string key = "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/";
for (auto &it : bejegyzesek)
{
if ((it.get_path().find("City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/") != std::string::npos) && (it.get_points()) && (!check_vedes(v_vec, it.get_name())))
v_vec.insert(std::make_pair(it.get_name(),std::make_pair(1,it.get_points())));
else if ((check_vedes(v_vec, it.get_name())) && (it.get_path().find("City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/") != std::string::npos) && (it.get_points()))
vedes_elem(v_vec, it.get_name(), it.get_points());
}
}
void list_atlag(vedes_vec &v_vec)
{
//std::sort(v_vec.begin(), v_vec.end());
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_vec.begin(),v_vec.end(),compFunctor);
//int sum = 0;
//int csum = 0;
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
//sum += it.get_point();
//csum += it.get_count();
//sum = std::accumulate(v_vec.begin(), v_vec.end(), 0, [](int i, Vedes &o) { return i + o.get_point(); });
//csum = std::accumulate(v_vec.begin(), v_vec.end(), 0, [](int i, Vedes &o) { return i + o.get_count(); });
}
//std::cout << (sum - csum) / csum << std::endl;
}
so, as described here
template<
class Key,
class Compare = std::less<Key>,
class Allocator = std::allocator<Key>
> class set;
std::set is an associative container that contains a sorted set of unique objects of type Key.
I cleaned up your code, and made a Minimal, Complete, and Verifiable example,
#include <iostream>
#include <map>
#include <set>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using points_set_pair = std::pair<std::string, point_pair>;
auto compFunctor = [](const points_set_pair &elem1, const points_set_pair &elem2)
{
return elem1.second.first < elem2.second.first;
};
using points_set = std::set<points_set_pair, decltype(compFunctor)>;
void list_average(const points_map &v_map)
{
points_set v_set(v_map.begin(),v_map.end(),compFunctor);
for (auto &elem : v_set)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
Now consider the first version of main
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 3, 4}}};
list_average(v_map);
}
output:
foo 1
bar 0
Now consider the second version of main:
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}};
list_average(v_map);
}
output:
bar 3
See the problem? As .second.first of the elements are both 1, the latter replaces the first. It is not unique. That's the downside of std::set.
So, what then?
Don't use std::set, but use std::vector and std::sort. Example:
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using string_point_pair = std::pair<std::string, point_pair>;
auto compFunctor = [](string_point_pair const &elem1, string_point_pair const &elem2)
{
return
elem1.second.first != elem2.second.first?
elem1.second.first < elem2.second.first:
elem1.second.second < elem2.second.second;
};
void list_average(points_map const &v_map)
{
std::vector<string_point_pair> v_vec(v_map.begin(),v_map.end());
std::sort(v_vec.begin(), v_vec.end(), compFunctor);
for (auto &elem : v_vec)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}, {"baz", { 2, 4}}};
list_average(v_map);
}
Output:
foo 1
bar 3
baz 1
live demo

C++ Find and save duplicates in vector

I have a custom vector of my user defined type vector
First vector gets filled with elements through stdin, then i sort it and try to find duplicates in it and save them
i've managed to find all unique elements, but i need to find and get a vector of duplicates
I need a hint or a simple solution for this problem
here's my code below:
Agressor.h
#ifndef Agressor_h
#define Agressor_h
#include <string>
#include <vector>
using namespace std;
class Agressor{
public:
/*const char**/ string traderIdentifier;
/*const char**/ string side;
int quantity;
int price;
vector<Agressor> v;
void display(){
cout << traderIdentifier << " " << side << " " << quantity << " " << price << endl;
}
explicit Agressor(){
}
~Agressor(){
}
friend ostream &operator<<(ostream& stream, const Agressor& item);
const friend bool operator > (const Agressor &a1, const Agressor &a2);
// const friend bool operator == (const Agressor &a1, const Agressor &a2);
/* vector<Agressor>& operator[](int i ){
return v[i];
}*/
};
ostream &operator<<(ostream& stream, const Agressor& item) {
string side = "";
if(item.side == "B"){
side = '+';
}else{
if(item.side == "S"){
side = "-";
}
}
stream << item.traderIdentifier << side << item.quantity << "#" << item.price << "\n";
return stream;
}
const bool operator == (const Agressor &a1, const Agressor &a2){
bool isEqual = false;
if((a1.price*a1.quantity == a2.price*a2.quantity) && (a1.traderIdentifier == a2.traderIdentifier) && (a1.side == a2.side)){
isEqual = true;
}
return(isEqual);
}
const bool operator > (const Agressor &a1, const Agressor &a2){
bool isGreater = false;
if(a1.price*a1.quantity > a2.price*a2.quantity){
isGreater = true;
}
return(isGreater);
}
#endif /* Agressor_h */
main.cpp
#include <iostream>
#include "Agressor.h"
#include <sstream>
using namespace std;
vector<string> &split(const string &s, char delim, vector<string> &elems)
{
stringstream ss(s);
string item;
while (getline(ss, item, delim))
{
elems.push_back(item);
}
return elems;
}
vector<string> split(const string &s, char delim)
{
vector<string> elems;
split(s, delim, elems);
return elems;
}
bool equal_comp(const Agressor& a1, const Agressor& a2){
if((a1.price*a1.quantity == a2.price*a2.quantity) && (a1.traderIdentifier == a2.traderIdentifier) && (a1.side == a2.side)){
return true;
}
return false;
}
int main(int argc, const char * argv[]) {
Agressor agr;
while (true) {
std::string sText;
cout << "enter query:" << endl;
std::getline(std::cin, sText);
if(sText == "q"){
cout << "Program terminated by user" << endl;
break;
}else{
std::vector<std::string> sWords = split(sText, ' ');
agr.traderIdentifier = sWords[0];
agr.side = sWords[1];
agr.quantity = stoi(sWords[2]);
agr.price = stoi(sWords[3]);
agr.v.push_back(agr);
vector<Agressor>::iterator it;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
//unique(agr.v.begin(), agr.v.end(), equal_comp);
for (vector<Agressor>::const_iterator i = agr.v.begin(); i != agr.v.end(); ++i)
cout << *i << ' ';
}
}
cout << "here we go..." << endl;
vector<Agressor>::iterator it;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
//it = unique(agr.v.begin(),agr.v.end(), equal_comp);
//agr.v.resize( distance(agr.v.begin(),it) );
agr.v.erase(unique(agr.v.begin(),agr.v.end(), equal_comp), agr.v.end());
copy(agr.v.begin(), agr.v.end(), ostream_iterator<Agressor>(cout, "\n"));
return 0;
}
You might use something like:
template <typename T>
std::vector<T> get_duplicates(const std::vector<T>& v)
{
// expect sorted vector
auto it = v.begin();
auto end = v.end();
std::vector<T> res;
while (it != end) {
it = std::adjacent_find(it, end);
if (it != end) {
++it;
res.push_back(*it);
}
}
return res;
}
std::unique overwrites duplicate values with later non-duplicate values. You can implement a similar algorithm that moves the values to somewhere.
template<class ForwardIt, class OutputIt, class BinaryPredicate>
ForwardIt unique_retain(ForwardIt first, ForwardIt last, OutputIt d_first, BinaryPredicate p)
{
if (first == last)
return last;
ForwardIt result = first;
while (++first != last) {
if (!p(*result, *first) && ++result != first) {
*d_first++ = std::move(*result);
*result = std::move(*first);
}
}
return ++result;
}
(adapted from this possible implementation of std::unique)
You would then use it like
vector<Agressor> dups;
sort(agr.v.begin(), agr.v.end(), greater<Agressor>());
auto it = unique_retain(agr.v.begin(),agr.v.end(), std::back_inserter(dups), equal_comp);
agr.v.erase(it, agr.v.end());

How to prevent infinite loop of custom iterator

I implemented my first custom iterator, which is supposed to return an std::pair at each iteration. The only problem, is it loops infinitely and I do not know how to provide stop condition. The code looks like so:
#include <iostream>
#include <vector>
#include <utility>
class Simple
{
private:
std::vector<std::size_t> indices;
std::vector<int> values;
public:
void insert(std::size_t index, int value)
{
indices.push_back(index);
values.push_back(value);
}
int at(std::size_t index)
{
return values[indices[index]];
}
class Iterator
{
private:
const std::vector<std::size_t>* indices;
const std::vector<int>* values;
std::size_t pos = 0;
public:
Iterator(const std::vector<std::size_t>* indices_, const std::vector<int>* values_, const std::size_t &pos_ = 0):
values(values_), indices(indices_), pos(pos_){ }
bool operator==(const Iterator& other){
return this == &other;
}
bool operator!=(const Iterator& other){
return !operator==(other);
}
Iterator operator++() {
pos++;
Iterator i = *this;
return i;
}
std::pair<std::size_t, int> operator*()
{
if (pos < (*values).size())
{
return std::make_pair((*indices)[pos], (*values)[pos]);
}
std::cout << "EMPTY PAIR" << std::endl; //loops infinitely and prints this message
return std::pair<std::size_t,int>{};
}
std::pair<std::size_t, int>* operator->()
{
if (pos < (*values).size())
{
std::pair<std::size_t,int> *p;
*p = std::make_pair((*indices)[pos], (*values)[pos]);
return p;
}
return nullptr;
}
};
Iterator begin() const
{
return Iterator(&indices, &values, 0);
}
Iterator end() const
{
return Iterator(&indices, &values, values.size());
}
};
int main() {
Simple s;
s.insert(10, 100);
std::cout << s.at(10) << std::endl;
int i = 0;
for (const std::pair<std::size_t, int> &p : s)
{
std::cout << p.first << " " << p.second << std::endl;
if (i > 3) break; // otherwise it will loop infinitely
i++;
}
return 0;
}
I would like to stress, that this example is simplified and for demonstration purposes, so you may not ask me, why I'm not using map or unorded_map (just because what I'm implementing looks like a map). The question is solely about iterator and I need understanding what may be wrong with iterator itself and how to provide stop condition.

implement iterator on every elements of value containers against each key of map using boost iterator

How to implement an iterator of just on values of a map/unordered_map using boost::iterator_adaptor? I've tried following code but it does not work because of the line with comment.
Is there a solution to avoid the problem?
The question here is slightly different from map_values adapter example shown in boost code as here the value field in map is another container like list or vector and the requirement here is to iterate over all elements of those lists for every key of the map.
The deref of iterator is of type of value_type of those list/vector.The end of iterator is the end of list of last key
#include <vector>
#include <boost/unordered_map.hpp>
#include <cassert>
#include <iostream>
#include <boost/iterator/iterator_adaptor.hpp>
class DS {
public:
DS() : _map() {}
~DS() {
for (Map::iterator it = _map.begin(); it != _map.end(); ++it) {
delete (it->second);
}
}
void add(int key_, const std::vector< int > &value_)
{
IntList *ptr = new IntList(value_);
assert(ptr);
_map.insert(Map::value_type(key_, ptr));
}
private:
typedef std::vector< int > IntList;
typedef boost::unordered_map< int, IntList* > Map;
Map _map;
public:
class KeyIter : public boost::iterator_adaptor< KeyIter,
Map::const_iterator,
int,
boost::forward_traversal_tag,
int>
{
public:
KeyIter() : KeyIter::iterator_adaptor_() {}
private:
friend class DS;
friend class boost::iterator_core_access;
explicit KeyIter(Map::const_iterator it) : KeyIter::iterator_adaptor_(it) {}
explicit KeyIter(Map::iterator it) : KeyIter::iterator_adaptor_(it) {}
int dereference() const { return this->base()->first; }
};
class ValueIter : public boost::iterator_adaptor< ValueIter,
Map::const_iterator,
int,
boost::forward_traversal_tag,
int>
{
public:
ValueIter()
: ValueIter::iterator_adaptor_()
, _lIt()
{}
private:
friend class DS;
friend class boost::iterator_core_access;
explicit ValueIter(Map::const_iterator it)
: ValueIter::iterator_adaptor_(it)
, _lIt()
, _mIt(it)
{
IntList *pt = it->second; // <<-- issue here is I can't find if I've already reached the end of the map
if (pt) {
_lIt = it->second->begin();
}
}
int dereference() const { return *_lIt; }
void increment()
{
if (_lIt == _mIt->second->end()) {
++_mIt;
_lIt = _mIt->second->begin();
} else {
++_lIt;
}
}
IntList::iterator _lIt;
Map::const_iterator _mIt;
};
KeyIter beginKey() const { return KeyIter(_map.begin()); }
KeyIter endKey() const { return KeyIter(_map.end()); }
ValueIter beginValue() const { return ValueIter(_map.begin()); }
ValueIter endValue() const { return ValueIter(_map.end()); }
};
int main(int argc, char** argv)
{
DS ds;
std::vector< int > v1;
v1.push_back(10);
v1.push_back(30);
v1.push_back(50);
ds.add(90, v1);
std::vector< int > v2;
v2.push_back(20);
v2.push_back(40);
v2.push_back(60);
ds.add(120, v2);
std::cout << "------------ keys ---------------" << std::endl;
for (DS::KeyIter it = ds.beginKey(); it != ds.endKey(); ++it) {
std::cout << (*it) << std::endl;
}
std::cout << "------------ values ---------------" << std::endl;
// std::cout << (*(ds.beginValue())) << std::endl;
for (DS::ValueIter it = ds.beginValue(); it != ds.endValue(); ++it) {
std::cout << (*it) << std::endl;
}
return 0;
}
Implemented in c++11. You should be able to do the conversion to boost/c++03 fairly simply.
This iterator is FORWARD ONLY and it's quite fragile (see the comparison operator).
user discretion advised.
#include <iostream>
#include <vector>
#include <unordered_map>
typedef std::vector< int > IntList;
typedef std::unordered_map< int, IntList* > Map;
struct whole_map_const_iterator
{
using C1 = IntList;
using C2 = Map;
using I1 = C1::const_iterator;
using I2 = C2::const_iterator;
using value_type = I1::value_type;
using reference = I1::reference;
whole_map_const_iterator(I2 i2) : _i2(i2) {}
bool operator==(const whole_map_const_iterator& r) const {
if (_i2 != r._i2)
return false;
if (deferred_i1 && r.deferred_i1)
return true;
if (deferred_i1 != r.deferred_i1)
return false;
return _i1 == r._i1;
}
bool operator!=(const whole_map_const_iterator& r) const { return !(*this == r); }
reference operator*() const {
check_deferred();
return *_i1;
}
void check_deferred() const {
if (deferred_i1) {
_i1 = _i2->second->begin();
_i1limit = _i2->second->end();
deferred_i1 = false;
}
}
void go_next()
{
check_deferred();
if (++_i1 == _i1limit) {
++_i2;
deferred_i1 = true;
}
}
whole_map_const_iterator& operator++() {
go_next();
return *this;
}
whole_map_const_iterator operator++(int) {
auto result = *this;
go_next();
return result;
}
I2 _i2;
mutable I1 _i1 = {}, _i1limit = {};
mutable bool deferred_i1 = true;
};
IntList a { 1, 2, 3, 4, 5 };
IntList b { 6, 7, 8, 9, 10 };
Map m { { 1, &a }, { 2, &b } };
int main()
{
using namespace std;
auto from = whole_map_const_iterator(m.begin());
auto to = whole_map_const_iterator(m.end());
for ( ; from != to ; ++from) {
std::cout << *from << std::endl;
}
return 0;
}
example output:
6
7
8
9
10
1
2
3
4
5
For bonus points, answer this question:
Q: Why all that damn complication over the deferred flag?

Design operator++ for iterating over certain elements of a multimap

I have the following class declaration:
class Dictionnary{
private:
map< int,list<string> > data;
public:
bool isPrime();
class prime_iterator{
private:
map< int,list<string> >::iterator it;
public:
iterator(){}
prime_iterator & operator++(){
++it;
while(it != data.end() && !isPrime(it->first)){
++it;
}
return it;
}
...
};
which is intended to provide an iterator over prime keys of a map<int,list<string>>. I'm not sure operator++ is well implemented.
First, is it a good design to do it != data.end()accessing the outer class? Second, is operator++ returning the right thing or should return only prime_iterator?. Also, can you think about any better solution?
My advice is to always try to find solution elsewhere - if not found - then only create by yourself:
You can use boost::fiter_iterator
For your case it would look in this way:
#include <boost/iterator/filter_iterator.hpp>
using DataMap = std::map<int, std::list<std::string>>;
struct is_prime_number {
bool operator()(const DataMap::value_type& x) { return x.first % 2 == 0; }
}; // I know this is just is_even - not is_prime :D
using DataMapPrimeIter = boost::filter_iterator<is_prime_number, DataMap::iterator>;
inline DataMapPrimeIter only_prime_begin(DataMap& dataMap)
{
return boost::make_filter_iterator<is_prime_number>(dataMap.begin(), dataMap.end());
}
inline DataMapPrimeIter only_prime_end(DataMap& dataMap)
{
return boost::make_filter_iterator<is_prime_number>(dataMap.end(), dataMap.end());
}
And usage:
int main()
{
DataMap dataMap{{1,{"A","B"}}, {2,{"C", "D", "E"}}};
for (auto i = only_prime_begin(dataMap), end = only_prime_end(dataMap); i != end; ++i)
{
std::cout << i->first << i->second.front() << std::endl;
}
}
If you want to have your own implementation, or you cannot use boost in your project - then look at boost implementation - it is for free to look at...
My humble solution. The typedef is just for convenience.
#include <iterator>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <algorithm>
typedef std::map<int, std::string> map_t;
class Dictionary {
private:
map_t& m_map;
public:
class prime_iterator {
public:
prime_iterator( map_t::iterator begin, map_t::iterator end )
: m_current(begin), m_end(end) {
runUntilPrime();
}
prime_iterator& operator++() {
m_current++;
runUntilPrime();
return *this;
}
bool operator != (prime_iterator other) {
return other.m_current != m_current;
}
map_t::value_type& operator* () {
return *m_current;
}
private:
map_t::iterator m_current;
map_t::iterator m_end;
bool isPrime(int x) {
std::vector<int> primes = { 1, 2, 3, 5, 7, 11, 13, 17 };
return std::find( primes.begin(), primes.end(), x ) != primes.end();
}
void runUntilPrime() {
while( m_current != m_end && !isPrime(m_current->first) ) {
m_current++;
}
}
};
Dictionary( map_t& tmap )
: m_map(tmap) {}
prime_iterator begin() {
return prime_iterator( m_map.begin(), m_map.end() );
}
prime_iterator end () {
return prime_iterator(m_map.end(), m_map.end());
}
};
int main( int argc, char** argv ) {
map_t map;
map.emplace(0, "zero");
map.emplace(1, "one");
map.emplace(2, "two");
map.emplace(3, "three");
map.emplace(4, "four");
map.emplace(5, "five");
map.emplace(6, "six");
map.emplace(7, "seven");
map.emplace(8, "eight");
map.emplace(9, "nine");
map.emplace(10, "ten");
map.emplace(13, "thirteen");
Dictionary dict( map );
for( auto p : dict ) {
std::cout << p.first << "\t" << p.second << std::endl;
}
return 0;
}