In my code I need to have a functionality to iterate over all elements and check if there some element already exists possibly as soon as possible, so my choice fell on boost multi index container where I can use vector and unordered_set interface for my class Animal at the same time. The problem is that I am not able to find some element through unordered_set interface since I replaced key from std::string to std::array<char, 50> and adjusted the code, and I don't know what I am doing wrong ?
code:
https://wandbox.org/permlink/dnCaEzYVdXkTFBGo
#include <array>
#include <algorithm>
#include <iostream>
#include <chrono>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <memory>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/identity.hpp>
int constexpr elements_size{ 1'000'000 };
struct Animal
{
Animal(std::string name, std::string description, int leg, int age, double maxSpeed) noexcept :
description_{std::move(description)}, leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{
std::copy(name.begin(), name.end(), name_.data());
}
Animal(std::string const& name, std::string const& description) noexcept :
description_{description}
{
std::copy(name.begin(), name.end(), name_.data());
}
Animal(Animal&& animal) noexcept
{
name_ = name_;
description_ = std::move(animal).description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
}
Animal(Animal const& animal) noexcept
{
name_ = animal.name_;
description_ = animal.description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
}
Animal& operator=(Animal&& animal) noexcept
{
name_ = name_;
description_ = std::move(animal).description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
return *this;
}
Animal& operator=(Animal const& animal) noexcept
{
name_ = animal.name_;
description_ = animal.description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
return *this;
}
std::array<char, 50> name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
struct Hasher
{
bool print_;
Hasher(bool print = false): print_{print} {}
std::size_t operator()(std::array<char, 50> const& name) const
{
if (print_)
std::cout << "array hash" << std::hash<std::string_view>{}(name.data()) << std::endl;
return std::hash<std::string_view>{}(name.data());
}
std::size_t operator()(std::string const& name) const
{
if (print_)
std::cout << "string hash" << std::hash<std::string_view>{}(name.c_str()) << std::endl;
return std::hash<std::string_view>{}(name.c_str());
}
std::size_t operator()(const char* name) const
{
if (print_)
std::cout << "char hash" << std::hash<std::string_view>{}(name) << std::endl;
return std::hash<std::string_view>{}(name);
}
};
struct KeysComparator
{
bool operator()(std::array<char, 50> const& a1, std::array<char, 50> const& a2) const {return a1 == a2; }
template <typename T>
bool operator()(std::string const& n1, T const& t) const
{
std::cout << "### value.name_" << t.value.name_.data() << ", n1: " << n1 << std::endl;
return n1 == t.value.name_.data();
}
};
template<typename TimePoint>
std::string getElapsedTime(TimePoint const& start, TimePoint const& end)
{
auto micro = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(micro);
auto sec = std::chrono::duration_cast<std::chrono::seconds>(milli);
return {std::to_string(micro.count()) + " µs, " + std::to_string(milli.count()) + " ms, " + std::to_string(sec.count()) + " s"};
}
template<typename TimePoint>
void printStatistics(TimePoint const& emplace_start, TimePoint const& emplace_end, TimePoint const& iterate_start, TimePoint const& iterate_end,
TimePoint const& find_start, TimePoint const& find_end, intmax_t const sum, std::string target)
{
std::cout << "Elapsed time emplace: " << getElapsedTime(emplace_start, emplace_end)
<< " | iterate: " << getElapsedTime(iterate_start, iterate_end)
<< " | find: " << getElapsedTime(find_start, find_end)
<< ", sum:" << sum << " , calculation for " << target << std::endl;
}
void test()
{
using namespace boost::multi_index;
using Animal_multi = multi_index_container<Animal, indexed_by<
random_access<>,
hashed_unique<
composite_key<Animal, member<Animal, std::array<char, 50>, &Animal::name_>>,
composite_key_hash<Hasher>,
composite_key_equal_to<KeysComparator>>
>>;
Animal_multi container;
auto emplace_start = std::chrono::steady_clock::now();
for (auto i = 0; i < elements_size; ++i)
container.emplace_back("the really long name of some animal 12345678910_" + std::to_string(i),
"bla bla bla bla bla bla bla bla bla bla bla bla bla", 4, i, i + 2);
auto emplace_end = std::chrono::steady_clock::now();
intmax_t sum{0};
auto iterate_start = std::chrono::steady_clock::now();
for (auto const& e : container)
sum += e.age_;
auto iterate_end = std::chrono::steady_clock::now();
KeysComparator key_comparator;
Hasher hasher{true};
auto find_start = std::chrono::steady_clock::now();
auto &container_interface = container.get<1>();
auto isSucceeded = container_interface.count("the really long name of some animal 12345678910_" + std::to_string(elements_size-1),
hasher, key_comparator);
if (not isSucceeded)
std::cout << "WARN: Element has not been found." << std::endl;
auto find_end = std::chrono::steady_clock::now();
printStatistics(emplace_start, emplace_end, iterate_start, iterate_end, find_start, find_end, sum, "Animal_multi (boost multi_index)");
}
int main()
{
test();
return 0;
}
There are a number of bugs like in the move constructor:
name_ = name_; // oops this does nothing at all
Just follow Rule Of Zero. This will also inform you that std::string copy/assignment are not noexcept.
The name copy should probably be length-limited:
std::copy_n(name.begin(), std::min(name.size(), name_.size()), name_.data());
At this point I notice something that might explain your trouble: you don't NUL-terminate, nor make sure that the array is 0-initialized.
BINGO
Indeed, just a few lines down I spot:
return std::hash<std::string_view>{}(name.data());
That's... UB! Your string_view might contain indeterminate data, but what's worse, you would NEVER have copied the terminating NUL character. So, std::string_view will model a string with indeterminate length which WILL likely exceed 50.
Read here about Nasal Demons (UB)
Such are the perils of skipping standard library types for the old C craft.
First Dig
So, here's the entirety of the class with equal/better characteristics:
using Name = std::array<char, 50>;
struct Animal {
Animal(std::string_view name, std::string description,
int leg = 0, int age = 0, double maxSpeed = 0) noexcept
: name_{0}, // zero initialize!
description_{std::move(description)},
leg_{leg},
age_{age},
maxSpeed_{maxSpeed}
{
constexpr auto Capacity = std::tuple_size<Name>::value;
constexpr auto MaxLen = Capacity - 1; // reserve NUL char
assert(name.length() < MaxLen);
std::copy_n(name.data(), std::min(name.length(), MaxLen), name_.data());
}
//Animal ( Animal&& animal ) noexcept = default;
//Animal ( Animal const& animal ) = default;
//Animal& operator= ( Animal&& animal ) noexcept = default;
//Animal& operator= ( Animal const& animal ) = default;
Name name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
Improving: FixedString
This just screams for a better Name type. How about, FixedString:
template <size_t N> struct FixedString {
static_assert(N > 1); // require space for NUL char
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
std::string_view str() const { return { data(), size() }; }
operator std::string_view() const { return str(); }
auto data() const { return data_.data(); }
auto data() { return data_.data(); }
auto c_str() const { return data_.data(); }
auto c_str() { return data_.data(); }
auto begin() const { return data_.begin(); }
auto end() const { return data_.end(); }
auto begin() { return data_.begin(); }
auto end() { return data_.end(); }
size_t size() const {
auto terminator = std::memchr(data(), 0, data_.max_size());
return terminator
? static_cast<char const*>(terminator) - data()
: data_.max_size();
};
bool operator<(FixedString const& rhs) const { return str() < rhs.str(); }
bool operator==(FixedString const& rhs) const { return str() == rhs.str(); }
bool operator!=(FixedString const& rhs) const { return str() != rhs.str(); }
// optimizations:
bool operator<(std::string_view const& rhs) const { return str() < rhs.substr(0, N-1); }
bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
private:
std::array<char, N> data_;
};
Now you can simply
using Name = FixedString<50>;
And all your Names will magically (and safely) convert to and from string views.
using Name = FixedString<50>;
struct Animal {
Animal(std::string_view name, std::string description,
int leg = 0, int age = 0, double maxSpeed = 0) noexcept
: name_{name}, description_{std::move(description)},
leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{ }
Name name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
Everything Simplifies With The Right Abstraction
This is the most important lesson I think I learned in my programming career: choosing the right abstraction leads to simplicity. Here, we evaporate two messy helpers:
using Hasher = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;
Boom. They do everything you had, but better.
Now, The Missing Element
After simplifying the whole thing to this it should become pretty obvious that a std::array<char, 50> can never correctly contain names longer than 50 characters. Indeed, checking the insertions:
auto emplace_start = Now();
size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
auto [_, ok] = container.emplace_back(
make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
4, i, i + 2);
if (!ok) ++duplicates;
}
if (duplicates) {
std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}
auto emplace_end = Now();
Reveals that:
Oops, 999990 duplicate keys not inserted
Elapsed time emplace: 116.491ms iterate: 0.000145ms find: 0.000597ms, sum:45 , calculation for Animal_multi (boost multi_index)
At least, now you replaced Undefined
Behaviour with
constraint checks.
Of course, just increasing the name capacity fixes it: [https://wandbox.org/permlink/6AamJfXe76nYALfR)
using Name = FixedString<60>;
Prints:
Elapsed time emplace: 594.475ms iterate: 18.6076ms find: 0.003138ms, sum:499999500000 , calculation for Animal_multi (boost multi_index)
Alternatively you can throw on Name construction with an overly long name: Live On Wandbox
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
Which duly prints
terminate called after throwing an instance of 'std::length_error'
what(): FixedString
Full Listing
This demo uses FixedString<60> to avoid the key errors:
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <iostream>
#include <iomanip>
#include <chrono>
using namespace std::chrono_literals;
int constexpr elements_size{ 1'000'000 };
template <size_t N> struct FixedString {
static_assert(N > 1); // require space for NUL char
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
std::string_view str() const { return { data(), size() }; }
operator std::string_view() const { return str(); }
auto data() const { return data_.data(); }
auto data() { return data_.data(); }
auto c_str() const { return data_.data(); }
auto c_str() { return data_.data(); }
auto begin() const { return data_.begin(); }
auto end() const { return data_.end(); }
auto begin() { return data_.begin(); }
auto end() { return data_.end(); }
size_t size() const {
auto terminator = std::memchr(data(), 0, data_.max_size());
return terminator
? static_cast<char const*>(terminator) - data()
: data_.max_size();
};
bool operator<(std::string_view const& rhs) const { return str() < rhs.substr(0, N-1); }
bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
bool operator<(FixedString const& rhs) const { return str() < rhs.str(); }
bool operator==(FixedString const& rhs) const { return str() == rhs.str(); }
bool operator!=(FixedString const& rhs) const { return str() != rhs.str(); }
private:
std::array<char, N> data_;
};
using Name = FixedString<60>;
struct Animal {
Animal(std::string_view name, std::string description,
int leg = 0, int age = 0, double maxSpeed = 0) noexcept
: name_{name}, description_{std::move(description)},
leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{ }
Name name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
using Hasher = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;
using Clock = std::chrono::steady_clock;
using Duration = Clock::duration;
static auto Now = Clock::now;
void printStatistics(Duration emplace, Duration iterate, Duration find,
intmax_t const sum, std::string target)
{
std::cout << "Elapsed time"
<< " emplace: " << (emplace/1.0ms) << "ms"
<< " iterate: " << (iterate/1.0ms) << "ms"
<< " find: " << (find/1.0ms) << "ms"
<< ", sum:" << sum
<< " , calculation for " << target
<< std::endl;
}
void test() {
namespace bmi = boost::multi_index;
using Animal_multi = bmi::multi_index_container<Animal,
bmi::indexed_by<
bmi::random_access<>,
bmi::hashed_unique<
bmi::tag<struct by_name>,
bmi::member<Animal, Name, &Animal::name_>, Hasher, KeysComparator>
>
>;
Animal_multi container;
auto make_name = [](size_t id) {
return "the really long name of some animal 12345678910_" + std::to_string(id);
};
auto emplace_start = Now();
size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
auto [_, ok] = container.emplace_back(
make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
4, i, i + 2);
if (!ok) ++duplicates;
}
if (duplicates) {
std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}
auto emplace_end = Now();
intmax_t sum{ 0 };
auto iterate_start = Now();
for (auto const& e : container) {
sum += e.age_;
}
auto iterate_end = Now();
auto find_start = Now();
{
auto& name_idx = container.get<by_name>();
auto last_key = make_name(elements_size - 1);
if (name_idx.count(std::string_view(last_key)) == 0u) {
std::cout << "WARN: Element has not been found." << std::endl;
}
}
auto find_end = Now();
printStatistics(
emplace_end - emplace_start,
iterate_end - iterate_start,
find_end - find_start, sum,
"Animal_multi (boost multi_index)");
}
int main() { test(); }
Related
I have the following templated merge sort program:
#include <iostream>
#include <vector>
#include <string>
// trying to create a default method call
class CInstance {
private:
std::string str_;
public:
CInstance(const std::string& str) : str_(str) {}
bool const operator>(const CInstance& that){ return (this->str_.size() > that.str_.size());}
};
template<class T>
class CObj {
private:
T val;
public:
CObj(const T n) : val(n) {}
T Get() { return val; }
};
template<class T>
using vcobj = std::vector<CObj<T>>;
template<class T>
void display(vcobj<T>& v) {
for (auto &i : v) {
std::cout << i.Get() << " ";
}
std::cout << "\n";
}
template<class T>
vcobj<T> Merge(vcobj<T>& lv, vcobj<T>& rv) {
vcobj<T> ret;
auto lsize = lv.size();
auto rsize = rv.size();
unsigned int lpin = 0,
rpin = 0;
while(lpin < lsize && rpin < rsize) {
if(lv.at(lpin).Get() > rv.at(rpin).Get()) {
ret.emplace_back(rv.at(rpin).Get());
rpin++;
}
else {
ret.emplace_back(lv.at(lpin).Get());
lpin++;
}
}
for (auto i=lpin; i<lsize; i++) {
ret.emplace_back(lv.at(i).Get());
}
for (auto i=rpin; i<rsize; i++) {
ret.emplace_back(rv.at(i).Get());
}
return ret;
}
template<class T>
vcobj<T> Sort(const vcobj<T>& v) {
vcobj<T> ret;
auto size = v.size();
if(size == 0) {
return ret;
}
if(size > 1) {
auto mid = size / 2;
vcobj<T> l(v.begin(), v.begin()+mid);
auto lv = Sort(l);
vcobj<T> r(v.begin()+mid, v.end());
auto rv = Sort(r);
ret = Merge(lv, rv);
}
else {
ret = v;
}
return ret;
}
int main() {
{
vcobj<int> v = {4, 5, 2, 1, 9, 6, 10, 8, 15, 3, 7};
display(v);
auto sorted = Sort(v);
display(sorted);
}
{
vcobj<float> v = {0.01, 0.001, 0.002, 0.009, 0.010, 0.0003, 0.00001};
display(v);
auto sorted = Sort(v);
display(sorted);
}
{
vcobj<std::string> v = {{"pineapple"}, {"jackfruit"}, {"mango"}, {"apple"}, {"banana"}};
display(v);
auto sorted = Sort(v);
display(sorted);
}
// causing problem
{
vcobj<CInstance> v = {{"pineapple"}, {"jackfruit"}, {"mango"}, {"apple"}, {"banana"}};
display(v);
auto sorted = Sort(v);
display(sorted);
}
return 0;
}
In all of the above types, I can simply call the object and it extracts the data which looks like calling a default get() method. Is there a way to make objects of class CInstance trigger a methos, when used just alone.
example:
I could do something like
CInstance obj;
std::cout << obj;
And that will call a default method in CInstance what every it may be.
As already mentioned in the other answer you can create your own operator<< function:
std::ostream & operator<<(std::ostream &stream, const CInstance &obj) {
// stream << whatever you want to output
return stream;
}
You could also define a conversion operator. But you should think twice before you use them. They can lead to problems that are not easy to debug, especially when explicit is omitted. You generally should not use those for logging/debugging purposes. If your type represents a string and you use it to allow an easy conversion to an std::string then it might be fine.
#include <iostream>
#include <string>
class CInstance {
std::string str_ = "test";
public:
explicit operator const std::string () const { return str_; }
};
int main() {
CInstance obj;
std::cout << (std::string)obj << std::endl;
return 0;
}
If you can guarantee that the lifetime of the returned const char * is still valid after the call you could also do something like (but I would avoid that solution):
#include <iostream>
#include <string>
class CInstance {
std::string str_ = "test";
public:
operator const char *() const { return str_.c_str(); }
};
int main() {
CInstance t;
std::cout << t << std::endl;
return 0;
}
Personally, I would go with the first solution. But that really depends if you actually have a string representation of CInstance or if you want to display something for debugging purposes in a different format. I however would avoid the last non-explicit version with the const char * conversion operator.
In this exact case, you define an operator<< method like so:
std::ostream & operator<<(std::ostream &stream, const CInstance &obj) {
... output obj however you want to the stream. For instance:
stream << obj.getAge();
return stream;
}
v_map has the correct amount of information stored, however when i try to use std::set it only copies one element ,I assume the first one. This is my first time using std::set , maybe I miss something here...Thanks for your help !
typedef std::map<std::string,std::pair<int,int>> points_map;
void list_average(points_map &v_map)
{
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_map.begin(),v_map.end(),compFunctor);
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
}
}
Note the following is the full program, I apologize in advance for the ugly code , and length of the code ,also I rewrote the name in the upper part of my code, in the full code , this particular function is called list_atlag
#include <iostream>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <codecvt>
#include <iterator>
#include <numeric>
#include <functional>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/program_options.hpp>
#include <boost/tokenizer.hpp>
class Adatok
{
public:
Adatok(std::string name, std::string path, std::string date, int points) : _name(name), _path(path), _date(date), _points(points) {}
Adatok(const Adatok &other) = default;
Adatok &operator=(const Adatok &other) = default;
std::string get_name() { return _name; }
std::string get_path() { return _path; }
std::string get_date() { return _date; }
int get_points() { return _points; }
private:
std::string _name;
std::string _path;
std::string _date;
int _points;
};
class Ranglista
{
public:
Ranglista(std::string name, int points) : _name(name), _points(points) {}
Ranglista(const Ranglista &other) = default;
Ranglista &operator=(const Ranglista &other) = default;
std::string get_name() { return _name; }
int get_points() { return _points; }
bool operator<(const Ranglista &other)
{
return _points > other._points;
}
private:
std::string _name;
int _points;
};
class Vedes
{
public:
Vedes(std::string name, int point) : _name(name), _point(point) { _count++; }
Vedes(const Vedes &other) = default;
Vedes &operator=(const Vedes &other) = default;
std::string get_name() { return _name; }
int get_point() { return _point; }
int get_count() { return _count; }
void set_stuff(int &points)
{
_point += points;
_count++;
}
bool operator<(const Vedes &other)
{
return _count > other._count;
}
private:
std::string _name;
int _point;
int _count = 0;
};
typedef std::map<std::string, int> path_value; //minden path + az erteke
typedef std::vector<Adatok> name_path_date; //bejegyzesek
typedef std::vector<Ranglista> ranglista; //ranglista
typedef std::map<std::string,std::pair<int,int>> vedes_vec; //vedesek
typedef std::function<bool(std::pair<std::string,std::pair<int,int>>,std::pair<std::string,std::pair<int,int>>)> Comparator;
void create_pv(path_value &, boost::filesystem::path); //feltolti a path+ertek map-ot
void create_npd(name_path_date &, path_value &, std::string input); //feltolti a bejegyzesek vektorat + mindenki pontszama map
void create_np(name_path_date &, path_value &); // name + path map
void list_np(path_value &name_point); // nam + path kiiratas
void list_bejegyzesek(name_path_date &bejegyzesek); // bejegyzesek vektora kiiratas
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path); //van-e mar ilyen bejegyzes
void create_rl(ranglista &rl_vec, path_value &name_point); //ranglista feltoltes
void list_rl(ranglista &rl_vec); //ranglista kiiratas
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec); //vedes atlag map
void list_atlag(vedes_vec &v_vec); //vedes atlag kiiratas
bool check_vedes(vedes_vec &v_vec, std::string name);
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points); //
//void accumulate_pv(path_value&);
int main(int argc, char **argv)
{
std::vector<std::string> roots = {"City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/", "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/"};
std::string input_file_name = "db-2018-05-06.csv";
/* OPTIONS */
boost::program_options::options_description desc("ALLOWED OPTIONS");
desc.add_options()("help", "help msg")("root,r", boost::program_options::value<std::vector<std::string>>())("csv", boost::program_options::value<std::string>(), "comma separated values")("rank", "rang lista")("vedes", "labor vedesek");
boost::program_options::positional_options_description pdesc;
pdesc.add("root", -1);
boost::program_options::variables_map vm;
boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).positional(pdesc).run(), vm);
boost::program_options::notify(vm);
int sum = 0;
path_value pv_map;
if (vm.count("help") || argc == 1)
{
std::cout << desc << std::endl;
return 1;
}
if (vm.count("root"))
{
roots = vm["root"].as<std::vector<std::string>>();
for (auto &i : roots)
{
boost::filesystem::path path(i);
create_pv(pv_map, path);
}
for (path_value::iterator it{pv_map.begin()}; it != pv_map.end(); it++)
sum += it->second;
//std::cout << sum << std::endl;create_npd
std::cout << std::accumulate(pv_map.begin(), pv_map.end(), 0, [](int value, const std::map<std::string, int>::value_type &p) { return value + p.second; });
std::cout << std::endl;
}
if (vm.count("csv"))
{
//input_file_name = vm["csv"].as<std::string>();
std::ifstream input_file{vm["csv"].as<std::string>()};
name_path_date bejegyzesek;
std::string temp;
path_value name_point;
while (getline(input_file, temp))
create_npd(bejegyzesek, pv_map, temp);
create_np(bejegyzesek, name_point);
//list_bejegyzesek(bejegyzesek);
//list_np(name_point);
if (vm.count("rank"))
{
ranglista rl_vec;
create_rl(rl_vec, name_point);
list_rl(rl_vec);
}
if (vm.count("vedes"))
{
vedes_vec v_vec;
vedes_atlag(bejegyzesek, v_vec);
list_atlag(v_vec);
}
return 0;
}
return 0;
}
void create_pv(path_value &pv_map, boost::filesystem::path path)
{
boost::filesystem::directory_iterator it{path}, eod;
BOOST_FOREACH (boost::filesystem::path const &p, std::make_pair(it, eod))
{
if (boost::filesystem::is_regular_file(p))
{
boost::filesystem::ifstream regular_file{p};
std::string temp;
int sum = 0; //aktualis .props erteke
while (getline(regular_file, temp))
{
temp.erase(0, temp.find_last_of('/'));
temp.erase(0, temp.find_first_of(' '));
sum += std::atoi((temp.substr(temp.find_first_of("0123456789"), temp.find_last_of("0123456789"))).c_str());
}
std::string result = p.string();
std::string result_path = result.substr(0, result.find_last_of('/'));
//std::cout << result_path << std::endl;
//pv_map.insert(std::make_pair(result, sum));
pv_map[result_path] = sum;
}
else
create_pv(pv_map, p);
}
}
//void accumulate_pv(path_value& pv_map)
//{
// std::cout<<std::accumulate(pv_map.begin(),pv_map.end(),0,[](int value,const path_value::int& p){return value+p.second;});
//}
void create_npd(name_path_date &bejegyzesek, path_value &pv_map, std::string input)
{
boost::tokenizer<boost::escaped_list_separator<char>> tokenizer{input};
boost::tokenizer<boost::escaped_list_separator<char>>::iterator it{tokenizer.begin()};
std::string name = *it;
std::string path = *(++it);
std::string date = *(++it);
path = path.substr(2);
if (!check_bejegyzesek(bejegyzesek, name, path))
bejegyzesek.push_back(Adatok(name, path, date, pv_map["/home/erik/Documents/Programs/"+path]));
}
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path)
{
bool ok = false;
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
{
if ((it->get_name() == name) && (it->get_path() == path))
ok = true;
}
return ok;
}
bool check_vedes(vedes_vec &v_vec, std::string name)
{
vedes_vec::iterator it = v_vec.find(name);
if (it != v_vec.end()) return true;
else return false;
}
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points)
{
/*for (auto &it : v_vec)
if (it.get_name() == name)
it.set_stuff(points);
*/
vedes_vec::iterator i = v_vec.find(name);
std::pair<int,int> it = i->second;
//auto& jt = it->second;
it.first++;
it.second += points;
}
void create_np(name_path_date &bejegyzesek, path_value &name_point)
{
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
if (name_point.count(it->get_name()) == 0)
name_point.insert(std::make_pair(it->get_name(), it->get_points()));
else
name_point[it->get_name()] += it->get_points();
}
void list_np(path_value &name_point)
{
for (path_value::iterator it{name_point.begin()}; it != name_point.end(); it++)
{
if (it->second)
std::cout << it->first << " " << it->second << std::endl;
}
}
void list_bejegyzesek(name_path_date &bejegyzesek)
{
for (name_path_date::iterator it{bejegyzesek.begin()}; it != bejegyzesek.end(); it++)
if (it->get_name() == "Varga Erik")
std::cout << it->get_name() << " " << it->get_path() << " " << it->get_points() << std::endl;
}
void create_rl(ranglista &rl_vec, path_value &name_point)
{
for (auto &it : name_point)
{
if (it.second > 0)
rl_vec.push_back(Ranglista(it.first, it.second));
}
std::sort(rl_vec.begin(), rl_vec.end());
}
void list_rl(ranglista &rl_vec)
{
for (auto &it : rl_vec)
std::cout << it.get_name() << " " << it.get_points() << std::endl;
}
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec)
{
std::string key = "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/";
for (auto &it : bejegyzesek)
{
if ((it.get_path().find("City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/") != std::string::npos) && (it.get_points()) && (!check_vedes(v_vec, it.get_name())))
v_vec.insert(std::make_pair(it.get_name(),std::make_pair(1,it.get_points())));
else if ((check_vedes(v_vec, it.get_name())) && (it.get_path().find("City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/") != std::string::npos) && (it.get_points()))
vedes_elem(v_vec, it.get_name(), it.get_points());
}
}
void list_atlag(vedes_vec &v_vec)
{
//std::sort(v_vec.begin(), v_vec.end());
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_vec.begin(),v_vec.end(),compFunctor);
//int sum = 0;
//int csum = 0;
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
//sum += it.get_point();
//csum += it.get_count();
//sum = std::accumulate(v_vec.begin(), v_vec.end(), 0, [](int i, Vedes &o) { return i + o.get_point(); });
//csum = std::accumulate(v_vec.begin(), v_vec.end(), 0, [](int i, Vedes &o) { return i + o.get_count(); });
}
//std::cout << (sum - csum) / csum << std::endl;
}
so, as described here
template<
class Key,
class Compare = std::less<Key>,
class Allocator = std::allocator<Key>
> class set;
std::set is an associative container that contains a sorted set of unique objects of type Key.
I cleaned up your code, and made a Minimal, Complete, and Verifiable example,
#include <iostream>
#include <map>
#include <set>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using points_set_pair = std::pair<std::string, point_pair>;
auto compFunctor = [](const points_set_pair &elem1, const points_set_pair &elem2)
{
return elem1.second.first < elem2.second.first;
};
using points_set = std::set<points_set_pair, decltype(compFunctor)>;
void list_average(const points_map &v_map)
{
points_set v_set(v_map.begin(),v_map.end(),compFunctor);
for (auto &elem : v_set)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
Now consider the first version of main
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 3, 4}}};
list_average(v_map);
}
output:
foo 1
bar 0
Now consider the second version of main:
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}};
list_average(v_map);
}
output:
bar 3
See the problem? As .second.first of the elements are both 1, the latter replaces the first. It is not unique. That's the downside of std::set.
So, what then?
Don't use std::set, but use std::vector and std::sort. Example:
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using string_point_pair = std::pair<std::string, point_pair>;
auto compFunctor = [](string_point_pair const &elem1, string_point_pair const &elem2)
{
return
elem1.second.first != elem2.second.first?
elem1.second.first < elem2.second.first:
elem1.second.second < elem2.second.second;
};
void list_average(points_map const &v_map)
{
std::vector<string_point_pair> v_vec(v_map.begin(),v_map.end());
std::sort(v_vec.begin(), v_vec.end(), compFunctor);
for (auto &elem : v_vec)
{
const point_pair &jt = elem.second;
std::cout << elem.first << " " << (jt.second - jt.first) / jt.first<< "\n";
}
}
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}, {"baz", { 2, 4}}};
list_average(v_map);
}
Output:
foo 1
bar 3
baz 1
live demo
I have two loops to iterate a collection:
the first one adds missing elements into the collection.
the second one updates existing instances into the collection.
How can I replace the loops by standard library functions?
// add missing elements into the collection (if any)
for (auto i = collection.size(); i < objectTypes.size() + startIdx; i++)
{
collection.push_back(CNode(i));
}
// update elements of the collection
for (const auto& objectType : objectTypes)
{
collection[startIdx++].SetObjectType(objectType);
}
This question is a further step to this one.
Here a complete sample that compiles:
#include <string>
#include <iostream>
#include <vector>
#include <regex>
class CObject
{
std::string _objectType;
public:
CObject() : _objectType("n/a") {}
void SetObjectType(std::string objectType) { _objectType = objectType; }
std::string GetObjectType() const { return _objectType; }
};
class CNode
{
int _id;
CObject _object;
public:
explicit CNode(int id) : _id(id) {}
void SetObjectType(std::string objectType) { _object.SetObjectType(objectType); }
std::string GetObjectType() const { return _object.GetObjectType(); }
};
std::vector<std::string> SplitLine(std::string const& line, std::string seps)
{
std::regex regxSeps(seps); // the dot character needs to be escaped in a regex
std::sregex_token_iterator rit(line.begin(), line.end(), regxSeps, -1);
return std::vector<std::string>(rit, std::sregex_token_iterator());
}
static int ParseLine(std::string line, std::string seps, size_t startIdx, std::vector<CNode>& collection)
{
if (startIdx > collection.size())
{
throw std::invalid_argument("the start index is out of range");
}
auto objectTypes = SplitLine(line, seps);
for (auto missingIdx = collection.size(); missingIdx < objectTypes.size() + startIdx; missingIdx++)
{
collection.push_back(CNode(missingIdx));
}
for (const auto& objectType : objectTypes)
{
collection[startIdx++].SetObjectType(objectType);
}
return (startIdx - 1);
}
int main()
{
std::string seps = "\\."; // the dot character needs to be escaped in a regex
// 2 3 4 5 6 7 8 9
std::string line = "abc.def.ghi.klm.nop.qrs.tuv.wxyz";
std::vector<CNode> collection{ CNode(0), CNode(1), CNode(2) , CNode(3) , CNode(4) , CNode(5) };
auto startAt = 2;
try
{
auto collection_size = ParseLine(line, seps, startAt, collection);
std::cout << collection_size << std::endl;
for (auto value : collection)
{
std::cout << value.GetObjectType() << std::endl;
}
}
catch (std::invalid_argument& e)
{
std::cout << " out of range exception " << e.what() << std::endl;
}
}
Perhaps something similar to this would work (I have not tested it):
auto i = collection.size();
std::transform (objectTypes.begin(), objectTypes.end(),
std::back_inserter(collection),
[&](const ObjectType& ot) {
CNode ct(++i);
ct.SetObjectType(ot);
return ct;
});
Is there any reason also not to add an objectType parameter to the CNode constructor?
In case someone is interested to get the complete code of the function, below the final solution using the standard library to replace the two loops to insert and update the collection:
// Compute the number of elements to insert and to update
auto numInserts = startIdx + objectTypes.size() - collection.size();
auto numUpdates = collection.size() - startIdx;
// update the elements that already exists in the collection
std::for_each( objectTypes.begin(),
objectTypes.begin() + numUpdates,
[&](const std::string objectType)
{ collection[startIdx++].SetObjectType(objectType); });
// add the missing elements into the collection (if any)
std::transform( objectTypes.end() - numInserts,
objectTypes.end(),
std::back_inserter(collection),
[&](const std::string& objectType)
{ return CNode(++startIdx, objectType); });
Here a complete sample that compiles:
#include <string>
#include <iostream>
#include <vector>
#include <regex>
class CObject
{
std::string _objectType;
public:
CObject() : _objectType("n/a") {}
explicit CObject(std::string objectType) : _objectType(objectType) {};
void SetObjectType(std::string objectType) { _objectType = objectType; }
std::string GetObjectType() const { return _objectType; }
};
class CNode
{
int _id;
CObject _object;
public:
explicit CNode(int id) : _id(id) {}
explicit CNode(int id, std::string objectType) : _id(id), _object(objectType) {}
void SetObjectType(std::string objectType) { _object.SetObjectType(objectType); }
std::string GetObjectType() const { return _object.GetObjectType(); }
};
std::vector<std::string> SplitLine(std::string const& line, std::string seps)
{
std::regex regxSeps(seps); // the dot character needs to be escaped in a regex
std::sregex_token_iterator rit(line.begin(), line.end(), regxSeps, -1);
return std::vector<std::string>(rit, std::sregex_token_iterator());
}
static int ParseLineWithLoops(std::string line, std::string seps, size_t startIdx, std::vector<CNode>& collection)
{
if (startIdx > collection.size())
{
throw std::invalid_argument("the start index is out of range");
}
auto objectTypes = SplitLine(line, seps);
// expand the collection if needed
for (auto idx = collection.size(); idx < objectTypes.size() + startIdx; idx++)
{
collection.push_back(CNode(idx));
}
// update the types of elements into the collection
for (const auto& objectType : objectTypes)
{
collection[startIdx++].SetObjectType(objectType);
}
return (startIdx - 1);
}
static int ParseLineWithStdTransform(std::string line, std::string seps, size_t startIdx, std::vector<CNode>& collection)
{
if (startIdx > collection.size())
{
throw std::invalid_argument("the start index is out of range");
}
auto objectTypes = SplitLine(line, seps);
// Compute the number of elements to insert and to update
auto numInserts = startIdx + objectTypes.size() - collection.size();
auto numUpdates = collection.size() - startIdx;
// update the elements that already exists in the collection
std::for_each( objectTypes.begin(),
objectTypes.begin() + numUpdates,
[&](const std::string objectType) { collection[startIdx++].SetObjectType(objectType); });
// add the missing elements into the collection (if any)
std::transform( objectTypes.end() - numInserts,
objectTypes.end(),
std::back_inserter(collection),
[&](const std::string& objectType) { return CNode(++startIdx, objectType); });
return (collection.size() - 1);
}
int main()
{
std::string seps = "\\."; // the dot character needs to be escaped in a regex
// 2 3 4 5 6 7 8 9
std::string line = "abc.def.ghi.klm.nop.qrs.tuv.wxyz";
auto startAt = 2;
std::vector<CNode> collection1{ CNode(0), CNode(1), CNode(2) , CNode(3) , CNode(4) , CNode(5) };
try
{
auto collection_size = ParseLineWithStdTransform(line, seps, startAt, collection1);
std::cout << collection_size << std::endl;
for (auto value : collection1)
{
std::cout << value.GetObjectType() << std::endl;
}
}
catch (std::invalid_argument& e)
{
std::cout << " out of range exception " << e.what() << std::endl;
}
std::vector<CNode> collection2{ CNode(0), CNode(1), CNode(2) , CNode(3) , CNode(4) , CNode(5) };
try
{
auto collection_size = ParseLineWithLoops(line, seps, startAt, collection2);
std::cout << collection_size << std::endl;
for (auto value : collection2)
{
std::cout << value.GetObjectType() << std::endl;
}
}
catch (std::invalid_argument& e)
{
std::cout << " out of range exception " << e.what() << std::endl;
}
}
I have two lists of pointers to a data structure X, the algorithm is very simple:
It loops over the first list A and try to find the the first matching element in list B. The requirement is to have at least 50k elements in each list:
#include <iostream>
#include <memory>
#include <chrono>
#include <vector>
#include <algorithm>
#include <string>
struct X {
std::string field_1;
std::string field_2;
std::string field_3;
std::string field_4;
X(std::string f1, std::string f2, std::string f3, std::string f4)
: field_1(f1)
, field_2(f2)
, field_3(f3)
, field_4(f4)
{};
bool equal(const std::shared_ptr<X>& x) {
return (x->field_1 == field_1) &&
(x->field_2 == field_2) &&
(x->field_3 == field_3) &&
(x->field_4 == field_4);
};
X *match = nullptr;
};
typedef std::shared_ptr<X> X_ptr;
class Timer
{
public:
Timer(std::string name) : beg_(clock_::now()), name_(name) {}
~Timer() {
std::cout << "Elapsed(" << name_ << "): " << elapsed() << std::endl;
}
void reset() { beg_ = clock_::now(); }
double elapsed() const {
return std::chrono::duration_cast<second_>
(clock_::now() - beg_).count();
}
private:
typedef std::chrono::high_resolution_clock clock_;
typedef std::chrono::duration<double, std::ratio<1> > second_;
std::chrono::time_point<clock_> beg_;
std::string name_;
};
std::string random_string(size_t length)
{
auto randchar = []() -> char
{
const char charset[] =
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const size_t max_index = (sizeof(charset) - 1);
return charset[rand() % max_index];
};
std::string str(length, 0);
std::generate_n(str.begin(), length, randchar);
return str;
}
int main()
{
Timer t("main");
std::vector <X_ptr> list_A;
std::vector <X_ptr> list_B;
const int MAX_ELEM = 50000;
list_A.reserve(MAX_ELEM);
list_B.reserve(MAX_ELEM);
{
Timer t("insert");
for (int i = 0; i < MAX_ELEM; i++) {
list_A.push_back(X_ptr(new X{ random_string(2), random_string(2), random_string(2), random_string(2) }));
list_B.push_back(X_ptr(new X{ random_string(2), random_string(2), random_string(2), random_string(2) }));
}
}
{
Timer t("match");
std::for_each(list_A.begin(), list_A.end(), [list_B](X_ptr& a) {
auto found_b = std::find_if(list_B.begin(), list_B.end(), [a](const X_ptr& b) {
return a->equal(b);
});
if (found_b != list_B.end()) {
a->match = found_b->get();
std::cout << "match OK \n";
}
});
}
}
on my machine the program is running extremly slow:
Elapsed(insert): 0.05566
Elapsed(match): 98.3739
Elapsed(main): 98.452
Would appreciate it if you can think of any other way to optimize it to run faster.
You are using vectors so each lookup into list_B takes O(n), where n is the number of elements in B. This means the total algorithm is O(m*n), if m is the number of elements in list_A. Thus if m and n a similar in size, you have a O(n^2) algorithm. That is too slow for any large n. To fix this, convert list_B into a unordered_map, (you can do this as part of this algorithm as the conversion is O(n)) where an element in the map's key is an element from list B and the value anything, say 0. You can then perform lookups into the map in O(1) time using find() on the map. Thus your algorithm becomes O(n), way better that O(n^2).
For example
std::unordered_map< X_ptr, int > value_map;
Time r t("match");
std::for_each(list_B.begin(), list_B.end(), [&](X_ptr& b) {
value_map[b] = 0;
});
std::for_each(list_A.begin(), list_A.end(), [value_map](X_ptr& a) {
auto found_b = value_map.find( a );
if ( found_b != value_map.end() )
{
a->match = found_b->first.get();
std::cout << "match OK \n";
}
});
}
Your Version:
Elapsed(insert): 0.0758608
Elapsed(match): 182.899
Elapsed(main): 182.991
New Version:
Elapsed(insert): 0.0719907
Elapsed(match): 0.0388562
Elapsed(main): 0.130884
You may use something like the following:
std::sort(list_B.begin(), list_B.end(), deref_less<X>);
{
Timer t("match");
for (const auto& a : list_A) {
auto it = std::lower_bound(list_B.begin(), list_B.end(), a, deref_less<X>);
if (it != list_B.end() && **it == *a) {
a->match = it->get();
std::cout << "match OK \n";
}
}
}
Live example.
I came across one requirement where the record is stored as
Name : Employee_Id : Address
where Name and Employee_Id are supposed to be keys that is, a search function is to be provided on both Name and Employee Id.
I can think of using a map to store this structure
std::map< std:pair<std::string,std::string> , std::string >
// < < Name , Employee-Id> , Address >
but I'm not exactly sure how the search function will look like.
Boost.Multiindex
This is a Boost example
In the above example an ordered index is used but you can use also a hashed index:
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <string>
#include <iostream>
struct employee
{
int id_;
std::string name_;
std::string address_;
employee(int id,std::string name,std::string address):id_(id),name_(name),address_(address) {}
};
struct id{};
struct name{};
struct address{};
struct id_hash{};
struct name_hash{};
typedef boost::multi_index_container<
employee,
boost::multi_index::indexed_by<
boost::multi_index::ordered_unique<boost::multi_index::tag<id>, BOOST_MULTI_INDEX_MEMBER(employee,int,id_)>,
boost::multi_index::ordered_unique<boost::multi_index::tag<name>,BOOST_MULTI_INDEX_MEMBER(employee,std::string,name_)>,
boost::multi_index::ordered_unique<boost::multi_index::tag<address>, BOOST_MULTI_INDEX_MEMBER(employee,std::string,address_)>,
boost::multi_index::hashed_unique<boost::multi_index::tag<id_hash>, BOOST_MULTI_INDEX_MEMBER(employee,int,id_)>,
boost::multi_index::hashed_unique<boost::multi_index::tag<name_hash>, BOOST_MULTI_INDEX_MEMBER(employee,std::string,name_)>
>
> employee_set;
typedef boost::multi_index::index<employee_set,id>::type employee_set_ordered_by_id_index_t;
typedef boost::multi_index::index<employee_set,name>::type employee_set_ordered_by_name_index_t;
typedef boost::multi_index::index<employee_set,name_hash>::type employee_set_hashed_by_name_index_t;
typedef boost::multi_index::index<employee_set,id>::type::const_iterator employee_set_ordered_by_id_iterator_t;
typedef boost::multi_index::index<employee_set,name>::type::const_iterator employee_set_ordered_by_name_iterator_t;
typedef boost::multi_index::index<employee_set,id_hash>::type::const_iterator employee_set_hashed_by_id_iterator_t;
typedef boost::multi_index::index<employee_set,name_hash>::type::const_iterator employee_set_hashed_by_name_iterator_t;
int main()
{
employee_set employee_set_;
employee_set_.insert(employee(1, "Employer1", "Address1"));
employee_set_.insert(employee(2, "Employer2", "Address2"));
employee_set_.insert(employee(3, "Employer3", "Address3"));
employee_set_.insert(employee(4, "Employer4", "Address4"));
// search by id using an ordered index
{
const employee_set_ordered_by_id_index_t& index_id = boost::multi_index::get<id>(employee_set_);
employee_set_ordered_by_id_iterator_t id_itr = index_id.find(2);
if (id_itr != index_id.end() ) {
const employee& tmp = *id_itr;
std::cout << tmp.id_ << ", " << tmp.name_ << ", " << tmp .address_ << std::endl;
} else {
std::cout << "No records have been found\n";
}
}
// search by non existing id using an ordered index
{
const employee_set_ordered_by_id_index_t& index_id = boost::multi_index::get<id>(employee_set_);
employee_set_ordered_by_id_iterator_t id_itr = index_id.find(2234);
if (id_itr != index_id.end() ) {
const employee& tmp = *id_itr;
std::cout << tmp.id_ << ", " << tmp.name_ << ", " << tmp .address_ << std::endl;
} else {
std::cout << "No records have been found\n";
}
}
// search by name using an ordered index
{
const employee_set_ordered_by_name_index_t& index_name = boost::multi_index::get<name>(employee_set_);
employee_set_ordered_by_name_iterator_t name_itr = index_name.find("Employer3");
if (name_itr != index_name.end() ) {
const employee& tmp = *name_itr;
std::cout << tmp.id_ << ", " << tmp.name_ << ", " << tmp .address_ << std::endl;
} else {
std::cout << "No records have been found\n";
}
}
// search by name using an hashed index
{
employee_set_hashed_by_name_index_t& index_name = boost::multi_index::get<name_hash>(employee_set_);
employee_set_hashed_by_name_iterator_t name_itr = index_name.find("Employer4");
if (name_itr != index_name.end() ) {
const employee& tmp = *name_itr;
std::cout << tmp.id_ << ", " << tmp.name_ << ", " << tmp .address_ << std::endl;
} else {
std::cout << "No records have been found\n";
}
}
// search by name using an hashed index but the name does not exists in the container
{
employee_set_hashed_by_name_index_t& index_name = boost::multi_index::get<name_hash>(employee_set_);
employee_set_hashed_by_name_iterator_t name_itr = index_name.find("Employer46545");
if (name_itr != index_name.end() ) {
const employee& tmp = *name_itr;
std::cout << tmp.id_ << ", " << tmp.name_ << ", " << tmp .address_ << std::endl;
} else {
std::cout << "No records have been found\n";
}
}
return 0;
}
If you want to use std::map, you can have two separate containers, each one having adifferent key (name, emp id) and the value should be a pointer the structure, so that you will not have multiple copies of the same data.
Example with tew keys:
#include <memory>
#include <map>
#include <iostream>
template <class KEY1,class KEY2, class OTHER >
class MultiKeyMap {
public:
struct Entry
{
KEY1 key1;
KEY2 key2;
OTHER otherVal;
Entry( const KEY1 &_key1,
const KEY2 &_key2,
const OTHER &_otherVal):
key1(_key1),key2(_key2),otherVal(_otherVal) {};
Entry() {};
};
private:
struct ExtendedEntry;
typedef std::shared_ptr<ExtendedEntry> ExtendedEntrySptr;
struct ExtendedEntry {
Entry entry;
typename std::map<KEY1,ExtendedEntrySptr>::iterator it1;
typename std::map<KEY2,ExtendedEntrySptr>::iterator it2;
ExtendedEntry() {};
ExtendedEntry(const Entry &e):entry(e) {};
};
std::map<KEY1,ExtendedEntrySptr> byKey1;
std::map<KEY2,ExtendedEntrySptr> byKey2;
public:
void del(ExtendedEntrySptr p)
{
if (p)
{
byKey1.erase(p->it1);
byKey2.erase(p->it2);
}
}
void insert(const Entry &entry) {
auto p=ExtendedEntrySptr(new ExtendedEntry(entry));
p->it1=byKey1.insert(std::make_pair(entry.key1,p)).first;
p->it2=byKey2.insert(std::make_pair(entry.key2,p)).first;
}
std::pair<Entry,bool> getByKey1(const KEY1 &key1)
{
const auto &ret=byKey1[key1];
if (ret)
return std::make_pair(ret->entry,true);
return std::make_pair(Entry(),false);
}
std::pair<Entry,bool> getByKey2(const KEY2 &key2)
{
const auto &ret=byKey2[key2];
if (ret)
return std::make_pair(ret->entry,true);
return std::make_pair(Entry(),false);
}
void deleteByKey1(const KEY1 &key1)
{
del(byKey1[key1]);
}
void deleteByKey2(const KEY2 &key2)
{
del(byKey2[key2]);
}
};
int main(int argc, const char *argv[])
{
typedef MultiKeyMap<int,std::string,int> M;
M map1;
map1.insert(M::Entry(1,"aaa",7));
map1.insert(M::Entry(2,"bbb",8));
map1.insert(M::Entry(3,"ccc",9));
map1.insert(M::Entry(7,"eee",9));
map1.insert(M::Entry(4,"ddd",9));
map1.deleteByKey1(7);
auto a=map1.getByKey1(2);
auto b=map1.getByKey2("ddd");
auto c=map1.getByKey1(7);
std::cout << "by key1=2 (should be bbb ): "<< (a.second ? a.first.key2:"Null") << std::endl;
std::cout << "by key2=ddd (should be ddd ): "<< (b.second ? b.first.key2:"Null") << std::endl;
std::cout << "by key1=7 (does not exist): "<< (c.second ? c.first.key2:"Null") << std::endl;
return 0;
}
Output:
by key1=2 (should be bbb ): bbb
by key2=ddd (should be ddd ): ddd
by key1=7 (does not exist): Null
If EmployeeID is the unique identifier, why use other keys? I would use EmployeeID as the internal key everywhere, and have other mappings from external/human readable IDs (such as Name) to it.
C++14 std::set::find non-key searches solution
This method saves you from storing the keys twice, once one the indexed object and secondly on as the key of a map as done at: https://stackoverflow.com/a/44526820/895245
This provides minimal examples of the central technique that should be easier to understand first: How to make a C++ map container where the key is part of the value?
#include <cassert>
#include <set>
#include <vector>
struct Point {
int x;
int y;
int z;
};
class PointIndexXY {
public:
void insert(Point *point) {
sx.insert(point);
sy.insert(point);
}
void erase(Point *point) {
sx.insert(point);
sy.insert(point);
}
Point* findX(int x) {
return *(this->sx.find(x));
}
Point* findY(int y) {
return *(this->sy.find(y));
}
private:
struct PointCmpX {
typedef std::true_type is_transparent;
bool operator()(const Point* lhs, int rhs) const { return lhs->x < rhs; }
bool operator()(int lhs, const Point* rhs) const { return lhs < rhs->x; }
bool operator()(const Point* lhs, const Point* rhs) const { return lhs->x < rhs->x; }
};
struct PointCmpY {
typedef std::true_type is_transparent;
bool operator()(const Point* lhs, int rhs) const { return lhs->y < rhs; }
bool operator()(int lhs, const Point* rhs) const { return lhs < rhs->y; }
bool operator()(const Point* lhs, const Point* rhs) const { return lhs->y < rhs->y; }
};
std::set<Point*, PointCmpX> sx;
std::set<Point*, PointCmpY> sy;
};
int main() {
std::vector<Point> points{
{1, -1, 1},
{2, -2, 4},
{0, 0, 0},
{3, -3, 9},
};
PointIndexXY idx;
for (auto& point : points) {
idx.insert(&point);
}
Point *p;
p = idx.findX(0);
assert(p->y == 0 && p->z == 0);
p = idx.findX(1);
assert(p->y == -1 && p->z == 1);
p = idx.findY(-2);
assert(p->x == 2 && p->z == 4);
}