Determine conversion factors of strings describing units - c++

In one of my projects I need to determine the conversion factors of fairly complex units. For statically defined units I was able to write a static conversion function using the excellent Boost.Units library.
In my case the user enters the type of a conversion at run-time, so that I need a dynamic conversion function. A nice solution should use the already implemented functions in Boost.Units. Is this possible?
My own final solution
After some thoughts I was able to derive the following partial solution to my problem, which is sufficient for my needs. I'm relying on boost-spirit to parse the unit string, making this task indeed very easy. Great library!
Parsing unit strings might be a common task, that others might be interested in. Hence, I'm posting my final solution here including some tests for illustration.
The most important function here is convertUnit, which computes the conversion factor from one unit to another if the conversion is possible.
UnitParser.cpp
#include "UnitParser.h"
#pragma warning(push)
#pragma warning(disable: 4512 4100 4503 4127 4348 4459)
#include <map>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/math/constants/constants.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <vector>
#include <algorithm>
using namespace boost;
namespace {
// Symbol table for the unit prefixes accepted by the grammar. The mapped
// integer ends up in Unit::modifier and is used by getScale() as a
// power-of-ten exponent.
// NOTE(review): the values differ from the usual SI exponents (milli 10^-3,
// centi 10^-2, kilo 10^3). getScale()'s GRAM offset and the "Scale" tests are
// written against these values, so confirm intent before changing them.
struct modifier_ : spirit::qi::symbols<char, int> {
    modifier_() {
        add("m", -4);
        add("c", -3);
        add("k", 4);
    }
} modifier;
// Symbol table mapping the textual base-unit names understood by the grammar
// to their UnitParser::UnitType enumerator.
struct baseUnit_ : spirit::qi::symbols<char, UnitParser::UnitType> {
    baseUnit_() {
        using UnitParser::UnitType;
        add("g", UnitType::GRAM);
        add("m", UnitType::METER);
        add("s", UnitType::SECONDS);
        add("rad", UnitType::RADIANS);
        add("deg", UnitType::DEGREE);
        add("N", UnitType::NEWTON);
    }
} baseUnit;
// Qi grammar that turns a unit expression such as "mm^2*ms^-1" into a
// UnitParser::Units vector (one UnitParser::Unit per '*'-separated factor).
class UnitParserImpl : public spirit::qi::grammar<std::string::iterator, UnitParser::Units()>
{
public:
UnitParserImpl() : UnitParserImpl::base_type(unitsTop_)
{
using namespace boost::spirit::qi;
// The start rule is just an alias for the '*'-separated list rule.
unitsTop_ = units_.alias();
// One or more single units separated by '*'.
units_ = (unit_ % '*');
// A single unit: optional prefix, base unit, optional "^<int>" exponent.
// The prefix is only consumed when a base unit follows it (&baseUnit is a
// look-ahead), so a lone "m" is parsed as the base unit, not as a prefix.
// Placeholders: _1 = optional prefix, _2 = base unit, _3 = optional exponent;
// they are passed to Unit(unitType, exponent, modifier) in that order.
unit_ = (-(modifier >> &baseUnit) >> baseUnit >> -(lexeme["^"] >> int_ ))[_val = boost::phoenix::construct<UnitParser::Unit>(_2, _3, _1)];
}
spirit::qi::rule<std::string::iterator, UnitParser::Units()> unitsTop_;
spirit::qi::rule<std::string::iterator, UnitParser::Units()> units_;
spirit::qi::rule<std::string::iterator, UnitParser::Unit()> unit_;
};
}
// Parses a unit expression (e.g. "kg*m*s^-2") into its list of units.
//
// expression   - text to parse; whitespace between tokens is skipped.
// errorMessage - receives a description of the failure when parsing does not
//                succeed; left untouched on success.
// Returns the parsed units, or an empty optional when the expression is not a
// valid unit string or is only partially consumed by the grammar.
boost::optional<UnitParser::Units> UnitParser::parse(const std::string& expression, std::string&& errorMessage)
{
    // The grammar is instantiated for std::string::iterator, so work on a
    // mutable copy of the (const) input.
    std::string formula = expression;
    auto cursor = formula.begin();
    const auto last = formula.end();
    try {
        Units units;
        UnitParserImpl parser;
        const bool ok = spirit::qi::phrase_parse(cursor, last, parser, spirit::qi::space, units);
        if (!ok || cursor != last) {
            // Previously this path returned without telling the caller why.
            errorMessage = "Unexpected input in unit expression: '" + std::string(cursor, last) + "'";
            return boost::optional<UnitParser::Units>();
        }
        return boost::optional<UnitParser::Units>(units);
    }
    catch (const spirit::qi::expectation_failure<std::string::iterator>& except) {
        errorMessage = except.what();
        return boost::optional<UnitParser::Units>();
    }
}
std::map<UnitParser::UnitType, UnitParser::Dimension> dimMap() {
std::map<UnitParser::UnitType, UnitParser::Dimension> ret;
ret[UnitParser::UnitType::SECONDS] = UnitParser::Dimension({ 0,1,0,0 });
ret[UnitParser::UnitType::METER] = UnitParser::Dimension({ 1,0,0,0 });
ret[UnitParser::UnitType::DEGREE] = UnitParser::Dimension({ 0,0,1,0 });
ret[UnitParser::UnitType::RADIANS] = UnitParser::Dimension({ 0,0,1,0 });
ret[UnitParser::UnitType::GRAM] = UnitParser::Dimension({ 0,0,0,1 });
ret[UnitParser::UnitType::NEWTON] = UnitParser::Dimension({ 1,-2,0,1 });
return ret;
}
// Sums the dimension exponents of all units in `units`, each weighted by the
// unit's own exponent. Unit types missing from dimMap() contribute nothing.
UnitParser::Dimension UnitParser::getDimension(const UnitParser::Units& units)
{
    const auto table = dimMap();
    UnitParser::Dimension total;
    for (const auto& unit : units) {
        const auto entry = table.find(unit.unitType);
        if (entry == table.end()) {
            continue;
        }
        const auto& dim = entry->second;
        const auto power = unit.exponent;
        total.length += power*dim.length;
        total.time += power*dim.time;
        total.weigth += power*dim.weigth;
        total.planarAngle += power*dim.planarAngle;
    }
    return total;
}
// True when both unit lists reduce to the same physical dimension.
bool UnitParser::equalDimension(const Units& u1, const Units& u2)
{
    auto first = getDimension(u1);
    const auto second = getDimension(u2);
    return first == second;
}
// Placeholder: ignores both arguments and always reports success.
// NOTE(review): no caller is visible in this file; confirm whether a real
// check was intended or the function can be removed.
bool UnitParser::checkDimension(const UnitParser::Units& u1, const UnitParser::Units& u2)
{
return true;
}
// Scale of `units` relative to the units m, s, kg, rad.
//
// Returns {factor, exponent}; convertUnit() combines the pair as
// factor * 10^exponent. `factor` collects the non-decimal part (currently only
// the degree factor), `exponent` the sum of prefix exponents weighted by each
// unit's own exponent.
// NOTE(review): the degree factor is 180/pi, and the "Degree" test pins that
// value; a deg -> rad factor would normally be pi/180 - confirm the intended
// direction.
std::pair<double,int> UnitParser::getScale(const Units& units)
{
    double factor = 1.;
    int decimalExponent = 0;
    for (const auto& unit : units) {
        if (unit.unitType == UnitType::DEGREE) {
            const double degreeScale = 180./boost::math::constants::pi<double>();
            // Apply the factor once per power so that e.g. deg^2 or deg^-1
            // scale correctly (previously the exponent was ignored here).
            // Repeated multiplication keeps the exponent-1 result bit-identical.
            const int power = unit.exponent < 0 ? -unit.exponent : unit.exponent;
            double unitFactor = 1.;
            for (int i = 0; i < power; ++i) {
                unitFactor *= degreeScale;
            }
            factor *= (unit.exponent < 0) ? 1./unitFactor : unitFactor;
        }
        // The mass reference is kg, not g, so grams are shifted by the value
        // the prefix table assigns to "k".
        const int prefix = (unit.unitType == UnitType::GRAM) ? unit.modifier - 4 : unit.modifier;
        decimalExponent += unit.exponent*prefix;
    }
    return{ factor, decimalExponent };
}
// Computes the factor f such that 1 * unitString1 == f * unitString2.
// Returns an empty optional and fills errorMessage when either string cannot
// be parsed or the two units have different dimensions.
boost::optional<double> UnitParser::convertUnit(const std::string& unitString1, const std::string& unitString2, std::string&& errorMessage)
{
    const auto source = parse(unitString1);
    const auto target = parse(unitString2);
    if (!source) {
        errorMessage = unitString1 + " is not valid!";
        return boost::optional<double>();
    }
    if (!target) {
        errorMessage = unitString2 + " is not valid!";
        return boost::optional<double>();
    }
    if (!equalDimension(*source, *target)) {
        errorMessage = "Dimensions of " + unitString1 + " and " + unitString2 + " mismatch!";
        return boost::optional<double>();
    }
    const auto sourceScale = getScale(*source);
    const auto targetScale = getScale(*target);
    const int exp = sourceScale.second - targetScale.second;
    const double scale = sourceScale.first / targetScale.first;
    return boost::optional<double>(scale*std::pow(10, exp));
}
UnitParser.h
#pragma once
#include <boost/optional.hpp>
#include <vector>
namespace UnitParser {
// Base units the parser recognises.
enum class UnitType {
SECONDS, METER, DEGREE, RADIANS, GRAM, NEWTON
};
// One factor of a unit expression: a base unit with an optional prefix and
// an integer exponent (e.g. "mm^2").
struct Unit {
    // Members now have defaults, so a default-constructed Unit (as created by
    // the parser for its attribute) no longer holds indeterminate values.
    Unit() = default;
    // A missing exponent defaults to 1, a missing prefix (modifier) to 0.
    Unit(const UnitType& unitType, const boost::optional<int>& exponent, const boost::optional<int>& modifier)
        : unitType(unitType), exponent(exponent.value_or(1)), modifier(modifier.value_or(0)) {}
    UnitType unitType = UnitType::SECONDS;
    int exponent = 1;
    int modifier = 0;
};
typedef std::vector<Unit> Units;
// Exponents of the four base dimensions a unit expression reduces to.
struct Dimension {
    Dimension() = default;
    Dimension(int length, int time, int planarAngle, int weigth) : length(length), time(time), planarAngle(planarAngle), weigth(weigth) {}
    int length = 0;
    int time = 0;
    int planarAngle = 0;
    int weigth = 0;
    // const-qualified so that const Dimension objects can be compared.
    bool operator==(const Dimension& dim) const {
        return length == dim.length && planarAngle == dim.planarAngle && time == dim.time && weigth == dim.weigth;
    }
    bool operator!=(const Dimension& dim) const {
        return !(*this == dim);
    }
};
// Parses a unit expression; empty optional when the text is not a valid unit string.
boost::optional<Units> parse(const std::string& string, std::string&& errorMessage=std::string());
// Dimension exponents of the whole unit list.
Dimension getDimension(const Units& units);
// True when both unit lists have the same dimension.
bool equalDimension(const Units& u1, const Units& u2);
// NOTE(review): the visible implementation always returns true.
bool checkDimension(const Units& u1, const Units& u2);
// {factor, power-of-ten exponent} of the unit list; see the implementation for the reference units.
std::pair<double,int> getScale(const Units& u1);
// Conversion factor from unitString1 to unitString2; empty optional (with errorMessage set) on failure.
boost::optional<double> convertUnit(const std::string& unitString1, const std::string& unitString2, std::string&& errorMessage=std::string());
}
UnitParserCatch.cpp
#define CATCH_CONFIG_MAIN
#include "catch.h"
#include "UnitParser.h"
#include <boost/math/constants/constants.hpp>
using namespace UnitParser;
// End-to-end checks of convertUnit(): a same-dimension conversion, a derived
// unit against its base-unit expansion, and a dimension mismatch.
TEST_CASE("ConvertUnit", "[UnitParser]") {
SECTION("Simple") {
auto s = convertUnit("mm^2", "cm^2"); // 1*mm^2 = 0.01*cm^2
REQUIRE(s);
CHECK(*s == 0.01);
}
SECTION("Newton") {
auto s = convertUnit("N", "kg*m*s^-2");
REQUIRE(s);
CHECK(*s == 1.);
}
SECTION("Wrong") {
// Mismatching dimensions must yield no value and a non-empty message.
std::string err;
auto s = convertUnit("m", "m*kg", std::move(err));
REQUIRE(!s);
CHECK(!err.empty());
}
}
// Checks getDimension() and equalDimension().
// Dimension arguments are (length, time, planarAngle, weigth).
// NOTE(review): two sections below share the name "Newton"; confirm that the
// Catch version in use runs both.
TEST_CASE("Dimension", "[UnitParser]") {
SECTION("Simple") {
auto a=*parse("mm^2");
auto dim=getDimension(a);
CHECK(dim == Dimension(2, 0, 0, 0));
}
SECTION("Newton") {
// N has dimension (1, -2, 0, 1); squaring doubles every exponent.
auto a = *parse("mN^2");
auto dim = getDimension(a);
CHECK(dim == Dimension(2, -4, 0, 2));
}
SECTION("Fits") {
auto a = *parse("mm^2");
auto b = *parse("cm^2");
auto fits = equalDimension(a, b);
CHECK(fits);
}
SECTION("Newton") {
auto a = *parse("N");
auto b = *parse("kg*m*s^-2");
auto fits = equalDimension(a, b);
CHECK(fits);
}
SECTION("NoFit") {
auto a = *parse("mm^2*g");
auto b = *parse("cm^2");
auto fits = equalDimension(a, b);
CHECK(!fits);
}
}
// Checks the {factor, power-of-ten exponent} pair returned by getScale().
TEST_CASE("Scale", "[UnitParser]") {
SECTION("Length") {
// Expected exponent is 2 * (-4) = -8 with the parser's "m" -> -4 prefix entry.
// NOTE(review): physically 1 mm^2 = 1e-6 m^2; confirm the prefix table.
auto s = getScale(*parse("mm^2"));
CHECK(s == std::make_pair(1., -8));
}
SECTION("Degree") {
auto s = getScale(*parse("deg"));
CHECK(s == std::make_pair(180. / boost::math::constants::pi<double>(),0));
}
SECTION("Complex") {
// km^2 contributes 2 * 4; kg contributes 0 because mass is referenced to kg.
auto s = getScale(*parse("km^2*kg"));
CHECK(s == std::make_pair(1., 8));
}
}
// Smoke tests: each expression below must be accepted by parse().
TEST_CASE("Simple", "[UnitParser]") {
SECTION("Complex") {
SECTION("Full") {
auto u = parse("mm^2");
CHECK(u);
}
SECTION("Many") {
auto u = parse("mm^2*ms^-1");
CHECK(u);
}
}
SECTION("Units") {
SECTION("Newton") {
auto u = parse("N");
CHECK(u);
}
SECTION("Meter") {
auto u = parse("m");
CHECK(u);
}
SECTION("Seconds") {
auto u = parse("s");
CHECK(u);
SECTION("Exponent") {
// Positive and negative exponents, with and without a prefix.
CHECK(parse("s^2"));
CHECK(parse("ms^-2"));
CHECK(parse("ks^-3"));
}
}
SECTION("PlanarAngle") {
auto u = parse("deg");
CHECK(u);
}
}
}

Related

Is there anything like C++ default object method

I have the following templated merge sort program:
#include <iostream>
#include <vector>
#include <string>
// trying to create a default method call
// Wraps a string; instances are ordered by the length of that string.
class CInstance {
private:
    std::string str_;
public:
    CInstance(const std::string& str) : str_(str) {}
    // True when this string is longer than `that`'s. const-qualified so it
    // works on const objects; the const on the bool return had no effect.
    bool operator>(const CInstance& that) const { return str_.size() > that.str_.size(); }
};
// Minimal value holder: stores one T and hands out copies of it.
template<class T>
class CObj {
private:
    T val;
public:
    // Intentionally implicit: the examples build vectors of CObj from plain
    // values in braced initializer lists.
    CObj(const T n) : val(n) {}
    // Returns a copy of the stored value; const so it can be called through
    // const references and const containers.
    T Get() const { return val; }
};
template<class T>
using vcobj = std::vector<CObj<T>>;
// Prints every stored value followed by a space, then a newline.
template<class T>
void display(vcobj<T>& v) {
    for (auto it = v.begin(); it != v.end(); ++it) {
        std::cout << it->Get() << " ";
    }
    std::cout << "\n";
}
// Merges two ascending sequences into one ascending sequence. When two
// values compare equal, the one from `lv` is taken first.
template<class T>
vcobj<T> Merge(vcobj<T>& lv, vcobj<T>& rv) {
    vcobj<T> merged;
    auto left = lv.begin();
    auto right = rv.begin();
    while (left != lv.end() && right != rv.end()) {
        if (left->Get() > right->Get()) {
            merged.emplace_back(right->Get());
            ++right;
        }
        else {
            merged.emplace_back(left->Get());
            ++left;
        }
    }
    // At most one of the two tails is non-empty at this point.
    for (; left != lv.end(); ++left) {
        merged.emplace_back(left->Get());
    }
    for (; right != rv.end(); ++right) {
        merged.emplace_back(right->Get());
    }
    return merged;
}
// Top-down merge sort; returns a sorted copy and leaves `v` untouched.
template<class T>
vcobj<T> Sort(const vcobj<T>& v) {
    // Zero or one element: already sorted.
    if (v.size() < 2) {
        return v;
    }
    const auto middle = v.begin() + v.size() / 2;
    vcobj<T> lower(v.begin(), middle);
    vcobj<T> upper(middle, v.end());
    auto sortedLower = Sort(lower);
    auto sortedUpper = Sort(upper);
    return Merge(sortedLower, sortedUpper);
}
// Demo driver: builds a vector for several element types, prints it, sorts
// it with Sort() and prints the result.
int main() {
{
vcobj<int> v = {4, 5, 2, 1, 9, 6, 10, 8, 15, 3, 7};
display(v);
auto sorted = Sort(v);
display(sorted);
}
{
vcobj<float> v = {0.01, 0.001, 0.002, 0.009, 0.010, 0.0003, 0.00001};
display(v);
auto sorted = Sort(v);
display(sorted);
}
{
vcobj<std::string> v = {{"pineapple"}, {"jackfruit"}, {"mango"}, {"apple"}, {"banana"}};
display(v);
auto sorted = Sort(v);
display(sorted);
}
// causing problem
// display() streams each element with operator<<, and CInstance as defined
// in this listing has no such operator.
{
vcobj<CInstance> v = {{"pineapple"}, {"jackfruit"}, {"mango"}, {"apple"}, {"banana"}};
display(v);
auto sorted = Sort(v);
display(sorted);
}
return 0;
}
For all of the above types, I can simply use the object and its data is extracted, which looks like calling a default get() method. Is there a way to make objects of class CInstance trigger a method when used on their own?
example:
I could do something like
CInstance obj;
std::cout << obj;
And that will call a default method in CInstance, whatever it may be.
As already mentioned in the other answer you can create your own operator<< function:
// Stream-insertion overload for CInstance. Template only: the body writes
// nothing yet and just returns the stream so that calls can be chained.
std::ostream & operator<<(std::ostream &stream, const CInstance &obj) {
// stream << whatever you want to output
return stream;
}
You could also define a conversion operator. But you should think twice before you use them. They can lead to problems that are not easy to debug, especially when explicit is omitted. You generally should not use those for logging/debugging purposes. If your type represents a string and you use it to allow an easy conversion to an std::string then it might be fine.
#include <iostream>
#include <string>
// Variant of CInstance that exposes its text through an explicit conversion.
class CInstance {
std::string str_ = "test";
public:
// explicit: the conversion only happens when the caller asks for it with a cast.
explicit operator const std::string () const { return str_; }
};
int main() {
CInstance obj;
std::cout << (std::string)obj << std::endl;
return 0;
}
If you can guarantee that the lifetime of the returned const char * is still valid after the call you could also do something like (but I would avoid that solution):
#include <iostream>
#include <string>
// Variant of CInstance with an implicit conversion to a C string.
class CInstance {
std::string str_ = "test";
public:
// The returned pointer refers to str_'s buffer, so it must not be used after
// this object is destroyed or str_ is modified.
operator const char *() const { return str_.c_str(); }
};
int main() {
CInstance t;
std::cout << t << std::endl;
return 0;
}
Personally, I would go with the first solution. But that really depends if you actually have a string representation of CInstance or if you want to display something for debugging purposes in a different format. I however would avoid the last non-explicit version with the const char * conversion operator.
In this exact case, you define an operator<< method like so:
std::ostream & operator<<(std::ostream &stream, const CInstance &obj) {
// ... output obj however you want to the stream. For instance:
stream << obj.getAge();
return stream;
}

Find element in boost multi_index_container

In my code I need to iterate over all elements and also check, as quickly as possible, whether some element already exists, so I chose the Boost multi-index container, where I can use a vector-like and an unordered_set-like interface for my class Animal at the same time. The problem is that I can no longer find an element through the unordered_set interface since I changed the key from std::string to std::array<char, 50> and adjusted the code, and I don't know what I am doing wrong.
code:
https://wandbox.org/permlink/dnCaEzYVdXkTFBGo
#include <array>
#include <algorithm>
#include <iostream>
#include <chrono>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <memory>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/identity.hpp>
int constexpr elements_size{ 1'000'000 };
// Record stored in the container. name_ is a fixed 50-byte buffer that is
// always zero-filled and NUL-terminated; names longer than 49 characters are
// truncated.
//
// Copy/move operations are left to the compiler (Rule of Zero). The
// hand-written versions assigned name_ to itself in the move constructor and
// move assignment, so the name was never transferred, and they were declared
// noexcept although copying a std::string can throw.
struct Animal
{
    Animal(std::string name, std::string description, int leg, int age, double maxSpeed) noexcept :
        description_{std::move(description)}, leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
    {
        setName(name);
    }
    // Not noexcept: copying `description` may allocate.
    Animal(std::string const& name, std::string const& description) :
        description_{description}
    {
        setName(name);
    }
    std::array<char, 50> name_{};   // zero-initialised so it is always terminated
    std::string description_;
    int leg_{0};
    int age_{0};
    double maxSpeed_{0.0};
private:
    // Copies at most name_.size() - 1 characters so the final byte stays NUL.
    // The previous unbounded std::copy wrote past the buffer for long names.
    void setName(std::string const& name) noexcept
    {
        auto const length = std::min(name.size(), name_.size() - 1);
        std::copy_n(name.begin(), length, name_.begin());
    }
};
// Hash functor producing the same value for a name whether it is given as
// the 50-byte key array, a std::string or a C string. When print_ is set,
// every computed hash is also written to std::cout.
struct Hasher
{
    bool print_;
    Hasher(bool print = false): print_{print} {}
    std::size_t operator()(std::array<char, 50> const& name) const
    {
        // Bound the view to the array: the buffer need not contain a NUL, and
        // building a string_view from name.data() alone would then read past it.
        auto const terminator = std::find(name.begin(), name.end(), '\0');
        auto const length = static_cast<std::size_t>(terminator - name.begin());
        return hashOf("array hash", std::string_view(name.data(), length));
    }
    std::size_t operator()(std::string const& name) const
    {
        return hashOf("string hash", name.c_str());
    }
    std::size_t operator()(const char* name) const
    {
        return hashOf("char hash", name);
    }
private:
    // Hashes `text` once and optionally logs it prefixed with `label`.
    std::size_t hashOf(const char* label, std::string_view text) const
    {
        auto const value = std::hash<std::string_view>{}(text);
        if (print_)
            std::cout << label << value << std::endl;
        return value;
    }
};
// Equality predicate passed to the hashed index.
struct KeysComparator
{
// Key vs. key: compares all 50 bytes of both arrays.
bool operator()(std::array<char, 50> const& a1, std::array<char, 50> const& a2) const {return a1 == a2; }
// Lookup string vs. stored entry. Reads t.value.name_, so T must expose the
// element through a `value` member.
// NOTE(review): presumably T is Boost's composite-key result wrapper - confirm.
// Writes a trace line to std::cout on every call; name_.data() is treated as
// a NUL-terminated string here.
template <typename T>
bool operator()(std::string const& n1, T const& t) const
{
std::cout << "### value.name_" << t.value.name_.data() << ", n1: " << n1 << std::endl;
return n1 == t.value.name_.data();
}
};
// Formats the interval between two time points as
// "<microseconds> µs, <milliseconds> ms, <seconds> s" (each value truncated).
template<typename TimePoint>
std::string getElapsedTime(TimePoint const& start, TimePoint const& end)
{
    namespace chrono = std::chrono;
    auto const asMicro = chrono::duration_cast<chrono::microseconds>(end - start);
    auto const asMilli = chrono::duration_cast<chrono::milliseconds>(asMicro);
    auto const asSec = chrono::duration_cast<chrono::seconds>(asMilli);
    std::string text = std::to_string(asMicro.count());
    text += " µs, ";
    text += std::to_string(asMilli.count());
    text += " ms, ";
    text += std::to_string(asSec.count());
    text += " s";
    return text;
}
// Writes one summary line with the elapsed time of the emplace, iterate and
// find phases, the accumulated sum and the name of the measured container.
template<typename TimePoint>
void printStatistics(TimePoint const& emplace_start, TimePoint const& emplace_end, TimePoint const& iterate_start, TimePoint const& iterate_end,
TimePoint const& find_start, TimePoint const& find_end, intmax_t const sum, std::string target)
{
    auto const emplaceText = getElapsedTime(emplace_start, emplace_end);
    auto const iterateText = getElapsedTime(iterate_start, iterate_end);
    auto const findText = getElapsedTime(find_start, find_end);
    std::cout << "Elapsed time emplace: " << emplaceText;
    std::cout << " | iterate: " << iterateText;
    std::cout << " | find: " << findText;
    std::cout << ", sum:" << sum << " , calculation for " << target << std::endl;
}
// Benchmark: fills a multi_index_container with elements_size animals, sums
// their ages by iterating, looks one element up by name and prints timings.
void test()
{
using namespace boost::multi_index;
// Index 0: random access (vector-like). Index 1: unique hash index on name_.
using Animal_multi = multi_index_container<Animal, indexed_by<
random_access<>,
hashed_unique<
composite_key<Animal, member<Animal, std::array<char, 50>, &Animal::name_>>,
composite_key_hash<Hasher>,
composite_key_equal_to<KeysComparator>>
>>;
Animal_multi container;
auto emplace_start = std::chrono::steady_clock::now();
// The fixed prefix is 48 characters long, so with the index appended most
// names are longer than what a 50-byte key array can hold.
for (auto i = 0; i < elements_size; ++i)
container.emplace_back("the really long name of some animal 12345678910_" + std::to_string(i),
"bla bla bla bla bla bla bla bla bla bla bla bla bla", 4, i, i + 2);
auto emplace_end = std::chrono::steady_clock::now();
intmax_t sum{0};
auto iterate_start = std::chrono::steady_clock::now();
for (auto const& e : container)
sum += e.age_;
auto iterate_end = std::chrono::steady_clock::now();
KeysComparator key_comparator;
// print=true: the hasher logs every hash it computes during the lookup.
Hasher hasher{true};
auto find_start = std::chrono::steady_clock::now();
auto &container_interface = container.get<1>();
// Looks up the last inserted name with the explicit hasher and comparator.
auto isSucceeded = container_interface.count("the really long name of some animal 12345678910_" + std::to_string(elements_size-1),
hasher, key_comparator);
if (not isSucceeded)
std::cout << "WARN: Element has not been found." << std::endl;
auto find_end = std::chrono::steady_clock::now();
printStatistics(emplace_start, emplace_end, iterate_start, iterate_end, find_start, find_end, sum, "Animal_multi (boost multi_index)");
}
// Entry point: runs the benchmark once.
int main()
{
    test();
}
There are a number of bugs like in the move constructor:
name_ = name_; // oops this does nothing at all
Just follow Rule Of Zero. This will also inform you that std::string copy/assignment are not noexcept.
The name copy should probably be length-limited:
std::copy_n(name.begin(), std::min(name.size(), name_.size()), name_.data());
At this point I notice something that might explain your trouble: you don't NUL-terminate, nor make sure that the array is 0-initialized.
BINGO
Indeed, just a few lines down I spot:
return std::hash<std::string_view>{}(name.data());
That's... UB! Your string_view might contain indeterminate data, but what's worse, you would NEVER have copied the terminating NUL character. So, std::string_view will model a string with indeterminate length which WILL likely exceed 50.
Read here about Nasal Demons (UB)
Such are the perils of skipping standard library types for the old C craft.
First Dig
So, here's the entirety of the class with equal/better characteristics:
using Name = std::array<char, 50>;
// Record with a fixed-size, always NUL-terminated name buffer. Names longer
// than the buffer allows are rejected by the assert in debug builds and
// truncated otherwise.
struct Animal {
    Animal(std::string_view name, std::string description,
           int leg = 0, int age = 0, double maxSpeed = 0) noexcept
        : name_{0}, // zero initialize!
          description_{std::move(description)},
          leg_{leg},
          age_{age},
          maxSpeed_{maxSpeed}
    {
        constexpr auto Capacity = std::tuple_size<Name>::value;
        constexpr auto MaxLen = Capacity - 1; // reserve NUL char
        // A name of exactly MaxLen characters still fits, so the bound is
        // inclusive (the previous strict '<' rejected it).
        assert(name.length() <= MaxLen);
        std::copy_n(name.data(), std::min(name.length(), MaxLen), name_.data());
    }
    // Rule of Zero: the compiler-generated copy/move operations are correct.
    //Animal ( Animal&& animal ) noexcept = default;
    //Animal ( Animal const& animal ) = default;
    //Animal& operator= ( Animal&& animal ) noexcept = default;
    //Animal& operator= ( Animal const& animal ) = default;
    Name name_;
    std::string description_;
    int leg_{0};
    int age_{0};
    double maxSpeed_{0.0};
};
Improving: FixedString
This just screams for a better Name type. How about, FixedString:
// Fixed-capacity, always NUL-terminated string of at most N - 1 characters
// that converts implicitly from and to std::string_view.
template <size_t N> struct FixedString {
    static_assert(N > 1); // require space for NUL char
    // Throws std::length_error when `s` does not fit (length >= N).
    FixedString(std::string_view s) : data_{0} {
        if (s.length() >= N)
            throw std::length_error("FixedString");
        // The length was checked above, so no further clamping is needed.
        std::copy_n(s.data(), s.length(), data());
    }
    std::string_view str() const { return { data(), size() }; }
    operator std::string_view() const { return str(); }
    auto data() const { return data_.data(); }
    auto data() { return data_.data(); }
    auto c_str() const { return data_.data(); }
    auto c_str() { return data_.data(); }
    auto begin() const { return data_.begin(); }
    auto end() const { return data_.end(); }
    auto begin() { return data_.begin(); }
    auto end() { return data_.end(); }
    // Number of characters before the first NUL (the capacity if there is none).
    size_t size() const {
        auto terminator = std::memchr(data(), 0, data_.max_size());
        return terminator
            ? static_cast<size_t>(static_cast<char const*>(terminator) - data())
            : data_.max_size();
    }
    bool operator<(FixedString const& rhs) const { return str() < rhs.str(); }
    bool operator==(FixedString const& rhs) const { return str() == rhs.str(); }
    bool operator!=(FixedString const& rhs) const { return str() != rhs.str(); }
    // Direct comparison with a view avoids constructing a FixedString. The
    // full view is compared: truncating rhs to N - 1 characters would make a
    // longer string compare equal to its stored prefix.
    bool operator<(std::string_view const& rhs) const { return str() < rhs; }
    bool operator==(std::string_view const& rhs) const { return str() == rhs; }
    bool operator!=(std::string_view const& rhs) const { return str() != rhs; }
private:
    std::array<char, N> data_;
};
Now you can simply
using Name = FixedString<50>;
And all your Names will magically (and safely) convert to and from string views.
using Name = FixedString<50>;
// Record keyed by a fixed-capacity Name.
struct Animal {
    // Not noexcept: constructing name_ throws std::length_error when `name`
    // does not fit, and a noexcept constructor would turn that into
    // std::terminate instead of letting the caller handle it.
    Animal(std::string_view name, std::string description,
           int leg = 0, int age = 0, double maxSpeed = 0)
        : name_{name}, description_{std::move(description)},
          leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
    { }
    Name name_;
    std::string description_;
    int leg_{0};
    int age_{0};
    double maxSpeed_{0.0};
};
Everything Simplifies With The Right Abstraction
This is the most important lesson I think I learned in my programming career: choosing the right abstraction leads to simplicity. Here, we evaporate two messy helpers:
using Hasher = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;
Boom. They do everything you had, but better.
Now, The Missing Element
After simplifying the whole thing to this it should become pretty obvious that a std::array<char, 50> can never correctly contain names longer than 50 characters. Indeed, checking the insertions:
auto emplace_start = Now();
size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
auto [_, ok] = container.emplace_back(
make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
4, i, i + 2);
if (!ok) ++duplicates;
}
if (duplicates) {
std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}
auto emplace_end = Now();
Reveals that:
Oops, 999990 duplicate keys not inserted
Elapsed time emplace: 116.491ms iterate: 0.000145ms find: 0.000597ms, sum:45 , calculation for Animal_multi (boost multi_index)
At least you have now replaced Undefined Behaviour with constraint checks.
Of course, just increasing the name capacity fixes it: https://wandbox.org/permlink/6AamJfXe76nYALfR
using Name = FixedString<60>;
Prints:
Elapsed time emplace: 594.475ms iterate: 18.6076ms find: 0.003138ms, sum:499999500000 , calculation for Animal_multi (boost multi_index)
Alternatively you can throw on Name construction with an overly long name: Live On Wandbox
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
Which duly prints
terminate called after throwing an instance of 'std::length_error'
what(): FixedString
Full Listing
This demo uses FixedString<60> to avoid the key errors:
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <iostream>
#include <iomanip>
#include <chrono>
using namespace std::chrono_literals;
int constexpr elements_size{ 1'000'000 };
// Fixed-capacity, always NUL-terminated string of at most N - 1 characters
// that converts implicitly from and to std::string_view.
template <size_t N> struct FixedString {
    static_assert(N > 1); // require space for NUL char
    // Throws std::length_error when `s` does not fit (length >= N).
    FixedString(std::string_view s) : data_{0} {
        if (s.length() >= N)
            throw std::length_error("FixedString");
        // The length was checked above, so no further clamping is needed.
        std::copy_n(s.data(), s.length(), data());
    }
    std::string_view str() const { return { data(), size() }; }
    operator std::string_view() const { return str(); }
    auto data() const { return data_.data(); }
    auto data() { return data_.data(); }
    auto c_str() const { return data_.data(); }
    auto c_str() { return data_.data(); }
    auto begin() const { return data_.begin(); }
    auto end() const { return data_.end(); }
    auto begin() { return data_.begin(); }
    auto end() { return data_.end(); }
    // Number of characters before the first NUL (the capacity if there is none).
    size_t size() const {
        auto terminator = std::memchr(data(), 0, data_.max_size());
        return terminator
            ? static_cast<size_t>(static_cast<char const*>(terminator) - data())
            : data_.max_size();
    }
    // The full view is compared: truncating rhs to N - 1 characters would
    // make a longer string compare equal to its stored prefix.
    bool operator<(std::string_view const& rhs) const { return str() < rhs; }
    bool operator==(std::string_view const& rhs) const { return str() == rhs; }
    bool operator!=(std::string_view const& rhs) const { return str() != rhs; }
    bool operator<(FixedString const& rhs) const { return str() < rhs.str(); }
    bool operator==(FixedString const& rhs) const { return str() == rhs.str(); }
    bool operator!=(FixedString const& rhs) const { return str() != rhs.str(); }
private:
    std::array<char, N> data_;
};
using Name = FixedString<60>;
// Record keyed by a fixed-capacity Name.
struct Animal {
    // Not noexcept: constructing name_ throws std::length_error when `name`
    // does not fit, and a noexcept constructor would turn that into
    // std::terminate instead of letting the caller handle it.
    Animal(std::string_view name, std::string description,
           int leg = 0, int age = 0, double maxSpeed = 0)
        : name_{name}, description_{std::move(description)},
          leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
    { }
    Name name_;
    std::string description_;
    int leg_{0};
    int age_{0};
    double maxSpeed_{0.0};
};
using Hasher = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;
using Clock = std::chrono::steady_clock;
using Duration = Clock::duration;
static auto Now = Clock::now;
// Writes one summary line with the three phase durations in (fractional)
// milliseconds, the accumulated sum and the name of the measured container.
void printStatistics(Duration emplace, Duration iterate, Duration find,
                     intmax_t const sum, std::string target)
{
    auto const inMs = [](Duration d) { return d/1.0ms; };
    std::cout << "Elapsed time";
    std::cout << " emplace: " << inMs(emplace) << "ms";
    std::cout << " iterate: " << inMs(iterate) << "ms";
    std::cout << " find: " << inMs(find) << "ms";
    std::cout << ", sum:" << sum;
    std::cout << " , calculation for " << target;
    std::cout << std::endl;
}
// Benchmark: fills the container with elements_size animals, sums their ages
// by iterating, looks the last name up through the hashed index and prints
// the timings.
void test() {
namespace bmi = boost::multi_index;
// Index 0: random access (vector-like). Index by_name: unique hash on name_.
using Animal_multi = bmi::multi_index_container<Animal,
bmi::indexed_by<
bmi::random_access<>,
bmi::hashed_unique<
bmi::tag<struct by_name>,
bmi::member<Animal, Name, &Animal::name_>, Hasher, KeysComparator>
>
>;
Animal_multi container;
// Builds the key for element `id`: a fixed prefix followed by the number.
auto make_name = [](size_t id) {
return "the really long name of some animal 12345678910_" + std::to_string(id);
};
auto emplace_start = Now();
size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
// `ok` is false when the unique index rejected the element as a duplicate.
auto [_, ok] = container.emplace_back(
make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
4, i, i + 2);
if (!ok) ++duplicates;
}
if (duplicates) {
std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}
auto emplace_end = Now();
intmax_t sum{ 0 };
auto iterate_start = Now();
for (auto const& e : container) {
sum += e.age_;
}
auto iterate_end = Now();
auto find_start = Now();
{
auto& name_idx = container.get<by_name>();
auto last_key = make_name(elements_size - 1);
if (name_idx.count(std::string_view(last_key)) == 0u) {
std::cout << "WARN: Element has not been found." << std::endl;
}
}
auto find_end = Now();
printStatistics(
emplace_end - emplace_start,
iterate_end - iterate_start,
find_end - find_start, sum,
"Animal_multi (boost multi_index)");
}
// Entry point: runs the benchmark once.
int main() { test(); }

Spirit X3: parser with internal state

I want to efficiently parse large CSV-like files, whose order of columns I get at runtime. With Spirit Qi, I would parse each field with a lazy auxiliary parser that would select at runtime which column-specific parser to apply to each column. But X3 doesn't seem to have lazy (despite that it's listed in documentation). After reading recommendations here on SO, I've decided to write a custom parser.
It ended up being pretty nice, but now I've noticed I don't really need the pos variable be exposed anywhere outside the custom parser itself. I've tried putting it into the custom parser itself and started getting compiler errors stating that the column_value_parser object is read-only. Can I somehow put pos into the parser structure?
Simplified code that gets the compile-time error, with commented out parts of my working version:
#include <iostream>
#include <variant>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/support.hpp>
namespace helpers {
// https://bitbashing.io/std-visit.html
// Combines several callables into one object whose operator() overload set is
// the union of theirs; used as the visitor passed to visit().
template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
// Deduction guide so that overloaded{lambda1, lambda2} deduces its template arguments.
template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
}
auto const unquoted_text_field = *(boost::spirit::x3::char_ - ',' - boost::spirit::x3::eol);
// Empty tag types naming the kind of value a column holds; `skip` marks a
// column whose content is consumed but ignored.
struct text { };
struct integer { };
struct real { };
struct skip { };
// Per-column descriptor: exactly one of the tags above.
typedef std::variant<text, integer, real, skip> column_variant;
// Custom X3 parser that parses one field per call, choosing the field
// parser from `columns` in round-robin order.
struct column_value_parser : boost::spirit::x3::parser<column_value_parser> {
    typedef boost::spirit::unused_type attribute_type;
    std::vector<column_variant>& columns;
    // Index of the column the next parse() call handles. mutable because
    // parse() is const yet must advance the cursor; assigning to a plain
    // member there is what produced the "read-only" compile error.
    // (The earlier working variant kept this as a size_t& owned by the caller.)
    mutable size_t pos;
    column_value_parser(std::vector<column_variant>& columns)
        : columns(columns)
        , pos(0)
    { }
    // Parses one field at `f`. On success advances `f` and moves on to the
    // next column (wrapping around); on failure restores `f` and returns false.
    template<typename It, typename Ctx, typename Other, typename Attr>
    bool parse(It& f, It l, Ctx& /*ctx*/, Other const& /*other*/, Attr& /*attr*/) const {
        namespace x3 = boost::spirit::x3;
        // Without columns there is nothing to dispatch on; this also avoids
        // indexing an empty vector and the modulo by zero below.
        if (columns.empty()) {
            return false;
        }
        auto const saved_f = f;
        bool successful = false;
        std::visit(
            helpers::overloaded {
                [&](skip const&) {
                    successful = x3::parse(f, l, x3::omit[unquoted_text_field]);
                },
                [&](text const&) {
                    std::string value;
                    successful = x3::parse(f, l, unquoted_text_field, value);
                    if(successful) {
                        std::cout << "Text: " << value << '\n';
                    }
                },
                [&](integer const&) {
                    int value;
                    successful = x3::parse(f, l, x3::int_, value);
                    if(successful) {
                        std::cout << "Integer: " << value << '\n';
                    }
                },
                [&](real const&) {
                    double value;
                    successful = x3::parse(f, l, x3::double_, value);
                    if(successful) {
                        std::cout << "Real: " << value << '\n';
                    }
                }
            },
            columns[pos]);
        if(successful) {
            pos = (pos + 1) % columns.size();
            return true;
        } else {
            f = saved_f;
            return false;
        }
    }
};
// Demo driver: runs the column parser over a two-line sample whose columns are
// described as text, integer, real, skip.
int main(int argc, char *argv[])
{
std::string input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";
// Comes from external source.
std::vector<column_variant> columns = {text{}, integer{}, real{}, skip{}};
// NOTE(review): only referenced by the commented-out call below; unused in the active code.
size_t pos = 0;
// Grammar: fields separated by ',' form a row; rows are separated by end-of-line.
boost::spirit::x3::parse(
input.begin(), input.end(),
// (column_value_parser(columns, pos) % ',') % boost::spirit::x3::eol);
(column_value_parser(columns) % ',') % boost::spirit::x3::eol);
}
XY: My goal is to parse ~500 GB of pseudo-CSV files in a reasonable time on a machine with little RAM, convert them into a list of (roughly) [row-number, column-name, value], and then put that into storage. The format is actually a little more complex than CSV: these are database dumps formatted in a… human-friendly way, with column values actually being written in several small sublanguages (e.g. dates or, uh, something similar to whole Apache log lines stuffed into a single field), and I'm often extracting only one specific part of each column. Different files may have different columns in a different order, which I can only learn by parsing yet another set of files containing the original queries. Thankfully, Spirit makes it a breeze…
Three answers:
The easiest fix is to make pos a mutable member
The X3 hardcore answer is x3::with<>
Functional composition
1. Making pos mutable
Live On Wandbox
#include <iostream>
#include <variant>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/support.hpp>
namespace helpers {
// Bundles several callables into a single overload set, e.g. for std::visit.
// See https://bitbashing.io/std-visit.html
template <typename... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};
// Deduction guide so that overloaded{f, g} deduces overloaded<F, G>.
template <typename... Callables>
overloaded(Callables...) -> overloaded<Callables...>;
}
// Tag types naming how a column should be interpreted.
struct text { };
struct integer { };
struct real { };
struct skip { };

// A column descriptor holds exactly one of the tag types above.
using column_variant = std::variant<text, integer, real, skip>;

// One unquoted field: zero or more characters that are neither ',' nor an end-of-line.
auto const unquoted_text_field =
    *(boost::spirit::x3::char_ - ',' - boost::spirit::x3::eol);
// Stateful X3 parser component: parses one field according to the column
// descriptor at the current position, prints text/integer/real values to
// std::cout, and then advances (cyclically) to the next column.
struct column_value_parser : boost::spirit::x3::parser<column_value_parser> {
    // The parser exposes no attribute.
    typedef boost::spirit::unused_type attribute_type;

    // Column descriptors supplied by the caller; held by reference, so the
    // vector must stay alive for as long as this parser is used.
    std::vector<column_variant>& columns;
    // Index of the descriptor used for the next field. Declared mutable
    // because parse() is a const member function but has to advance it.
    size_t mutable pos = 0;

    column_value_parser(std::vector<column_variant>& columns)
        : columns(columns)
    { }

    // Tries to parse one field from [f, l).
    // Returns true and leaves f after the field on success; returns false and
    // restores f on failure. Always fails when there are no column descriptors.
    template<typename It, typename Ctx, typename Other, typename Attr>
    bool parse(It& f, It l, Ctx& /*ctx*/, Other const& /*other*/, Attr& /*attr*/) const {
        // Without descriptors there is nothing to match against; bailing out
        // here also avoids indexing an empty vector and the modulo by zero below.
        if (columns.empty()) {
            return false;
        }

        auto const saved_f = f;
        bool successful = false;
        std::visit(
            helpers::overloaded {
                // skip: consume the field and discard it.
                [&](skip const&) {
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::omit[unquoted_text_field]);
                },
                // text: capture the raw field characters.
                [&](text&) {
                    std::string value;
                    successful = boost::spirit::x3::parse(f, l, unquoted_text_field, value);
                    if (successful) {
                        std::cout << "Text: " << value << '\n';
                    }
                },
                // integer: parse with x3::int_.
                [&](integer&) {
                    int value;
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::int_, value);
                    if (successful) {
                        std::cout << "Integer: " << value << '\n';
                    }
                },
                // real: parse with x3::double_.
                [&](real&) {
                    double value;
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::double_, value);
                    if (successful) {
                        std::cout << "Real: " << value << '\n';
                    }
                }
            },
            columns[pos]);

        if (!successful) {
            f = saved_f;
            return false;
        }
        // Move on to the next column, wrapping around after the last one.
        pos = (pos + 1) % columns.size();
        return true;
    }
};
// Demo driver: runs the column parser over a two-line sample whose columns are
// described as text, integer, real, skip.
int main() {
    std::vector<column_variant> columns = {text{}, integer{}, real{}, skip{}};
    std::string input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";

    // Fields separated by ',' form a row; rows are separated by end-of-line.
    boost::spirit::x3::parse(
        input.begin(),
        input.end(),
        (column_value_parser(columns) % ',') % boost::spirit::x3::eol);
}
2. x3::with<>
This is similar but with better (re)entrancy and encapsulation:
Live On Wandbox
#include <iostream>
#include <variant>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/support.hpp>
namespace helpers {
// Bundles several callables into a single overload set, e.g. for std::visit.
// See https://bitbashing.io/std-visit.html
template <typename... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};
// Deduction guide so that overloaded{f, g} deduces overloaded<F, G>.
template <typename... Callables>
overloaded(Callables...) -> overloaded<Callables...>;
}
// Tag types naming how a column should be interpreted.
struct text { };
struct integer { };
struct real { };
struct skip { };

// A column descriptor holds exactly one of the tag types above.
using column_variant = std::variant<text, integer, real, skip>;

// One unquoted field: zero or more characters that are neither ',' nor an end-of-line.
auto const unquoted_text_field =
    *(boost::spirit::x3::char_ - ',' - boost::spirit::x3::eol);
// Per-column field parser whose current column index is not stored in the parser
// object but fetched from the X3 context under pos_tag (see invoke()).
struct column_value_parser : boost::spirit::x3::parser<column_value_parser> {
// The parser exposes no attribute.
typedef boost::spirit::unused_type attribute_type;
// Column descriptors supplied by the caller; held by reference, not copied.
std::vector<column_variant>& columns;
column_value_parser(std::vector<column_variant>& columns)
: columns(columns)
{ }
// Tries to parse one field from [f, l) using the descriptor at the context's
// current position. On success f is left after the field and the position is
// advanced; on failure f is restored.
template<typename It, typename Ctx, typename Other, typename Attr>
bool parse(It& f, It l, Ctx const& ctx, Other const& /*other*/, Attr& /*attr*/) const {
auto const saved_f = f;
bool successful = false;
// Position stored in the context by x3::with<pos_tag> in invoke().
size_t& pos = boost::spirit::x3::get<pos_tag>(ctx).value;
// Dispatch on the descriptor type of the current column.
visit(
helpers::overloaded {
// skip: consume the field and discard it.
[&](skip const&) {
successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::omit[unquoted_text_field]);
},
// text: capture the raw field characters.
[&](text&) {
std::string value;
successful = boost::spirit::x3::parse(f, l, unquoted_text_field, value);
if(successful) {
std::cout << "Text: " << value << '\n';
}
},
// integer: parse with x3::int_.
[&](integer&) {
int value;
successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::int_, value);
if(successful) {
std::cout << "Integer: " << value << '\n';
}
},
// real: parse with x3::double_.
[&](real&) {
double value;
successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::double_, value);
if(successful) {
std::cout << "Real: " << value << '\n';
}
}
},
columns[pos]);
if(successful) {
// Advance to the next column, wrapping around after the last one.
pos = (pos + 1) % columns.size();
return true;
} else {
f = saved_f;
return false;
}
}
// Wrapper whose value stays writable even when the wrapper itself is const.
// NOTE(review): presumably needed because the context hands the stored object
// back as const — confirm against the Boost version in use.
template <typename T>
struct Mutable { T mutable value; };
// Tag type identifying the position entry in the X3 context.
struct pos_tag;
// Returns a copy of this parser wrapped in x3::with<pos_tag>, carrying a
// value-initialized (zero) position.
auto invoke() const {
return boost::spirit::x3::with<pos_tag>(Mutable<size_t>{}) [ *this ];
}
};
// Demo driver: parses the two-line sample with the context-based column parser.
int main() {
    std::vector<column_variant> columns = {text{}, integer{}, real{}, skip{}};
    std::string input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";

    column_value_parser p(columns);

    // Fields separated by ',' form a row; rows are separated by end-of-line.
    boost::spirit::x3::parse(
        input.begin(),
        input.end(),
        (p.invoke() % ',') % boost::spirit::x3::eol);
}
3. Functional Composition
Because it's so much easier in X3, my favourite is to just generate the parser on demand.
Without requirements, this is the simplest I'd propose:
Live On Wandbox
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Builds CSV parsers by composing one X3 parser per column descriptor.
namespace CSV {
// Tag types naming how a column should be interpreted.
struct text { };
struct integer { };
struct real { };
struct skip { };
// One unquoted field: zero or more characters other than ',' and '\n'.
auto const unquoted_text_field = *~x3::char_(",\n");
// Map each column tag to the parser used for that column.
static inline auto as_parser(skip) { return x3::omit[unquoted_text_field]; }
static inline auto as_parser(text) { return unquoted_text_field; }
static inline auto as_parser(integer) { return x3::int_; }
static inline auto as_parser(real) { return x3::double_; }
// Returns a parser for one line: each column's parser followed by a delimiter,
// chained in the order the descriptors were given and terminated by x3::eps.
template <typename... Spec>
static inline auto line_parser(Spec... spec) {
// A delimiter is either a comma, or a position directly followed by
// end-of-input / end-of-line (the & predicate only looks ahead).
auto delim = ',' | &(x3::eoi | x3::eol);
return ((as_parser(spec) >> delim) >> ... >> x3::eps);
}
// Returns a parser for a whole input: lines separated by end-of-line.
template <typename... Spec> static inline auto csv_parser(Spec... spec) {
return line_parser(spec...) % x3::eol;
}
}
#include <iostream>
#include <iomanip>
using namespace CSV;
// Demo driver: builds a parser for text,integer,real,skip rows, runs it over a
// two-line sample and reports the outcome plus any unparsed remainder.
int main() {
    std::string const input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";
    auto const p = csv_parser(text{}, integer{}, real{}, skip{});

    auto f = input.begin();
    auto const l = input.end();

    bool const ok = parse(f, l, p);
    std::cout << (ok ? "Parsed\n" : "Failed\n");

    // f was advanced by parse(); anything left over was not consumed.
    if (f != l) {
        std::cout << "Remaining: " << std::quoted(std::string(f, l)) << "\n";
    }
}
A version with debug information enabled:
Live On Wandbox
<line>
<try>Hello,1,13.7,XXX\nWor</try>
<CSV::text>
<try>Hello,1,13.7,XXX\nWor</try>
<success>,1,13.7,XXX\nWorld,2,</success>
</CSV::text>
<CSV::integer>
<try>1,13.7,XXX\nWorld,2,1</try>
<success>,13.7,XXX\nWorld,2,1e</success>
</CSV::integer>
<CSV::real>
<try>13.7,XXX\nWorld,2,1e3</try>
<success>,XXX\nWorld,2,1e3,YYY</success>
</CSV::real>
<CSV::skip>
<try>XXX\nWorld,2,1e3,YYY</try>
<success>\nWorld,2,1e3,YYY</success>
</CSV::skip>
<success>\nWorld,2,1e3,YYY</success>
</line>
<line>
<try>World,2,1e3,YYY</try>
<CSV::text>
<try>World,2,1e3,YYY</try>
<success>,2,1e3,YYY</success>
</CSV::text>
<CSV::integer>
<try>2,1e3,YYY</try>
<success>,1e3,YYY</success>
</CSV::integer>
<CSV::real>
<try>1e3,YYY</try>
<success>,YYY</success>
</CSV::real>
<CSV::skip>
<try>YYY</try>
<success></success>
</CSV::skip>
<success></success>
</line>
Parsed
Notes, Caveats:
With anything mutable, beware of side-effects. E.g. if you have a | b and a includes column_value_parser, the side-effect of incrementing pos will not be rolled back when a fails and b is matched instead.
In short, this makes your parse function impure.

Creating std::set copies only one element, how to fix this?

v_map has the correct amount of information stored; however, when I try to build a std::set from it, the set ends up containing only one element (the first one, I assume). This is my first time using std::set, so maybe I am missing something here... Thanks for your help!
typedef std::map<std::string,std::pair<int,int>> points_map;
void list_average(points_map &v_map)
{
Comparator compFunctor = [](std::pair<std::string,std::pair<int,int>> elem1,std::pair<std::string,std::pair<int,int>> elem2)
{
std::pair<int,int> it = elem1.second;
std::pair<int,int> jt = elem2.second;
return it.first < jt.first;
};
std::set<std::pair<std::string,std::pair<int,int>>,Comparator> v_set(v_map.begin(),v_map.end(),compFunctor);
for (std::pair<std::string,std::pair<int,int>> it : v_set)
{
std::pair<int,int> jt = it.second;
std::cout << it.first << " " << (jt.second - jt.first) / jt.first<< std::endl;
}
}
Note: the following is the full program. I apologize in advance for the ugly code and its length. Also, I renamed the function in the snippet above; in the full code this particular function is called list_atlag.
#include <iostream>
#include <string>
#include <map>
#include <set>
#include <vector>
#include <codecvt>
#include <iterator>
#include <numeric>
#include <functional>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/program_options.hpp>
#include <boost/tokenizer.hpp>
// One record: a name, a path, a date and the number of points attached to it.
// Copy and move operations are the implicitly generated ones.
class Adatok
{
  public:
    Adatok(std::string name, std::string path, std::string date, int points)
        : _name(name), _path(path), _date(date), _points(points) {}

    // Accessors are const so they can be used on const objects and references.
    std::string get_name() const { return _name; }
    std::string get_path() const { return _path; }
    std::string get_date() const { return _date; }
    int get_points() const { return _points; }

  private:
    std::string _name;
    std::string _path;
    std::string _date;
    int _points;
};
// One ranking entry: a name and its total points.
// Copy and move operations are the implicitly generated ones.
class Ranglista
{
  public:
    Ranglista(std::string name, int points) : _name(name), _points(points) {}

    // Accessors are const so they can be used on const objects and references.
    std::string get_name() const { return _name; }
    int get_points() const { return _points; }

    // Orders entries by descending points, so sorting with this operator puts
    // the highest score first. Const so it works on const operands.
    bool operator<(const Ranglista &other) const
    {
        return _points > other._points;
    }

  private:
    std::string _name;
    int _points;
};
// Accumulates points for one name together with the number of contributions.
// Copy and move operations are the implicitly generated ones.
class Vedes
{
  public:
    // A freshly constructed entry already counts as one contribution.
    Vedes(std::string name, int point) : _name(name), _point(point) {}

    // Accessors are const so they can be used on const objects and references.
    std::string get_name() const { return _name; }
    int get_point() const { return _point; }
    int get_count() const { return _count; }

    // Adds `points` to the total and counts one more contribution.
    // Takes a const reference so temporaries and const values are accepted too.
    void set_stuff(const int &points)
    {
        _point += points;
        _count++;
    }

    // Orders entries by descending contribution count. Const so it works on
    // const operands.
    bool operator<(const Vedes &other) const
    {
        return _count > other._count;
    }

  private:
    std::string _name;
    int _point;
    int _count = 1;
};
typedef std::map<std::string, int> path_value; // every path + its value
typedef std::vector<Adatok> name_path_date; // entries
typedef std::vector<Ranglista> ranglista; // ranking list
typedef std::map<std::string,std::pair<int,int>> vedes_vec; // defenses: name -> (count, points)
// Comparator over (name, (count, points)) entries, used to order the std::set in list_atlag.
typedef std::function<bool(std::pair<std::string,std::pair<int,int>>,std::pair<std::string,std::pair<int,int>>)> Comparator;
void create_pv(path_value &, boost::filesystem::path); // fills the path + value map
void create_npd(name_path_date &, path_value &, std::string input); // fills the vector of entries from one input line
void create_np(name_path_date &, path_value &); // fills the name + points map
void list_np(path_value &name_point); // prints the name + points map
void list_bejegyzesek(name_path_date &bejegyzesek); // prints the vector of entries
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path); // does such an entry already exist?
void create_rl(ranglista &rl_vec, path_value &name_point); // fills the ranking list
void list_rl(ranglista &rl_vec); // prints the ranking list
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec); // fills the defense average map
void list_atlag(vedes_vec &v_vec); // prints the defense averages
bool check_vedes(vedes_vec &v_vec, std::string name); // is there already a defense entry for this name?
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points); // updates the defense entry of a name
//void accumulate_pv(path_value&);
// Entry point. Parses the command line, optionally scans the given root
// directories into a path -> value map and prints the total, and optionally
// reads a CSV file and prints the ranking and/or the defense averages.
// Returns 1 when help is shown (or no arguments were given), 0 otherwise.
int main(int argc, char **argv)
{
// Default roots; replaced below when the "root" option is given.
std::vector<std::string> roots = {"City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/", "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/"};
// NOTE(review): only referenced by a commented-out line below; unused in the active code.
std::string input_file_name = "db-2018-05-06.csv";
/* OPTIONS */
boost::program_options::options_description desc("ALLOWED OPTIONS");
desc.add_options()("help", "help msg")("root,r", boost::program_options::value<std::vector<std::string>>())("csv", boost::program_options::value<std::string>(), "comma separated values")("rank", "rang lista")("vedes", "labor vedesek");
// Positional arguments are treated as additional "root" values.
boost::program_options::positional_options_description pdesc;
pdesc.add("root", -1);
boost::program_options::variables_map vm;
boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).positional(pdesc).run(), vm);
boost::program_options::notify(vm);
int sum = 0;
path_value pv_map;
if (vm.count("help") || argc == 1)
{
std::cout << desc << std::endl;
return 1;
}
if (vm.count("root"))
{
roots = vm["root"].as<std::vector<std::string>>();
for (auto &i : roots)
{
boost::filesystem::path path(i);
create_pv(pv_map, path);
}
// NOTE(review): sum is accumulated here but only printed by the commented-out
// line below; the std::accumulate call prints the same total.
for (path_value::iterator it{pv_map.begin()}; it != pv_map.end(); it++)
sum += it->second;
//std::cout << sum << std::endl;create_npd
std::cout << std::accumulate(pv_map.begin(), pv_map.end(), 0, [](int value, const std::map<std::string, int>::value_type &p) { return value + p.second; });
std::cout << std::endl;
}
if (vm.count("csv"))
{
//input_file_name = vm["csv"].as<std::string>();
// NOTE(review): the stream is not checked after opening; if the file cannot
// be opened the read loop below simply does not run.
std::ifstream input_file{vm["csv"].as<std::string>()};
name_path_date bejegyzesek;
std::string temp;
path_value name_point;
// One entry per input line, then the per-name point totals.
while (getline(input_file, temp))
create_npd(bejegyzesek, pv_map, temp);
create_np(bejegyzesek, name_point);
//list_bejegyzesek(bejegyzesek);
//list_np(name_point);
if (vm.count("rank"))
{
ranglista rl_vec;
create_rl(rl_vec, name_point);
list_rl(rl_vec);
}
if (vm.count("vedes"))
{
vedes_vec v_vec;
vedes_atlag(bejegyzesek, v_vec);
list_atlag(v_vec);
}
return 0;
}
return 0;
}
// Recursively walks `path`. For every regular file it sums one number per line
// and stores the sum in pv_map under the file's directory (the file path up to
// its last '/'). Sub-directories are visited recursively; other kinds of
// directory entries are ignored.
//
// Per line, everything before the last '/' and then everything before the
// first ' ' is dropped (a missing separator drops the whole line, as before);
// the number is then read starting at the first digit that remains. Lines
// that contain no digit after this trimming contribute nothing.
void create_pv(path_value &pv_map, boost::filesystem::path path)
{
    const char *const digits = "0123456789";
    boost::filesystem::directory_iterator it{path}, eod;
    BOOST_FOREACH (boost::filesystem::path const &p, std::make_pair(it, eod))
    {
        if (boost::filesystem::is_regular_file(p))
        {
            boost::filesystem::ifstream regular_file{p};
            std::string temp;
            int sum = 0; // value of the current file
            while (getline(regular_file, temp))
            {
                temp.erase(0, temp.find_last_of('/'));
                temp.erase(0, temp.find_first_of(' '));
                const std::string::size_type first = temp.find_first_of(digits);
                if (first == std::string::npos)
                    continue; // no number on this line; substr(npos) would throw
                const std::string::size_type last = temp.find_last_of(digits);
                // substr takes a length, not an end position.
                sum += std::atoi(temp.substr(first, last - first + 1).c_str());
            }
            std::string result = p.string();
            std::string result_path = result.substr(0, result.find_last_of('/'));
            pv_map[result_path] = sum;
        }
        else if (boost::filesystem::is_directory(p))
        {
            // Only directories can be iterated; anything else would make the
            // directory_iterator constructor throw.
            create_pv(pv_map, p);
        }
    }
}
//void accumulate_pv(path_value& pv_map)
//{
// std::cout<<std::accumulate(pv_map.begin(),pv_map.end(),0,[](int value,const path_value::int& p){return value+p.second;});
//}
// Parses one CSV line (name, path, date, ...) and appends it to `bejegyzesek`
// unless an entry with the same name and path already exists. The first two
// characters of the path field are dropped before it is used. The entry's
// points are looked up in pv_map under a fixed base directory plus that path.
// Lines with fewer than three fields, or with a path field shorter than two
// characters, are ignored.
void create_npd(name_path_date &bejegyzesek, path_value &pv_map, std::string input)
{
    typedef boost::tokenizer<boost::escaped_list_separator<char>> csv_tokenizer;
    csv_tokenizer tokenizer{input};
    csv_tokenizer::iterator it{tokenizer.begin()};
    const csv_tokenizer::iterator end{tokenizer.end()};

    // Never dereference the iterator without checking that a field is there.
    if (it == end)
        return;
    std::string name = *it;
    if (++it == end)
        return;
    std::string path = *it;
    if (++it == end)
        return;
    std::string date = *it;

    // substr(2) throws std::out_of_range when the path has fewer than 2 characters.
    if (path.size() < 2)
        return;
    path = path.substr(2);

    if (!check_bejegyzesek(bejegyzesek, name, path))
        bejegyzesek.push_back(Adatok(name, path, date, pv_map["/home/erik/Documents/Programs/"+path]));
}
// Returns true when `bejegyzesek` already holds an entry with the given name
// and path, false otherwise.
bool check_bejegyzesek(name_path_date &bejegyzesek, std::string name, std::string path)
{
    for (auto &entry : bejegyzesek)
    {
        if (entry.get_name() != name)
            continue;
        if (entry.get_path() == path)
            return true;
    }
    return false;
}
// Returns true when `v_vec` contains an entry keyed by `name`.
bool check_vedes(vedes_vec &v_vec, std::string name)
{
    return v_vec.find(name) != v_vec.end();
}
// Records one more result for `name`: increments the stored count (pair.first)
// and adds `points` to the stored total (pair.second).
// Does nothing when `name` has no entry in `v_vec`.
void vedes_elem(vedes_vec &v_vec, std::string name, int &&points)
{
    vedes_vec::iterator found = v_vec.find(name);
    if (found == v_vec.end())
        return; // dereferencing end() would be undefined behaviour

    // Bind by reference: updating a copy of the pair would leave the map unchanged.
    std::pair<int,int> &stats = found->second;
    stats.first++;
    stats.second += points;
}
// Sums the points of all entries per name into `name_point`.
void create_np(name_path_date &bejegyzesek, path_value &name_point)
{
    for (auto &entry : bejegyzesek)
    {
        // operator[] creates a missing name with a total of 0 before adding.
        name_point[entry.get_name()] += entry.get_points();
    }
}
// Prints "name points" for every entry whose points are non-zero.
void list_np(path_value &name_point)
{
    for (const auto &entry : name_point)
    {
        if (entry.second == 0)
            continue;
        std::cout << entry.first << " " << entry.second << std::endl;
    }
}
// Prints name, path and points of every entry whose name is "Varga Erik".
void list_bejegyzesek(name_path_date &bejegyzesek)
{
    for (auto &entry : bejegyzesek)
    {
        if (entry.get_name() != "Varga Erik")
            continue;
        std::cout << entry.get_name() << " " << entry.get_path() << " " << entry.get_points() << std::endl;
    }
}
// Appends a ranking entry for every name with a positive point total, then
// sorts the whole list with Ranglista::operator<.
void create_rl(ranglista &rl_vec, path_value &name_point)
{
    for (path_value::iterator entry = name_point.begin(); entry != name_point.end(); ++entry)
    {
        if (entry->second <= 0)
            continue;
        rl_vec.push_back(Ranglista(entry->first, entry->second));
    }
    std::sort(rl_vec.begin(), rl_vec.end());
}
// Prints "name points" for every ranking entry, in container order.
void list_rl(ranglista &rl_vec)
{
    for (ranglista::iterator entry = rl_vec.begin(); entry != rl_vec.end(); ++entry)
    {
        std::cout << entry->get_name() << " " << entry->get_points() << std::endl;
    }
}
// Collects per-name statistics for entries whose path contains the lab-defense
// prefix and whose points are non-zero: the first such entry of a name is
// inserted as (1, points), later ones are passed to vedes_elem.
void vedes_atlag(name_path_date &bejegyzesek, vedes_vec &v_vec)
{
    std::string key = "City/Debrecen/Oktatás/Informatika/Programozás/DEIK/Prog1/Labor/Védés/";
    for (auto &entry : bejegyzesek)
    {
        const bool is_defense = entry.get_path().find(key) != std::string::npos;
        if (!is_defense || !entry.get_points())
            continue;

        if (check_vedes(v_vec, entry.get_name()))
            vedes_elem(v_vec, entry.get_name(), entry.get_points());
        else
            v_vec.insert(std::make_pair(entry.get_name(), std::make_pair(1, entry.get_points())));
    }
}
// Prints, for every entry kept by the set, its name followed by
// (second - first) / first of its pair (integer division).
// The set orders entries by the first member of the pair only, so entries
// whose first members are equal are equivalent to the set and only one is kept.
void list_atlag(vedes_vec &v_vec)
{
    typedef std::pair<std::string,std::pair<int,int>> entry_t;

    Comparator compFunctor = [](entry_t lhs, entry_t rhs)
    {
        return lhs.second.first < rhs.second.first;
    };

    std::set<entry_t,Comparator> v_set(compFunctor);
    v_set.insert(v_vec.begin(), v_vec.end());

    for (entry_t entry : v_set)
    {
        const int first = entry.second.first;
        const int second = entry.second.second;
        std::cout << entry.first << " " << (second - first) / first << std::endl;
    }
}
so, as described here
template<
class Key,
class Compare = std::less<Key>,
class Allocator = std::allocator<Key>
> class set;
std::set is an associative container that contains a sorted set of unique objects of type Key.
I cleaned up your code, and made a Minimal, Complete, and Verifiable example,
#include <iostream>
#include <map>
#include <set>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using points_set_pair = std::pair<std::string, point_pair>;

// Orders entries by the first member of their point pair only; two entries
// with the same first member are therefore equivalent for the set.
auto compFunctor = [](const points_set_pair &lhs, const points_set_pair &rhs)
{
    const int lhs_key = lhs.second.first;
    const int rhs_key = rhs.second.first;
    return lhs_key < rhs_key;
};
using points_set = std::set<points_set_pair, decltype(compFunctor)>;

// Copies the map into a set ordered by compFunctor and prints, for each entry
// the set kept, its name and (second - first) / first (integer division).
void list_average(const points_map &v_map)
{
    points_set v_set(compFunctor);
    v_set.insert(v_map.begin(), v_map.end());

    for (const points_set_pair &entry : v_set)
    {
        const int first = entry.second.first;
        const int second = entry.second.second;
        std::cout << entry.first << " " << (second - first) / first << "\n";
    }
}
Now consider the first version of main
// First demo: the two entries have different first members (1 and 3).
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 3, 4}}};
list_average(v_map);
}
output:
foo 1
bar 0
Now consider the second version of main:
// Second demo: both entries have the same first member (1).
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}};
list_average(v_map);
}
output:
bar 3
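See the problem? Since .second.first is 1 for both elements, the comparator considers them equivalent, so the set keeps only one of them and silently drops the other. A std::set stores unique elements only, and "unique" is decided by the comparator. That's the downside of std::set here.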
So, what then?
Don't use std::set, but use std::vector and std::sort. Example:
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
using point_pair = std::pair<int,int>;
using points_map = std::map<std::string, point_pair>;
using string_point_pair = std::pair<std::string, point_pair>;

// Orders entries by their point pair: first member first, second member as
// the tie-breaker (exactly the lexicographic order of std::pair<int,int>).
auto compFunctor = [](string_point_pair const &lhs, string_point_pair const &rhs)
{
    return lhs.second < rhs.second;
};

// Copies the map into a vector, sorts it with compFunctor and prints, for each
// entry, its name and (second - first) / first (integer division).
void list_average(points_map const &v_map)
{
    std::vector<string_point_pair> v_vec;
    v_vec.assign(v_map.begin(), v_map.end());
    std::sort(v_vec.begin(), v_vec.end(), compFunctor);

    for (const string_point_pair &entry : v_vec)
    {
        const int first = entry.second.first;
        const int second = entry.second.second;
        std::cout << entry.first << " " << (second - first) / first << "\n";
    }
}
// Demo: "foo" and "bar" share the same first member (1) but differ in the second.
int main()
{
points_map v_map = { {"foo", { 1, 2}}, {"bar", { 1, 4}}, {"baz", { 2, 4}}};
list_average(v_map);
}
Output:
foo 1
bar 3
baz 1
live demo

How to make this matching algorithm run faster?

I have two lists of pointers to a data structure X. The algorithm is very simple:
it loops over the first list A and, for each element, tries to find the first matching element in list B. The requirement is to have at least 50k elements in each list:
#include <iostream>
#include <memory>
#include <chrono>
#include <vector>
#include <algorithm>
#include <string>
// Record with four string fields plus an optional link to a matching record.
struct X {
    std::string field_1;
    std::string field_2;
    std::string field_3;
    std::string field_4;

    X(std::string f1, std::string f2, std::string f3, std::string f4)
        : field_1(f1)
        , field_2(f2)
        , field_3(f3)
        , field_4(f4)
    {}

    // Returns true when `x` is non-null and all four fields of *x equal the
    // fields of this object. Const, so it can be called on const objects.
    bool equal(const std::shared_ptr<X>& x) const {
        return x &&
            (x->field_1 == field_1) &&
            (x->field_2 == field_2) &&
            (x->field_3 == field_3) &&
            (x->field_4 == field_4);
    }

    // Non-owning pointer to the record this one was matched with, if any.
    X *match = nullptr;
};
typedef std::shared_ptr<X> X_ptr;
// Scope timer: remembers when it was created (or last reset) and prints
// "Elapsed(<name>): <seconds>" to std::cout when it is destroyed.
class Timer
{
    using clock_ = std::chrono::high_resolution_clock;
    using second_ = std::chrono::duration<double, std::ratio<1> >;

public:
    Timer(std::string name) : beg_(clock_::now()), name_(name) {}

    ~Timer() {
        std::cout << "Elapsed(" << name_ << "): " << elapsed() << std::endl;
    }

    // Restarts the measurement from now.
    void reset() { beg_ = clock_::now(); }

    // Seconds (as a double) since construction or the last reset().
    double elapsed() const {
        const second_ delta = clock_::now() - beg_;
        return delta.count();
    }

private:
    std::chrono::time_point<clock_> beg_;
    std::string name_;
};
// Returns a string of `length` characters, each picked with rand() from the
// digits 0-9 and the upper-case letters A-Z.
std::string random_string(size_t length)
{
    static const char alphabet[] =
        "0123456789"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // Exclude the terminating '\0' from the pickable characters.
    const size_t alphabet_size = sizeof(alphabet) - 1;

    std::string result(length, 0);
    for (size_t i = 0; i < length; ++i) {
        result[i] = alphabet[rand() % alphabet_size];
    }
    return result;
}
// Benchmark driver: fills two lists with MAX_ELEM random records each, then,
// for every element of list_A, searches list_B linearly for the first equal
// record and stores a pointer to it in a->match.
int main()
{
Timer t("main");
std::vector <X_ptr> list_A;
std::vector <X_ptr> list_B;
const int MAX_ELEM = 50000;
list_A.reserve(MAX_ELEM);
list_B.reserve(MAX_ELEM);
{
Timer t("insert");
for (int i = 0; i < MAX_ELEM; i++) {
list_A.push_back(X_ptr(new X{ random_string(2), random_string(2), random_string(2), random_string(2) }));
list_B.push_back(X_ptr(new X{ random_string(2), random_string(2), random_string(2), random_string(2) }));
}
}
{
Timer t("match");
// NOTE(review): [list_B] captures the whole vector by copy, and the inner
// [a] captures the shared_ptr by copy for every element of list_A.
std::for_each(list_A.begin(), list_A.end(), [list_B](X_ptr& a) {
// Linear scan of list_B for each a: |list_A| * |list_B| comparisons in the worst case.
auto found_b = std::find_if(list_B.begin(), list_B.end(), [a](const X_ptr& b) {
return a->equal(b);
});
if (found_b != list_B.end()) {
a->match = found_b->get();
std::cout << "match OK \n";
}
});
}
}
On my machine the program runs extremely slowly:
Elapsed(insert): 0.05566
Elapsed(match): 98.3739
Elapsed(main): 98.452
Would appreciate it if you can think of any other way to optimize it to run faster.
You are using vectors, so each lookup into list_B takes O(n), where n is the number of elements in B. This means the total algorithm is O(m*n), where m is the number of elements in list_A. Thus, if m and n are similar in size, you have an O(n^2) algorithm. That is too slow for any large n. To fix this, convert list_B into an unordered_map (you can do this as part of the algorithm, as the conversion is O(n)), where each key is an element from list B and the value is anything, say 0. Note that the map must hash and compare keys by the contents of X, not by pointer value; otherwise an element of list_A will never be found, because it is a different object. You can then perform lookups into the map in O(1) average time using find() on the map. Thus your algorithm becomes O(n), way better than O(n^2).
For example
std::unordered_map< X_ptr, int > value_map;
Time r t("match");
std::for_each(list_B.begin(), list_B.end(), [&](X_ptr& b) {
value_map[b] = 0;
});
std::for_each(list_A.begin(), list_A.end(), [value_map](X_ptr& a) {
auto found_b = value_map.find( a );
if ( found_b != value_map.end() )
{
a->match = found_b->first.get();
std::cout << "match OK \n";
}
});
}
Your Version:
Elapsed(insert): 0.0758608
Elapsed(match): 182.899
Elapsed(main): 182.991
New Version:
Elapsed(insert): 0.0719907
Elapsed(match): 0.0388562
Elapsed(main): 0.130884
You may use something like the following:
// Sort list_B once (outside the timed scope) so each lookup can use binary search.
// NOTE(review): deref_less<X> is not defined in this snippet — presumably a
// comparator that orders X_ptr by the pointed-to X; confirm against the linked example.
std::sort(list_B.begin(), list_B.end(), deref_less<X>);
{
Timer t("match");
for (const auto& a : list_A) {
// First element of list_B that does not compare less than a.
auto it = std::lower_bound(list_B.begin(), list_B.end(), a, deref_less<X>);
// NOTE(review): relies on an operator== for X, which the struct shown in the
// question does not declare — confirm against the linked example.
if (it != list_B.end() && **it == *a) {
a->match = it->get();
std::cout << "match OK \n";
}
}
}
Live example.