Hash an arbitrary precision value (boost::multiprecision::cpp_int)

Hash an arbitrary precision value (boost::multiprecision::cpp_int) - c++

I need to get the hash of a value with arbitrary precision (from Boost.Multiprecision); I use the cpp_int backend. I came up with the following code:
boost::multiprecision::cpp_int x0 = 1;
const auto seed = std::hash<std::string>{}(x0.str());
I don't need the code to be as fast as possible, but I find it very clumsy to hash the string representation.
So my question is twofold:
Keeping the arbitrary precision, can I hash the value more efficiently?
Maybe I should not insisting on keeping the arbitrary precision and I should convert to a double which I could hash easily (I would still however make the comparison needed for the hash table using the arbitrary precision value)?

You can (ab)use the serialization support:
Support for serialization comes in two forms:
Classes number, debug_adaptor, logged_adaptor and rational_adaptor have "pass through" serialization support which requires the underlying backend to be serializable.
Backends cpp_int, cpp_bin_float, cpp_dec_float and float128 have full support for Boost.Serialization.
So, let me cobble something together that works with boost and std unordered containers:
template <typename Map>
void test(Map const& map) {
std::cout << "\n" << __PRETTY_FUNCTION__ << "\n";
for(auto& p : map)
std::cout << p.second << "\t" << p.first << "\n";
}
int main() {
using boost::multiprecision::cpp_int;
test(std::unordered_map<cpp_int, std::string> {
{ cpp_int(1) << 111, "one" },
{ cpp_int(2) << 222, "two" },
{ cpp_int(3) << 333, "three" },
});
test(boost::unordered_map<cpp_int, std::string> {
{ cpp_int(1) << 111, "one" },
{ cpp_int(2) << 222, "two" },
{ cpp_int(3) << 333, "three" },
});
}
Let's forward the relevant hash<> implementations to our own hash_impl specialization that uses Multiprecision and Serialization:
namespace std {
template <typename backend>
struct hash<boost::multiprecision::number<backend> >
: mp_hashing::hash_impl<boost::multiprecision::number<backend> >
{};
}
namespace boost {
template <typename backend>
struct hash<multiprecision::number<backend> >
: mp_hashing::hash_impl<multiprecision::number<backend> >
{};
}
Now, of course, this begs the question, how is hash_impl implemented?
template <typename T> struct hash_impl {
size_t operator()(T const& v) const {
using namespace boost;
size_t seed = 0;
{
iostreams::stream<hash_sink> os(seed);
archive::binary_oarchive oa(os, archive::no_header | archive::no_codecvt);
oa << v;
}
return seed;
}
};
This looks pretty simple. That's because Boost is awesome, and writing a hash_sink device for use with Boost Iostreams is just the following straightforward exercise:
namespace io = boost::iostreams;
struct hash_sink {
hash_sink(size_t& seed_ref) : _ptr(&seed_ref) {}
typedef char char_type;
typedef io::sink_tag category;
std::streamsize write(const char* s, std::streamsize n) {
boost::hash_combine(*_ptr, boost::hash_range(s, s+n));
return n;
}
private:
size_t* _ptr;
};
Full Demo:
Live On Coliru
#include <iostream>
#include <iomanip>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/multiprecision/cpp_int.hpp>
#include <boost/multiprecision/cpp_int/serialize.hpp>
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/stream_buffer.hpp>
#include <boost/iostreams/stream.hpp>
#include <boost/functional/hash.hpp>
namespace mp_hashing {
namespace io = boost::iostreams;
struct hash_sink {
hash_sink(size_t& seed_ref) : _ptr(&seed_ref) {}
typedef char char_type;
typedef io::sink_tag category;
std::streamsize write(const char* s, std::streamsize n) {
boost::hash_combine(*_ptr, boost::hash_range(s, s+n));
return n;
}
private:
size_t* _ptr;
};
template <typename T> struct hash_impl {
size_t operator()(T const& v) const {
using namespace boost;
size_t seed = 0;
{
iostreams::stream<hash_sink> os(seed);
archive::binary_oarchive oa(os, archive::no_header | archive::no_codecvt);
oa << v;
}
return seed;
}
};
}
#include <unordered_map>
#include <boost/unordered_map.hpp>
namespace std {
template <typename backend>
struct hash<boost::multiprecision::number<backend> >
: mp_hashing::hash_impl<boost::multiprecision::number<backend> >
{};
}
namespace boost {
template <typename backend>
struct hash<multiprecision::number<backend> >
: mp_hashing::hash_impl<multiprecision::number<backend> >
{};
}
template <typename Map>
void test(Map const& map) {
std::cout << "\n" << __PRETTY_FUNCTION__ << "\n";
for(auto& p : map)
std::cout << p.second << "\t" << p.first << "\n";
}
int main() {
using boost::multiprecision::cpp_int;
test(std::unordered_map<cpp_int, std::string> {
{ cpp_int(1) << 111, "one" },
{ cpp_int(2) << 222, "two" },
{ cpp_int(3) << 333, "three" },
});
test(boost::unordered_map<cpp_int, std::string> {
{ cpp_int(1) << 111, "one" },
{ cpp_int(2) << 222, "two" },
{ cpp_int(3) << 333, "three" },
});
}
Prints
void test(const Map&) [with Map = std::unordered_map<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<> >, std::basic_string<char> >]
one 2596148429267413814265248164610048
three 52494017394792286184940053450822912768476066341437098474218494553838871980785022157364316248553291776
two 13479973333575319897333507543509815336818572211270286240551805124608
void test(const Map&) [with Map = boost::unordered::unordered_map<boost::multiprecision::number<boost::multiprecision::backends::cpp_int_backend<> >, std::basic_string<char> >]
three 52494017394792286184940053450822912768476066341437098474218494553838871980785022157364316248553291776
two 13479973333575319897333507543509815336818572211270286240551805124608
one 2596148429267413814265248164610048
As you can see, the difference in implementation between Boost's and the standard library's unordered_map show up in the different orderings for identical hashes.

Just to say that I've just added native hashing support (for Boost.Hash and std::hash) to git develop. It works for all the number types including those from GMP etc. Unfortunately that code won't be released until Boost-1.62 now.
The answer above that (ab)uses serialization support, is actually extremely cool and really rather clever ;) However, it wouldn't work if you wanted to use a vector-based hasher like CityHash, I added an example of using that by accessing the limbs directly to the docs: https://htmlpreview.github.io/?https://github.com/boostorg/multiprecision/blob/develop/doc/html/boost_multiprecision/tut/hash.html Either direct limb-access or the serialization tip will work with all previous releases of course.

Related

How to use u8_to_u32_iterator in Boost Spirit X3?

I am using Boost Spirit X3 to create a programming language, but when I try to support Unicode, I get an error!
Here is an example of a simplified version of that program.
#define BOOST_SPIRIT_X3_UNICODE
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
struct sample : x3::symbols<unsigned> {
sample()
{
add("48", 10);
}
};
int main()
{
const std::string s("🌸");
boost::u8_to_u32_iterator<std::string::const_iterator> first{cbegin(s)},
last{cend(s)};
x3::parse(first, last, sample{});
}
Live on wandbox
What should I do?

As you noticed, internally char_encoding::unicode employs char32_t.
So, first changing the symbols accordingly:
template <typename T>
using symbols = x3::symbols_parser<boost::spirit::char_encoding::unicode, T>;
struct sample : symbols<unsigned> {
sample() { add(U"48", 10); }
};
Now the code fails calling into case_compare:
/home/sehe/custom/boost_1_78_0/boost/spirit/home/x3/string/detail/tst.hpp|74 col 33| error: no match for call to ‘(boost::spirit::x3::case_compare<boost::spirit::char_encoding::unicode>) (reference, char32_t&)’
As you can see it expects a char32_t reference, but u8_to_u32_iterator returns unsigned ints (std::uint32_t).
Just for comparison / sanity check: https://godbolt.org/z/1zozxq96W
Luckily you can instruct the u8_to_u32_iterator to use another co-domain type:
Live On Compiler Explorer
#define BOOST_SPIRIT_X3_UNICODE
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
template <typename T>
using symbols = x3::symbols_parser<boost::spirit::char_encoding::unicode, T>;
struct sample : symbols<unsigned> {
sample() { add(U"48", 10)(U"🌸", 11); }
};
int main() {
auto test = [](auto const& s) {
boost::u8_to_u32_iterator<decltype(cbegin(s)), char32_t> first{
cbegin(s)},
last{cend(s)};
unsigned parsed_value;
if (x3::parse(first, last, sample{}, parsed_value)) {
std::cout << s << " -> " << parsed_value << "\n";
} else {
std::cout << s << " FAIL\n";
}
};
for (std::string s : {"🌸", "48", "🤷"})
test(s);
}
Prints
🌸 -> 11
48 -> 10
🤷 FAIL

Getting field names with boost::pfr

Hi I'm using boost::pfr for basic reflection, it works fine, but the problem is it is only print or deal with the field values, like with boost::pfr::io it prints each member of the struct, but how can I print it as name value pairs, same issue with for_each_field, the functor only accepts values, but not names. How can I get the field names?
struct S {
int n;
std::string name;
};
S o{1, "foo"};
std::cout << boost::pfr::io(o);
// Outputs: {1, "foo"}, how can I get n = 1, name = "foo"?

If you think adapting a struct is not too intrusive (it doesn't change your existing definitions, and you don't even need to have it in a public header):
BOOST_FUSION_ADAPT_STRUCT(S, n, name)
Then you can concoct a general operator<< for sequences:
namespace BF = boost::fusion;
template <typename T,
typename Enable = std::enable_if_t<
// BF::traits::is_sequence<T>::type::value>
std::is_same_v<BF::struct_tag, typename BF::traits::tag_of<T>::type>>>
std::ostream& operator<<(std::ostream& os, T const& v)
{
bool first = true;
auto visitor = [&]<size_t I>() {
os << (std::exchange(first, false) ? "" : ", ")
<< BF::extension::struct_member_name<T, I>::call()
<< " = " << BF::at_c<I>(v);
};
// visit members
[&]<size_t... II>(std::index_sequence<II...>)
{
return ((visitor.template operator()<II>(), ...);
}
(std::make_index_sequence<BF::result_of::size<T>::type::value>{});
return os;
}
(Prior to c++20 this would require some explicit template types instead of the lambdas, perhaps making it more readable. I guess I'm lazy...)
Here's a live demo: Live On Compiler Explorer
n = 1, name = foo
Bonus: Correctly quoting string-like types
Live On Compiler Explorer
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/at_c.hpp>
#include <iostream>
#include <iomanip>
namespace MyLib {
struct S {
int n;
std::string name;
};
namespace BF = boost::fusion;
static auto inline pretty(std::string_view sv) { return std::quoted(sv); }
template <typename T,
typename Enable = std::enable_if_t<
not std::is_constructible_v<std::string_view, T const&>>>
static inline T const& pretty(T const& v)
{
return v;
}
template <typename T,
typename Enable = std::enable_if_t<
// BF::traits::is_sequence<T>::type::value>
std::is_same_v<BF::struct_tag, typename BF::traits::tag_of<T>::type>>>
std::ostream& operator<<(std::ostream& os, T const& v)
{
bool first = true;
auto visitor = [&]<size_t I>() {
os << (std::exchange(first, false) ? "" : ", ")
<< BF::extension::struct_member_name<T, I>::call()
<< " = " << pretty(BF::at_c<I>(v));
};
// visit members
[&]<size_t... II>(std::index_sequence<II...>)
{
return (visitor.template operator()<II>(), ...);
}
(std::make_index_sequence<BF::result_of::size<T>::type::value>{});
return os;
}
} // namespace MyLib
BOOST_FUSION_ADAPT_STRUCT(MyLib::S, n, name)
int main()
{
MyLib::S o{1, "foo"};
std::cout << o << "\n";
}
Outputs:
n = 1, name = "foo"

The library cannot offer any such functionality because it is currently impossible to obtain the name of a member of a class as value of an object.
If you want to output field names, you need to declare string objects mapped with the members and implement a operator<< which uses these strings manually.
To do this a more sophisticated reflection library would probably offer macros to use in the definition of the members. Macros can expand their argument(s) into a declaration using the provided name as identifier while also producing code using the name as string literal (via the # macro replacement operator).

It's stupid but hey, with a stringifying macro per field it could be enough for you.
C++14, no additional library
#include <boost/pfr.hpp>
struct S
{
int n;
std::string name;
static char const* const s_memNames[2];
};
char const* const S::s_memNames[2] = {"n", "name"};
// utility
template< size_t I, typename TR >
char const* MemberName()
{
using T = std::remove_reference_t<TR>;
if (I < std::size(T::s_memNames))
return T::s_memNames[I];
return nullptr;
}
// test:
#include <iostream>
using std::cout;
template< size_t I, typename T >
void StreamAt(T&& inst)
{
char const* n = MemberName<I,T>();
auto& v = boost::pfr::get<I>(inst);
cout << "(" << n << " = " << v << ")";
}
int main()
{
S s{2, "boo"};
boost::pfr::for_each_field(s, [&](const auto&, auto I)
{
StreamAt<decltype(I)::value>(s);
cout << "\n";
});
}
output:
(n = 2)
(name = boo)
(previous version of the suggestion, this one has more fluff so less interesting)
#include <boost/pfr.hpp>
// library additions:
static char const* g_names[100];
template< size_t V >
struct Id : std::integral_constant<size_t, V > {};
template< size_t I, typename T >
using TypeAt = boost::pfr::tuple_element_t<I, T>;
template<std::size_t Pos, class Struct>
constexpr int Ni() // name index
{
return std::tuple_element_t<Pos, typename std::remove_reference_t<Struct>::NamesAt >::value;
}
struct StaticCaller
{
template< typename Functor >
StaticCaller(Functor f) { f();}
};
///
/// YOUR CODE HERE
struct S
{
using NamesAt = std::tuple<Id<__COUNTER__>, Id<__COUNTER__>>; // add this
int n;
std::string name;
static void Init() // add this
{
g_names[Ni<0,S>()] = "n";
g_names[Ni<1,S>()] = "name";
}
};
StaticCaller g_sc__LINE__(S::Init); // add this
// utilities
template< size_t I, typename T >
auto GetValueName(T&& inst)
{
return std::make_pair(boost::pfr::get<I>(inst), g_names[Ni<I,T>()]);
}
// test:
#include <iostream>
using std::cout;
template< size_t I, typename T >
void StreamAt(T&& inst)
{
auto const& [v,n] = GetValueName<I>(inst);
cout << "(" << v << ", " << n << ")";
}
int main()
{
S s{2, "boo"};
boost::pfr::for_each_field(s, [&](const auto&, auto I)
{
StreamAt<decltype(I)::value>(s);
cout << "\n";
});
}
output
(2, n)
(boo, name)

Vector of string shared memory map

How to append string to a vector contained inside map? Structure is map(float,vector(string)) where the map is in shared memory.my question is if key==desired key then append string to the vector of strings?

Do you mean something like this:
#include <map>
#include <vector>
#include <string>
#include <iostream>
int main()
{
std::map<float, std::vector<std::string>> m;
m[.5f].emplace_back("First");
m[.5f].emplace_back("Second");
m[.0f].emplace_back("Hello");
m[.0f].emplace_back("World");
for(const auto& [key, value] : m)
{
std::cout << "Key: " << key << '\n';
for(const auto& str : value)
std::cout << '\t' << str << '\n';
}
std::cout.flush();
return 0;
}

Doing this in shared memory is pretty hard, actually.
If you get all the allocators right, and add the locking, you'd usually get very clunky code that is hard to read due to all the allocator passing around.
You can, however, employ Boost's scoped allocator adaptor which will do a lot (lot) of magic that makes life better.
I think the following code sample just about nails the sweet spot.
Warning: This builds on years of experience trying to beat this into submission. If you fall just outside of the boundary of "magic" (mostly the in-place construction support due to uses_allocator<> and scoped_allocator_adaptor) you will find it breaks up and you'll be writing a lot of manual constructor/conversion calls to make it work.
Live On Coliru
#define DEMO
#include <iostream>
#include <iomanip>
#include <mutex>
#include <boost/interprocess/containers/map.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/managed_mapped_file.hpp> // For Coliru (doesn't support shared memory)
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/container/scoped_allocator.hpp>
namespace bip = boost::interprocess;
namespace bc = boost::container;
namespace Shared {
using Segment = bip::managed_mapped_file; // Coliru doesn't support bip::managed_shared_memory
template <typename T> using Alloc = bc::scoped_allocator_adaptor<bip::allocator<T, Segment::segment_manager> >;
template <typename V>
using Vector = bip::vector<V, Alloc<V> >;
template <typename K, typename V, typename Cmp = std::less<K> >
using Map = bip::map<K, V, Cmp, Alloc<std::pair<K const, V> > >;
using String = bip::basic_string<char, std::char_traits<char>, Alloc<char> >;
using Mutex = bip::interprocess_mutex;
}
namespace Lib {
using namespace Shared;
struct Data {
using Map = Shared::Map<float, Shared::Vector<Shared::String> >;
mutable Mutex _mx;
Map _map;
template <typename Alloc> Data(Alloc alloc = {}) : _map(alloc) {}
bool append(float f, std::string s) {
std::lock_guard<Mutex> lk(_mx); // lock
auto it = _map.find(f);
bool const exists = it != _map.end();
#ifndef DEMO
if (exists) {
it->second.emplace_back(s);
}
#else
// you didn't specify this, but lets insert new keys here, if
// only for the demo
_map[f].emplace_back(s);
#endif
return exists;
}
size_t size() const {
std::lock_guard<Mutex> lk(_mx); // lock
return _map.size();
}
friend std::ostream& operator<<(std::ostream& os, Data const& data) {
std::lock_guard<Mutex> lk(data._mx); // lock
for (auto& [f,v] : data._map) {
os << f << " ->";
for (auto& ss : v) {
os << " " << std::quoted(std::string(ss));
}
os << "\n";
}
return os;
}
};
}
struct Program {
Shared::Segment msm { bip::open_or_create, "data.bin", 10*1024 };
Lib::Data& _data = *msm.find_or_construct<Lib::Data>("data")(msm.get_segment_manager());
void report() const {
std::cout << "Map contains " << _data.size() << " entries\n" << _data;
}
};
struct Client : Program {
void run(float f) {
_data.append(f, "one");
_data.append(f, "two");
}
};
int main() {
{
Program server;
server.report();
Client().run(.5f);
Client().run(.6f);
}
// report again
Program().report();
}
First run would print:
Map contains 0 entries
Map contains 2 entries
0.5 -> "one" "two"
0.6 -> "one" "two"
A second run:
Map contains 2 entries
0.5 -> "one" "two"
0.6 -> "one" "two"
Map contains 2 entries
0.5 -> "one" "two" "one" "two"
0.6 -> "one" "two" "one" "two"

Python's enumerate in c++

In Python , instead of
colors = ['red', 'green', 'blue', 'yellow']
for i in range(len(colors)):
print i, '--->', colors[i]
One can write
for i, color in enumerate(colors):
print i, '--->', color
Is there a similar thing in c++?

You can actually implement something similar in c++17.
Here is a sketch(c++-ish pseudocode), I use values everywhere and they should be replaced by appropriate references/forwarding, also you should fix how you get types (use iterator_traits), may be support unknown size, may be implement proper iterator interface etc
template <typename T>
struct EnumeratedIterator {
size_t index;
T iterator;
void operator++() {
++iterator;
}
std::pair<size_t, T>() {
return {index, *iterator};
}
bool operator !=(EnumeratedIterator o) {
return iterator != o.iterator;
}
}
template <typename T>
struct Enumerated {
T collection;
EnumeratedIterator<typename T::iterator> begin() {
return {0, collection.begin()};
}
EnumeratedIterator<typename T::iterator> end() {
return {collection.size(), collection.end()};
}
}
auto enumerate(T col) {
return Enumerated<T>(col);
}
and then use it like
for (auto [index, color] : enumerate(vector<int>{5, 7, 10})) {
assert(index < color);
}

Boost provides an adaptor which allows to do something similiar:
http://www.boost.org/doc/libs/1_63_0/libs/range/doc/html/range/reference/adaptors/reference/indexed.html
The following code is taken from the link above
#include <boost/range/adaptor/indexed.hpp>
#include <boost/assign.hpp>
#include <iterator>
#include <iostream>
#include <vector>
int main(int argc, const char* argv[])
{
using namespace boost::assign;
using namespace boost::adaptors;
std::vector<int> input;
input += 10,20,30,40,50,60,70,80,90;
for (const auto& element : input | indexed(0))
{
std::cout << "Element = " << element.value()
<< " Index = " << element.index()
<< std::endl;
}
return 0;
}

Maybe you can emulate it like this:
int i = 0;
for (auto color : { "red", "green", "blue", "yellow" })
std::cout << i++ << "--->" << color << std::endl;

How to stop string concatenation in Spirit Qi 'repeat' parser?

I would like to split a string into parts:
input = "part1/part2/part3/also3"
and fill the structure that consist of three std::string with these parts.
struct strings
{
std::string a; // <- part1
std::string b; // <- part2
std::string c; // <- part3/also3
};
However my parser seems to merge the parts together and store it into the first std::string.
Here is the code on coliru
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
struct strings
{
std::string a;
std::string b;
std::string c;
};
BOOST_FUSION_ADAPT_STRUCT(strings,
(std::string, a) (std::string, b) (std::string, c))
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/'] > last_part;
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
}
The output is:
Parsed: 1
a:onetwo
b:three/four
c:
while I expected:
Parsed: 1
a:one
b:two
c:three/four
I'd like not to modify the grammar heavily and leave "repeat" statement in it, because the "real" grammar is much more complex of course and I will need to have it there. Just need to find the way to disable the concatenations. I tried
repeat (parts-1) [as_string[part] > '/']
but that does not compile.

The trouble here is specifically that qi::repeat is documented to expose a container of element-types.
Now, because the exposed attribute type of the rule (strings) is not a container-type, Spirit "knows" how to flatten the values.
Of course it's not what you wanted in this case, but usually this heuristic makes for really convenient accumulation of string values.
Fix 1: use a container attribute
You could witness the reverse fix by getting rid of the non-container (sequence) target attribute:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
using strings = std::vector<std::string>;
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/']
> last_part
;
part = +(~char_ ("/"))
;
last_part = +char_
;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
for(auto i = 0ul; i<ss.size(); ++i)
std::cout << static_cast<char>('a'+i) << ":" << ss[i] << "\n";
}
}
What you really wanted:
Of course you want to keep the struct/sequence adaptation (?); In this case that's really tricky because as soon as you use any kind of Kleene operator (*,%) or qi::repeat you'll have the attribute transformation rules as outlined above, ruining your mood.
Luckily, I just remembered I have a hacky solution based on the auto_ parser. Note the caveat in this older answer though:
Read empty values with boost::spirit
CAVEAT Specializing for std::string directly like this might not be the best idea (it might not always be appropriate and might interact badly with other parsers).
By default create_parser<std::string> is not defined, so you might decide this usage is good enough for your case:
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
struct strings {
std::string a;
std::string b;
std::string c;
};
namespace boost { namespace spirit { namespace traits {
template <> struct create_parser<std::string> {
typedef proto::result_of::deep_copy<
BOOST_TYPEOF(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
)
>::type type;
static type call() {
return proto::deep_copy(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
);
}
};
}}}
BOOST_FUSION_ADAPT_STRUCT(strings, (std::string, a)(std::string, b)(std::string, c))
template <typename Iterator>
struct google_parser : qi::grammar<Iterator, strings()> {
google_parser() : google_parser::base_type(entry, "contacts") {
using namespace qi;
entry =
skip('/') [auto_]
;
}
private:
qi::rule<Iterator, strings()> entry;
};
int main() {
using It = std::string::const_iterator;
google_parser<It> p;
std::string const input = "part1/part2/part3/also3";
It f = input.begin(), l = input.end();
strings ss;
bool ok = qi::parse(f, l, p >> *qi::char_, ss, ss.c);
if (ok)
{
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
Prints
a:part1
b:part2
c:part3/also3
Update/Bonus
In reponse to the OP's own answer I wanted to challenge myself to write it more generically indeed.
The main thing is to to write set_field_ in such a way that it doesn't know/assume more than required about the destination sequence type.
With a bit of Boost Fusion magic that became:
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
It has the added flexibility of applying Spirit's attribute compatibility rules¹. So, you can use the same grammar with both the following types:
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
To drive the point home, I made the adaptation of the second struct reverse the field order:
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
Without further ado, the demo program:
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/algorithm/iteration.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace fus = boost::fusion;
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
// output helpers for demo:
namespace {
inline std::ostream& operator<<(std::ostream& os, strings const& data) {
return os
<< "a:\"" << data.a << "\" "
<< "b:\"" << data.b << "\" "
<< "c:\"" << data.c << "\" ";
}
inline std::ostream& operator<<(std::ostream& os, alternative const& data) {
os << "first: vector<char> { \""; os.write(&data.first[0], data.first.size()); os << "\" } ";
os << "second: \"" << data.second << "\" ";
os << "third: \"" << data.third << "\" ";
return os;
}
}
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
boost::phoenix::function<set_field_> const set_field = {};
template <typename It, typename Target>
struct split_string_grammar: qi::grammar<It, Target(), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
_a_type _current; // custom placeholder
split_string =
eps [ _current = 0u ]
> repeat (parts-1)
[part [ set_field(_val, _1, _current++) ] > '/']
> last_part [ set_field(_val, _1, _current++) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, Target(), qi::locals<unsigned> > split_string;
qi::rule<It, std::string()> part, last_part;
};
template <size_t N = 3, typename Target>
void run_test(Target target) {
using It = std::string::const_iterator;
std::string const input { "one/two/three/four" };
It first = input.begin(), last = input.end();
split_string_grammar<It, Target> split_string(N);
bool ok = qi::parse (first, last, split_string, target);
if (ok) {
std::cout << target << '\n';
} else {
std::cout << "Parse failed\n";
}
if (first != last)
std::cout << "Remaining input left unparsed: '" << std::string(first, last) << "'\n";
}
int main ()
{
run_test(strings {});
run_test(alternative {});
}
Output:
a:"one" b:"two" c:"three/four"
first: vector<char> { "three/four" } second: "two" third: "one"
¹ as with BOOST_SPIRIT_ACTIONS_ALLOW_ATTR_COMPAT

Besides sehe's suggestions one more possible way is to use semantic actions (coliru):
struct set_field_
{
void operator() (strings& dst, std::string const& src, unsigned& idx) const
{
assert (idx < 3);
switch (idx++) {
case 0: dst.a = src; break;
case 1: dst.b = src; break;
case 2: dst.c = src; break;
}
}
};
boost::phoenix::function<set_field_> const set_field { set_field_ {} };
template <typename It>
struct split_string_grammar: qi::grammar<It, strings (), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
split_string = eps [ _a = val (0) ]
> repeat (parts-1) [part [ set_field (_val, _1, _a) ] > '/']
> last_part [ set_field (_val, _1, _a) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings (), qi::locals<unsigned> > split_string;
qi::rule<It, std::string ()> part, last_part;
};

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Hash an arbitrary precision value (boost::multiprecision::cpp_int) - c++

Related

How to use u8_to_u32_iterator in Boost Spirit X3?

Getting field names with boost::pfr

Vector of string shared memory map

Python's enumerate in c++

How to stop string concatenation in Spirit Qi 'repeat' parser?

Categories

Resources