I want to efficiently parse large CSV-like files, whose order of columns I get at runtime. With Spirit Qi, I would parse each field with a lazy auxiliary parser that would select at runtime which column-specific parser to apply to each column. But X3 doesn't seem to have lazy (despite that it's listed in documentation). After reading recommendations here on SO, I've decided to write a custom parser.
It ended up being pretty nice, but now I've noticed I don't really need the pos variable to be exposed anywhere outside the custom parser itself. I've tried putting it into the custom parser itself and started getting compiler errors stating that the column_value_parser object is read-only. Can I somehow put pos into the parser structure?
Simplified code that gets the compile-time error, with commented out parts of my working version:
#include <iostream>
#include <variant>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/support.hpp>
namespace helpers {
// Overload-set builder for std::visit (https://bitbashing.io/std-visit.html):
// derives from every callable it is given and re-exports each call operator.
template <class... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};

// Deduction guide: lets `overloaded{a, b, c}` be written without template arguments.
template <class... Callables>
overloaded(Callables...) -> overloaded<Callables...>;
} // namespace helpers
// Field content: zero or more characters, stopping before the next ',' or end-of-line.
auto const unquoted_text_field = *(boost::spirit::x3::char_ - ',' - boost::spirit::x3::eol);
// Empty tag types naming the kind of value a column holds.
struct text {};
struct integer {};
struct real {};
struct skip {};

// Description of a single column: exactly one of the tags above.
using column_variant = std::variant<text, integer, real, skip>;
// Custom X3 parser that parses one field according to the column kind at columns[pos]
// and prints the parsed value.
// NOTE: this is the non-compiling variant from the question: parse() is a const member
// function, yet it assigns to the plain (non-mutable) member `pos` further down.
// The commented-out lines show the working variant that takes `pos` by reference.
struct column_value_parser : boost::spirit::x3::parser<column_value_parser> {
// No attribute is exposed; values are only printed.
typedef boost::spirit::unused_type attribute_type;
// Column kinds, held by reference.
std::vector<column_variant>& columns;
// size_t& pos;
// Index into `columns` of the column to parse next.
size_t pos;
// column_value_parser(std::vector<column_variant>& columns, size_t& pos)
column_value_parser(std::vector<column_variant>& columns)
: columns(columns)
// , pos(pos)
, pos(0)
{ }
// Parses one field from [f, l). On success `f` is left after the field and `pos`
// advances (wrapping around at columns.size()); on failure `f` is restored.
template<typename It, typename Ctx, typename Other, typename Attr>
bool parse(It& f, It l, Ctx& ctx, Other const& other, Attr& attr) const {
auto const saved_f = f;
bool successful = false;
// Dispatch on the kind of the current column.
visit(
helpers::overloaded {
// skip: consume the field without producing a value.
[&](skip const&) {
successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::omit[unquoted_text_field]);
},
// text: capture the raw field content.
[&](text& c) {
std::string value;
successful = boost::spirit::x3::parse(f, l, unquoted_text_field, value);
if(successful) {
std::cout << "Text: " << value << '\n';
}
},
// integer: parse with x3::int_.
[&](integer& c) {
int value;
successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::int_, value);
if(successful) {
std::cout << "Integer: " << value << '\n';
}
},
// real: parse with x3::double_.
[&](real& c) {
double value;
successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::double_, value);
if(successful) {
std::cout << "Real: " << value << '\n';
}
}
},
columns[pos]);
if(successful) {
// This assignment inside a const member function is what the compiler rejects.
pos = (pos + 1) % columns.size();
return true;
} else {
// Undo any partial consumption so the caller sees an untouched iterator.
f = saved_f;
return false;
}
}
};
// Driver for the question's example: parses two comma-separated records separated by
// a line break. The commented-out parser line is the working variant that passes
// `pos` by reference; the active line is the variant that fails to compile.
int main(int argc, char *argv[])
{
std::string input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";
// Comes from external source.
std::vector<column_variant> columns = {text{}, integer{}, real{}, skip{}};
// Only used by the commented-out (by-reference) variant below.
size_t pos = 0;
boost::spirit::x3::parse(
input.begin(), input.end(),
// (column_value_parser(columns, pos) % ',') % boost::spirit::x3::eol);
(column_value_parser(columns) % ',') % boost::spirit::x3::eol);
}
XY: My goal is to parse ~500 GB of pseudo-CSV files in a reasonable time on a machine with little RAM, convert into a list of (roughly) [row-number, column-name, value], then put into storage. The format is actually a little more complex than CSV: database dumps formatted in… a human-friendly way, with column values being actually several small sublanguages (e.g. dates or, uh, something similar to whole apache log lines stuffed into a single field), and I'm often extracting only one specific part of each column. Different files may have different columns and in different order, which I can only learn by parsing yet another set of files containing original queries. Thankfully, Spirit makes it a breeze…
Three answers:
The easiest fix is to make pos a mutable member
The X3 hardcore answer is x3::with<>
Functional composition
1. Making pos mutable
Live On Wandbox
#include <iostream>
#include <variant>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/support.hpp>
namespace helpers {
// Visitor helper (https://bitbashing.io/std-visit.html): merges several callables
// into a single object whose operator() is the union of theirs.
template <class... Fs>
struct overloaded : Fs... {
    using Fs::operator()...;
};

// Deduction guide so the template arguments are taken from the constructor arguments.
template <class... Fs>
overloaded(Fs...) -> overloaded<Fs...>;
} // namespace helpers
// Field content: any run of characters that contains neither ',' nor an end-of-line.
auto const unquoted_text_field = *(boost::spirit::x3::char_ - ',' - boost::spirit::x3::eol);
// Column kinds, expressed as empty tag types.
struct text {};
struct integer {};
struct real {};
struct skip {};

// A column is described by one of the kinds above.
using column_variant = std::variant<text, integer, real, skip>;
// Custom X3 parser: each invocation parses one field according to the column kind at
// `columns[pos]`, prints the parsed value and, on success, advances `pos` cyclically.
//
// `pos` is `mutable` because parse() is a const member function. This makes parse()
// impure: the advance is not rolled back if an enclosing parser later backtracks.
struct column_value_parser : boost::spirit::x3::parser<column_value_parser> {
    // No attribute is exposed; values are only printed.
    typedef boost::spirit::unused_type attribute_type;

    // Column kinds, held by reference (the vector must outlive the parser).
    std::vector<column_variant>& columns;
    // Index into `columns` of the column to parse next.
    size_t mutable pos = 0;

    column_value_parser(std::vector<column_variant>& columns)
        : columns(columns)
    { }

    // Parses one field from [f, l).
    // Returns true and leaves `f` after the field on success; returns false and
    // restores `f` on failure. Always fails when there are no columns.
    template<typename It, typename Ctx, typename Other, typename Attr>
    bool parse(It& f, It l, Ctx& /*ctx*/, Other const& /*other*/, Attr& /*attr*/) const {
        // Without any column there is nothing to match; bailing out here also avoids
        // indexing an empty vector and computing `% 0` below.
        if (columns.empty()) {
            return false;
        }

        auto const saved_f = f;
        bool successful = false;
        std::visit(
            helpers::overloaded {
                // skip: consume the field without producing a value.
                [&](skip const&) {
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::omit[unquoted_text_field]);
                },
                // text: capture the raw field content.
                [&](text const&) {
                    std::string value;
                    successful = boost::spirit::x3::parse(f, l, unquoted_text_field, value);
                    if(successful) {
                        std::cout << "Text: " << value << '\n';
                    }
                },
                [&](integer const&) {
                    int value;
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::int_, value);
                    if(successful) {
                        std::cout << "Integer: " << value << '\n';
                    }
                },
                [&](real const&) {
                    double value;
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::double_, value);
                    if(successful) {
                        std::cout << "Real: " << value << '\n';
                    }
                }
            },
            columns[pos]);

        if(successful) {
            pos = (pos + 1) % columns.size();
            return true;
        } else {
            // Undo any partial consumption.
            f = saved_f;
            return false;
        }
    }
};
int main() {
std::string input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";
std::vector<column_variant> columns = {text{}, integer{}, real{}, skip{}};
boost::spirit::x3::parse(
input.begin(), input.end(),
(column_value_parser(columns) % ',') % boost::spirit::x3::eol);
}
2. x3::with<>
This is similar but with better (re)entrancy and encapsulation:
Live On Wandbox
#include <iostream>
#include <variant>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/support.hpp>
namespace helpers {
// "Overloaded lambdas" idiom (https://bitbashing.io/std-visit.html): one object whose
// call operators are those of all the callables it was built from.
template <class... Handlers>
struct overloaded : Handlers... {
    using Handlers::operator()...;
};

// Deduction guide for brace-initialisation from a list of callables.
template <class... Handlers>
overloaded(Handlers...) -> overloaded<Handlers...>;
} // namespace helpers
// Field content: characters up to, but not including, the next ',' or end-of-line.
auto const unquoted_text_field = *(boost::spirit::x3::char_ - ',' - boost::spirit::x3::eol);
// Tag types: one per supported column kind.
struct text {};
struct integer {};
struct real {};
struct skip {};

// Runtime description of one column.
using column_variant = std::variant<text, integer, real, skip>;
// Custom X3 parser: each invocation parses one field according to the column kind at
// `columns[pos]`, prints the parsed value and, on success, advances `pos` cyclically.
//
// Unlike a member counter, `pos` lives in the X3 context under `pos_tag`; use invoke()
// to obtain a parser that injects a fresh counter via x3::with<>.
struct column_value_parser : boost::spirit::x3::parser<column_value_parser> {
    // No attribute is exposed; values are only printed.
    typedef boost::spirit::unused_type attribute_type;

    // Column kinds, held by reference (the vector must outlive the parser).
    std::vector<column_variant>& columns;

    column_value_parser(std::vector<column_variant>& columns)
        : columns(columns)
    { }

    // Parses one field from [f, l). Requires a `Mutable<size_t>` stored in `ctx`
    // under `pos_tag`. Returns true and leaves `f` after the field on success;
    // returns false and restores `f` on failure. Always fails when there are no columns.
    template<typename It, typename Ctx, typename Other, typename Attr>
    bool parse(It& f, It l, Ctx const& ctx, Other const& /*other*/, Attr& /*attr*/) const {
        // Without any column there is nothing to match; bailing out here also avoids
        // indexing an empty vector and computing `% 0` below.
        if (columns.empty()) {
            return false;
        }

        auto const saved_f = f;
        bool successful = false;
        // The counter is reachable through a const context because `value` is mutable.
        size_t& pos = boost::spirit::x3::get<pos_tag>(ctx).value;

        std::visit(
            helpers::overloaded {
                // skip: consume the field without producing a value.
                [&](skip const&) {
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::omit[unquoted_text_field]);
                },
                // text: capture the raw field content.
                [&](text const&) {
                    std::string value;
                    successful = boost::spirit::x3::parse(f, l, unquoted_text_field, value);
                    if(successful) {
                        std::cout << "Text: " << value << '\n';
                    }
                },
                [&](integer const&) {
                    int value;
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::int_, value);
                    if(successful) {
                        std::cout << "Integer: " << value << '\n';
                    }
                },
                [&](real const&) {
                    double value;
                    successful = boost::spirit::x3::parse(f, l, boost::spirit::x3::double_, value);
                    if(successful) {
                        std::cout << "Real: " << value << '\n';
                    }
                }
            },
            columns[pos]);

        if(successful) {
            pos = (pos + 1) % columns.size();
            return true;
        } else {
            // Undo any partial consumption.
            f = saved_f;
            return false;
        }
    }

    // Wrapper whose payload stays writable even when the wrapper itself is const.
    template <typename T>
    struct Mutable { T mutable value; };

    // Context tag under which the column counter is stored.
    struct pos_tag;

    // Returns this parser wrapped in x3::with<pos_tag> carrying a zero-initialised counter.
    auto invoke() const {
        return boost::spirit::x3::with<pos_tag>(Mutable<size_t>{}) [ *this ];
    }
};
int main() {
std::string input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";
std::vector<column_variant> columns = {text{}, integer{}, real{}, skip{}};
column_value_parser p(columns);
boost::spirit::x3::parse(
input.begin(), input.end(),
(p.invoke() % ',') % boost::spirit::x3::eol);
}
3. Functional Composition
Because it's so much easier in X3, my favourite is to just generate the parser on demand.
Without requirements, this is the simplest I'd propose:
Live On Wandbox
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
namespace CSV {
    // Column kind tags.
    struct text {};
    struct integer {};
    struct real {};
    struct skip {};

    // Field content: any run of characters other than ',' and '\n'.
    auto const unquoted_text_field = *~x3::char_(",\n");

    // Maps each column kind to the X3 parser used for a field of that kind.
    static inline auto as_parser(skip)    { return x3::omit[unquoted_text_field]; }
    static inline auto as_parser(text)    { return unquoted_text_field; }
    static inline auto as_parser(integer) { return x3::int_; }
    static inline auto as_parser(real)    { return x3::double_; }

    // Parser for one line: the per-column parsers in order, each followed by either
    // a ',' or a look-ahead for end-of-input / end-of-line.
    template <typename... Spec>
    static inline auto line_parser(Spec... spec) {
        auto field_end = ',' | &(x3::eoi | x3::eol);
        return ((as_parser(spec) >> field_end) >> ... >> x3::eps);
    }

    // Parser for the whole input: lines separated by end-of-line.
    template <typename... Spec>
    static inline auto csv_parser(Spec... spec) {
        auto one_line = line_parser(spec...);
        return one_line % x3::eol;
    }
} // namespace CSV
#include <iostream>
#include <iomanip>
using namespace CSV;
// Builds a parser for the layout text, integer, real, skip, runs it over the sample
// input and reports the result plus any unparsed remainder.
int main() {
    std::string const input = "Hello,1,13.7,XXX\nWorld,2,1e3,YYY";
    auto p = csv_parser(text{}, integer{}, real{}, skip{});

    auto cursor = begin(input);
    auto stop = end(input);

    bool const ok = parse(cursor, stop, p);
    std::cout << (ok ? "Parsed\n" : "Failed\n");

    if (cursor != stop) {
        std::cout << "Remaining: " << std::quoted(std::string(cursor, stop)) << "\n";
    }
}
A version with debug information enabled:
Live On Wandbox
<line>
<try>Hello,1,13.7,XXX\nWor</try>
<CSV::text>
<try>Hello,1,13.7,XXX\nWor</try>
<success>,1,13.7,XXX\nWorld,2,</success>
</CSV::text>
<CSV::integer>
<try>1,13.7,XXX\nWorld,2,1</try>
<success>,13.7,XXX\nWorld,2,1e</success>
</CSV::integer>
<CSV::real>
<try>13.7,XXX\nWorld,2,1e3</try>
<success>,XXX\nWorld,2,1e3,YYY</success>
</CSV::real>
<CSV::skip>
<try>XXX\nWorld,2,1e3,YYY</try>
<success>\nWorld,2,1e3,YYY</success>
</CSV::skip>
<success>\nWorld,2,1e3,YYY</success>
</line>
<line>
<try>World,2,1e3,YYY</try>
<CSV::text>
<try>World,2,1e3,YYY</try>
<success>,2,1e3,YYY</success>
</CSV::text>
<CSV::integer>
<try>2,1e3,YYY</try>
<success>,1e3,YYY</success>
</CSV::integer>
<CSV::real>
<try>1e3,YYY</try>
<success>,YYY</success>
</CSV::real>
<CSV::skip>
<try>YYY</try>
<success></success>
</CSV::skip>
<success></success>
</line>
Parsed
Notes, Caveats:
With anything mutable, beware of side-effects. E.g. if you have a | b and a includes column_value_parser, the side-effect of incrementing pos will not be rolled back when a fails and b is matched instead.
In short, this makes your parse function impure.
Related
Hi, I'm using boost::pfr for basic reflection. It works fine, but the problem is that it only prints or deals with the field values: with boost::pfr::io it prints each member of the struct, but how can I print them as name-value pairs? The same issue applies to for_each_field: the functor only accepts values, not names. How can I get the field names?
// Minimal aggregate used to illustrate the question: boost::pfr::io prints the
// member values only, without their names.
struct S {
int n;
std::string name;
};
S o{1, "foo"};
std::cout << boost::pfr::io(o);
// Outputs: {1, "foo"}, how can I get n = 1, name = "foo"?
If you think adapting a struct is not too intrusive (it doesn't change your existing definitions, and you don't even need to have it in a public header):
// Registers S and its members n and name with Boost.Fusion (non-intrusive).
BOOST_FUSION_ADAPT_STRUCT(S, n, name)
Then you can concoct a general operator<< for sequences:
namespace BF = boost::fusion;

// Streams a Fusion-adapted struct as "name = value" pairs separated by ", ".
// Enabled only for types whose Fusion tag is struct_tag, so other sequences and
// ordinary streamable types are left alone.
template <typename T,
          typename Enable = std::enable_if_t<
              // BF::traits::is_sequence<T>::type::value>
              std::is_same_v<BF::struct_tag, typename BF::traits::tag_of<T>::type>>>
std::ostream& operator<<(std::ostream& os, T const& v)
{
    bool first = true;
    // Prints member number I; every member except the first is preceded by ", ".
    auto visitor = [&]<size_t I>() {
        os << (std::exchange(first, false) ? "" : ", ")
           << BF::extension::struct_member_name<T, I>::call()
           << " = " << BF::at_c<I>(v);
    };

    // visit members: immediately-invoked lambda expanding the index pack in order
    [&]<size_t... II>(std::index_sequence<II...>)
    {
        return (visitor.template operator()<II>(), ...);
    }
    (std::make_index_sequence<BF::result_of::size<T>::type::value>{});
    return os;
}
(Prior to c++20 this would require some explicit template types instead of the lambdas, perhaps making it more readable. I guess I'm lazy...)
Here's a live demo: Live On Compiler Explorer
n = 1, name = foo
Bonus: Correctly quoting string-like types
Live On Compiler Explorer
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/at_c.hpp>
#include <iostream>
#include <iomanip>
namespace MyLib {
    struct S {
        int n;
        std::string name;
    };

    namespace BF = boost::fusion;

    // String-like values are printed quoted.
    static auto inline pretty(std::string_view sv) { return std::quoted(sv); }

    // Anything that cannot be viewed as a string is passed through unchanged.
    template <typename T,
              typename Enable = std::enable_if_t<
                  !std::is_constructible_v<std::string_view, T const&>>>
    static inline T const& pretty(T const& v)
    {
        return v;
    }

    // Streams a Fusion-adapted struct as "name = value" pairs separated by ", ".
    // Enabled only for types whose Fusion tag is struct_tag
    // (alternative condition: BF::traits::is_sequence<T>::type::value).
    template <typename T,
              typename Enable = std::enable_if_t<
                  std::is_same_v<BF::struct_tag, typename BF::traits::tag_of<T>::type>>>
    std::ostream& operator<<(std::ostream& os, T const& v)
    {
        // Empty before the first member, ", " before every later one.
        char const* separator = "";
        auto print_member = [&]<size_t I>() {
            os << separator
               << BF::extension::struct_member_name<T, I>::call()
               << " = " << pretty(BF::at_c<I>(v));
            separator = ", ";
        };

        // Expand the member indices in order.
        auto print_all = [&]<size_t... II>(std::index_sequence<II...>) {
            (print_member.template operator()<II>(), ...);
        };
        print_all(std::make_index_sequence<BF::result_of::size<T>::type::value>{});
        return os;
    }
} // namespace MyLib
// Register MyLib::S with Boost.Fusion (must be done at global namespace scope).
BOOST_FUSION_ADAPT_STRUCT(MyLib::S, n, name)

// Prints a sample object through the generic operator<<.
int main()
{
    MyLib::S sample{1, "foo"};
    std::cout << sample << "\n";
}
Outputs:
n = 1, name = "foo"
The library cannot offer any such functionality because it is currently impossible to obtain the name of a member of a class as value of an object.
If you want to output field names, you need to declare string objects mapped to the members and implement an operator<< which uses these strings manually.
To do this a more sophisticated reflection library would probably offer macros to use in the definition of the members. Macros can expand their argument(s) into a declaration using the provided name as identifier while also producing code using the name as string literal (via the # macro replacement operator).
It's stupid but hey, with a stringifying macro per field it could be enough for you.
C++14, no additional library
#include <boost/pfr.hpp>
// Example aggregate that carries its own member-name table.
struct S
{
int n;
std::string name;
// Names of the data members, in declaration order; maintained by hand.
static char const* const s_memNames[2];
};
// Out-of-class definition of the name table.
char const* const S::s_memNames[2] = {"n", "name"};
// utility
// Returns the I-th entry of TR's static s_memNames table (references are stripped
// from TR first), or nullptr when I is past the end of the table.
template< size_t I, typename TR >
char const* MemberName()
{
    using Owner = std::remove_reference_t<TR>;
    return (I < std::size(Owner::s_memNames)) ? Owner::s_memNames[I] : nullptr;
}
// test:
#include <iostream>
using std::cout;
// Writes "(<member name> = <member value>)" for the I-th field of inst to cout.
template< size_t I, typename T >
void StreamAt(T&& inst)
{
    char const* label = MemberName<I,T>();
    auto& field = boost::pfr::get<I>(inst);
    cout << "(" << label << " = " << field << ")";
}
// Prints every field of a sample S, one "(name = value)" pair per line.
int main()
{
    S s{2, "boo"};

    // The second callback argument carries the field index as a compile-time constant.
    auto print_field = [&](const auto&, auto index) {
        StreamAt<decltype(index)::value>(s);
        cout << "\n";
    };
    boost::pfr::for_each_field(s, print_field);
}
output:
(n = 2)
(name = boo)
(previous version of the suggestion, this one has more fluff so less interesting)
#include <boost/pfr.hpp>
// library additions:
// Global table of member names, indexed by the ids handed out via Id<>.
static char const* g_names[100];

// Compile-time id wrapper.
template< size_t V >
struct Id : std::integral_constant<size_t, V > {};

// Type of the I-th field of T as seen by boost::pfr.
template< size_t I, typename T >
using TypeAt = boost::pfr::tuple_element_t<I, T>;

// Name index: the id stored at position Pos of Struct::NamesAt.
template<std::size_t Pos, class Struct>
constexpr int Ni() // name index
{
    using Owner = std::remove_reference_t<Struct>;
    using Slot = std::tuple_element_t<Pos, typename Owner::NamesAt>;
    return Slot::value;
}

// Invokes the given callable from its constructor.
struct StaticCaller
{
    template< typename Functor >
    StaticCaller(Functor f) { f(); }
};
///
/// YOUR CODE HERE
// Example aggregate wired into the name table: NamesAt assigns one unique id per
// member (via __COUNTER__), and Init() stores each member's name under its id.
struct S
{
using NamesAt = std::tuple<Id<__COUNTER__>, Id<__COUNTER__>>; // add this
int n;
std::string name;
static void Init() // add this
{
g_names[Ni<0,S>()] = "n";
g_names[Ni<1,S>()] = "name";
}
};
// Runs S::Init from the constructor of a namespace-scope object.
// NOTE(review): `g_sc__LINE__` is a single identifier; __LINE__ is not expanded
// inside it, so a second such object in the same scope would need another name.
StaticCaller g_sc__LINE__(S::Init); // add this
// utilities
// Returns a pair of (copy of the I-th field of inst, its registered name).
template< size_t I, typename T >
auto GetValueName(T&& inst)
{
    char const* label = g_names[Ni<I,T>()];
    return std::make_pair(boost::pfr::get<I>(inst), label);
}
// test:
#include <iostream>
using std::cout;
// Writes "(<member value>, <member name>)" for the I-th field of inst to cout.
template< size_t I, typename T >
void StreamAt(T&& inst)
{
    auto const& entry = GetValueName<I>(inst);
    cout << "(" << entry.first << ", " << entry.second << ")";
}
// Prints every field of a sample S, one "(value, name)" pair per line.
int main()
{
    S s{2, "boo"};

    // The second callback argument carries the field index as a compile-time constant.
    auto print_field = [&](const auto&, auto index) {
        StreamAt<decltype(index)::value>(s);
        cout << "\n";
    };
    boost::pfr::for_each_field(s, print_field);
}
output
(2, n)
(boo, name)
The boost::spirit::x3 error handling utilities allow the user to choose what is shown when an expectation failure occurs. This, however, does not seem to be the case for the line number portion of the message, which is exactly what I'd like to modify. So instead of it printing out In line 1: etc., I would like to print some other message in its place with the same line number info. Does anyone know how I could do that, or whether it is even modifiable in the first place?
EDIT:
Here's the code straight from https://www.boost.org/doc/libs/1_68_0/libs/spirit/doc/x3/html/spirit_x3/tutorials/error_handling.html:
// Rule-class mix-in: when an expectation fails, reports the failure through the
// x3::error_handler stored in the context and makes the rule fail.
struct error_handler
{
    template <typename Iterator, typename Exception, typename Context>
    x3::error_handler_result on_error(
        Iterator& first, Iterator const& last
      , Exception const& x, Context const& context)
    {
        // The context stores the handler wrapped; .get() yields the handler itself.
        auto& report = x3::get<x3::error_handler_tag>(context).get();

        std::string message = "Error! Expecting: ";
        message += x.which();
        message += " here:";

        report(x.where(), message);
        return x3::error_handler_result::fail;
    }
};
In addition to the on_error function printing out the message, it prints "In line x: ", where x is the line number. I really can't have that, it does not fit in with my project in the slightest.
Wow. First of all, I did not know all details about that example and x3::error_handler<>.
For a good break-down of how to provide error handling/diagnostic messages in X3 from basic principles, see this walk-through: Spirit X3, Is this error handling approach useful?
Traditionally (as in Qi) we would do the position tracking using an iterator adaptor:
Get current line in boost spirit grammar or Cross-platform way to get line number of an INI file where given option was found
or even the classic version of this How to pass the iterator to a function in spirit qi
At first glance it looks like the position_cache can be used separately (see eg. Boost Spirit x3 not compiling).
However, it turns out that - sadly - x3::annotate_on_success conflated the annotation task with error-handling, by assuming that position cache will always live inside the error handler. This at once means:
the error handler is more complicated than strictly required
this compounds with the fact that x3::error_handler<> is not well-suited for inheritance (due to private members and tricky to unambiguously overload operator() while keeping some overloads)
x3::annotate_on_success is simply not available to you unless you at least have a no-op error-handler like (Live On Coliru)
// Smallest handler that satisfies x3::annotate_on_success: it owns a position cache
// and offers tag(), but reports nothing.
template <typename It> struct dummy_handler_for_annotate_on_success {
    x3::position_cache<std::vector<It> > pos_cache;

    dummy_handler_for_annotate_on_success(It f, It l)
        : pos_cache(f, l)
    {}

    // Records the source range [first, last) for `ast` in the position cache.
    template <typename T>
    void tag(T& ast, It first, It last) {
        pos_cache.annotate(ast, first, last);
    }
};
and have that present in the context under the x3::error_handler_tag for annotate_on_success to work.
On the positive, this does have the benefit of not requiring two separate context injections, like:
// Illustration of two separate context injections: the position cache (by reference)
// and the error handler, both wrapped around the employees grammar.
auto const parser
= x3::with<x3::position_cache_tag>(std::ref(pos_cache)) [
x3::with<x3::error_handler_tag>(error_handler)
[ parser::employees ]
]
;
So, here's my take on providing a custom error-handler implementation. I simplified it a bit from the built-in version¹.
One simplification is also an optimization, resting on the assumption that the iterator type is bidirectional. If not, I think you'd be better off using spirit::line_pos_iterator<> as linked above.
// Custom replacement for x3::error_handler<>: owns the position cache, and prints
// diagnostics as "L:<line> <message>", followed by the offending source line and a
// marker line underneath it.
template <typename It> class diagnostics_handler {
// Maps annotated AST nodes back to iterator ranges in the input.
x3::position_cache<std::vector<It> > _pos_cache;
// Stream that receives all diagnostics.
std::ostream& _os;
public:
diagnostics_handler(It f, It l, std::ostream& os) : _pos_cache(f, l), _os(os) {}
// Reports a message for an annotated AST node, marking its whole source range.
void operator()(x3::position_tagged const& ast, std::string const& error_message) const {
auto where = _pos_cache.position_of(ast);
operator()(where.begin(), where.end(), error_message);
}
// Reports a message at a single position (no end of range).
void operator()(It err_first, std::string const& error_message) const {
operator()(err_first, boost::none, error_message);
}
// Reports a message at err_first, optionally marking the range up to *err_last.
void operator()(It err_first, boost::optional<It> err_last, std::string const& error_message) const {
auto first = _pos_cache.first(),
last = _pos_cache.last();
// Skip leading whitespace so the marker starts at the first significant character.
while (err_first != last && std::isspace(*err_first))
++err_first;
_os << "L:"<< line_number(err_first) << " "
<< error_message << std::endl;
// Echo the source line that contains the error position.
It cursor = get_line_start(first, err_first);
print_line(cursor, last);
// Emits distance(cursor, it) copies of `fill`, moves cursor to `it`, and restores
// the stream's previous fill character. Returns the stream for chaining.
auto score = [&](It& it, char fill) -> auto& {
auto f = _os.fill();
auto n = std::distance(cursor, it);
cursor = it;
return _os << std::setfill(fill) << std::setw(n) << "" << std::setfill(f);
};
if (err_last.has_value()) {
// Range: pad with spaces up to the start, then underline the range with '~'.
score(err_first, ' ');
score(*err_last, '~') << " <<-- Here" << std::endl;
} else {
// Single position: lead up to it with '_' and point at it with '^'.
score(err_first, '_') << "^_" << std::endl;
}
}
// Records the source range [first, last) for `ast`; used by annotate_on_success.
template <typename AST> void tag(AST& ast, It first, It last) {
return _pos_cache.annotate(ast, first, last);
}
auto const& get_position_cache() const { return _pos_cache; }
private:
// Characters treated as line breaks.
static constexpr std::array crlf { '\r', '\n' };
// Searches backwards from pos for a line break and returns the position just after
// it, or `first` when there is none.
auto get_line_start(It first, It pos) const {
return std::find_first_of( // assumed bidir iterators
std::make_reverse_iterator(pos), std::make_reverse_iterator(first),
crlf.begin(), crlf.end()
).base();
}
// 1-based line number of position i: one plus the number of '\n' before it.
auto line_number(It i) const {
return 1 + std::count(_pos_cache.first(), i, '\n');
}
// Prints the text from f up to the next line break (or l), converted to char.
void print_line(It f, It l) const {
std::basic_string s(f, std::find_first_of(f, l, crlf.begin(), crlf.end()));
_os << boost::locale::conv::utf_to_utf<char>(s) << std::endl;
}
};
Which you can then demo like Live On Coliru
// Demo excerpt: the custom handler is injected under x3::error_handler_tag, and after
// a successful parse it is also used to attach notes/warnings to AST nodes.
custom::diagnostics_handler<It> diags(iter, end, std::clog);
auto const parser
= x3::with<x3::error_handler_tag>(std::ref(diags))
[ parser::employees ]
;
std::vector<ast::employee> ast;
if (phrase_parse(iter, end, parser >> x3::eoi, x3::space, ast)) {
std::cout << "Parsing succeeded\n";
for (auto const& emp : ast) {
std::cout << "got: " << emp << std::endl;
diags(emp.who.last_name, "note: that's a nice last name");
diags(emp.who, "warning: the whole person could be nice?");
}
} ...
Which prints:
With custom diagnostics only:
Parsing succeeded
got: (23 (Amanda Stefanski) 1000.99)
L:1 note: that's a nice last name
{ 23, "Amanda", "Stefanski", 1000.99 },
~~~~~~~~~~~ <<-- Here
L:1 warning: the whole person could be nice?
{ 23, "Amanda", "Stefanski", 1000.99 },
~~~~~~~~~~~~~~~~~~~~~ <<-- Here
got: (35 (Angie Chilcote) 2000.99)
L:2 note: that's a nice last name
{ 35, "Angie", "Chilcote", 2000.99 }
~~~~~~~~~~ <<-- Here
L:2 warning: the whole person could be nice?
{ 35, "Angie", "Chilcote", 2000.99 }
~~~~~~~~~~~~~~~~~~~ <<-- Here
----- Now with parse error:
L:3 error: expecting: person
'Amanda', "Stefanski", 1000.99 },
_^_
Parsing failed
Simplifying Down
By breaking the false coupling between annotate_on_success and x3::error_handler_tag context, you could slim it down, a lot:
// Minimal diagnostics sink: given a position in [_first, _last) and a message, writes
// "L:<line>:<column> <message>" to _os. Lines are 1-based; the column is the distance
// from the start of the line.
template <typename It> struct diagnostics_handler {
    It _first, _last;
    std::ostream& _os;

    void operator()(It err_first, std::string const& error_message) const {
        std::size_t current_line = 1;
        It line_begin = _first;

        // Walk from the start of the input up to the reported position, remembering
        // where the most recent line started.
        It scan = _first;
        while (scan != err_first) {
            if (*scan == '\n') {
                line_begin = scan + 1;
                ++current_line;
            }
            ++scan;
        }

        _os << "L:" << current_line
            << ":" << std::distance(line_begin, err_first)
            << " " << error_message << "\n";
    }
};
See it Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <iomanip>
#include <string>
namespace x3 = boost::spirit::x3;
// AST for the employee grammar. Every node derives from x3::position_tagged so its
// source range can be recorded in a position cache.
namespace ast {
// A name is a std::string that can additionally be position-tagged.
struct name : std::string, x3::position_tagged {
using std::string::string;
using std::string::operator=;
};
struct person : x3::position_tagged { ast::name first_name, last_name; };
struct employee : x3::position_tagged { int age; person who; double salary; };
// Makes Fusion's stream operator available for the adapted structs in this namespace.
using boost::fusion::operator<<;
}
// Register the AST structs and their members with Boost.Fusion.
BOOST_FUSION_ADAPT_STRUCT(ast::person, first_name, last_name)
BOOST_FUSION_ADAPT_STRUCT(ast::employee, age, who, salary)
namespace custom {
    // Context tag under which the diagnostics handler is stored.
    struct diagnostics_handler_tag;

    // Minimal diagnostics sink: given a position in [_first, _last) and a message,
    // writes "L:<line>:<column> <message>" to _os. Lines are 1-based; the column is
    // the distance from the start of the line.
    template <typename It> struct diagnostics_handler {
        It _first, _last;
        std::ostream& _os;

        void operator()(It err_first, std::string const& error_message) const {
            std::size_t current_line = 1;
            It line_begin = _first;

            // Walk up to the reported position, remembering where the most recent
            // line started.
            It scan = _first;
            while (scan != err_first) {
                if (*scan == '\n') {
                    line_begin = scan + 1;
                    ++current_line;
                }
                ++scan;
            }

            _os << "L:" << current_line
                << ":" << std::distance(line_begin, err_first)
                << " " << error_message << "\n";
        }
    };
} // namespace custom
// Grammar for a comma-separated list of `{ age, "first", "last", salary }` records,
// plus the rule-class mix-ins that hook error reporting and position annotation.
namespace parser {
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
// Mix-in: on an expectation failure, reports through the handler stored in the
// context under custom::diagnostics_handler_tag and makes the rule fail.
struct error_handler {
template <typename It, typename E, typename Ctx>
x3::error_handler_result on_error(It&, It const&, E const& x, Ctx const& ctx) {
auto& handler = x3::get<custom::diagnostics_handler_tag>(ctx);
handler(x.where(), "error: expecting: " + x.which());
return x3::error_handler_result::fail;
}
};
// Mix-in: on success, records the matched source range of the AST node in the
// position cache stored in the context. The struct doubles as its own context tag.
struct annotate_position {
template <typename T, typename Iterator, typename Context>
inline void on_success(const Iterator &first, const Iterator &last, T &ast, const Context &context)
{
auto &position_cache = x3::get<annotate_position>(context).get();
position_cache.annotate(ast, first, last);
}
};
// Rule classes: all rules annotate positions; only employee also reports errors.
struct quoted_string_class : annotate_position {};
struct person_class : annotate_position {};
struct employee_class : error_handler, annotate_position {};
x3::rule<quoted_string_class, ast::name> const name = "name";
x3::rule<person_class, ast::person> const person = "person";
x3::rule<employee_class, ast::employee> const employee = "employee";
// A name is a double-quoted string; lexeme[] disables skipping inside the quotes.
auto const name_def
= x3::lexeme['"' >> +(x3::char_ - '"') >> '"']
;
// '>' is the expectation operator: once the left side matched, the right side must.
auto const person_def
= name > ',' > name
;
auto const employee_def
= '{' > x3::int_ > ',' > person > ',' > x3::double_ > '}'
;
BOOST_SPIRIT_DEFINE(name, person, employee)
// One or more employees separated by commas.
auto const employees = employee >> *(',' >> employee);
}
// Parses `input` as a list of employees. Diagnostics go to std::clog; on success each
// employee is printed to std::cout together with two sample notes located through
// the position cache.
void parse(std::string const& input) {
    using It = std::string::const_iterator;
    It iter = input.begin();
    It end = input.end();

    x3::position_cache<std::vector<It> > pos_cache(iter, end);
    custom::diagnostics_handler<It> diags { iter, end, std::clog };

    // Inject the position cache (by reference) and the diagnostics handler into the
    // parser context.
    auto const parser =
        x3::with<parser::annotate_position>(std::ref(pos_cache)) [
            x3::with<custom::diagnostics_handler_tag>(diags) [
                parser::employees
            ]
        ];

    std::vector<ast::employee> ast;
    bool const matched = phrase_parse(iter, end, parser >> x3::eoi, x3::space, ast);

    if (!matched) {
        std::cout << "Parsing failed\n";
        ast.clear();
        return;
    }

    std::cout << "Parsing succeeded\n";
    for (auto const& emp : ast) {
        std::cout << "got: " << emp << std::endl;

        auto const last_name_at = pos_cache.position_of(emp.who.last_name).begin();
        diags(last_name_at, "note: that's a nice last name");

        auto const person_at = pos_cache.position_of(emp.who).begin();
        diags(person_at, "warning: the whole person could be nice?");
    }
}
// Sample inputs for the demo: good_input holds two well-formed records; bad_input
// uses single quotes around the first name, which the name rule does not accept.
static std::string const
good_input = R"({ 23, "Amanda", "Stefanski", 1000.99 },
{ 35, "Angie", "Chilcote", 2000.99 }
)",
bad_input = R"(
{ 23,
'Amanda', "Stefanski", 1000.99 },
)";
int main() {
std::cout << "With custom diagnostics only:" << std::endl;
parse(good_input);
std::cout << "\n\n ----- Now with parse error:" << std::endl;
parse(bad_input);
}
Prints:
With custom diagnostics only:
Parsing succeeded
got: (23 (Amanda Stefanski) 1000.99)
L:1:16 note: that's a nice last name
L:1:6 warning: the whole person could be nice?
got: (35 (Angie Chilcote) 2000.99)
L:2:23 note: that's a nice last name
L:2:14 warning: the whole person could be nice?
----- Now with parse error:
L:2:13 error: expecting: person
Parsing failed
¹ also fixed a bug that causes diagnostics to display wrongly on the first line(?) with x3::error_handler<> implementation
I have a template class with 3 template arguments.
// Class template whose three type arguments are to be chosen from user input
// (per the question: data type, driver, action).
template <class T, class U, class Y>
class MyClass {};
I wanna get input from users by CLI arguments, something like ./cli float driver-x load
The first arg can be float or double
The second arg is a driver name: driver-x, driver-y, ...
The third argument is about the action type: load, unload, ...
If I want to create a new instance of MyClass based on user inputs, I have to define many if/else statements, because the user inputs are strings and I have to write a condition for each of them.
So, it will be something like this:
if (data_type == "float")
if (driver == "driver-x")
if (action == "load")
MyClass<float, DriverX, Load> t;
t......
As far as I know, it's impossible to store a type in a variable in C++.
So, is there any way to improve the if/else statements? Something like:
if (data_type == "float")
//
if (driver == "driver-x")
//
if (action == "load")
//
MyClass<......> t;
t.....;
Or any other way?
I'm looking for a way to improve these if/else statements.
Here's my take
// Value-level stand-in for a type, so that a type can be stored in a variant.
template<typename T>
struct proxy { // or std::type_identity
    using type = T;
};

// A runtime choice between the given types.
template<typename... Ts>
using choice_of = std::variant<proxy<Ts>...>;

// Always T; the second argument only exists so that T can be repeated once per
// element of a pack.
template<typename T, typename>
using type_const_t = T;

// Picks the type whose label equals `choice`. Labels are given in the same order as
// the types: `head` belongs to T, `tail...` to Ts.... Returns std::nullopt when no
// label matches.
template<typename T, typename... Ts>
std::optional<choice_of<T, Ts...>> choose(std::string const &choice, std::string const &head, type_const_t<std::string const&, Ts>... tail) noexcept {
    if (choice == head) {
        return proxy<T>{};
    }
    if constexpr (sizeof...(Ts) != 0) {
        // Try the remaining candidates, then widen the narrower variant to ours.
        auto remaining = choose<Ts...>(choice, tail...);
        if (remaining) {
            auto widen = [](auto picked) -> choice_of<T, Ts...> { return picked; };
            return std::visit(widen, *remaining);
        }
    }
    return std::nullopt;
}
// Usage: resolve each command-line string to a type choice, then visit all three
// choices at once to instantiate MyClass with the selected types.
// Note: .value() throws std::bad_optional_access when a string matched no label.
auto data_choice = choose<float, double>(data_type, "float", "double");
auto driver_choice = choose<DriverX, DriverY>(driver, "driver-x", "driver-y");
auto action_choice = choose<Load, Unload>(action, "load", "unload");
std::visit([](auto data_type_p, auto driver_p, auto action_p) {
auto t = MyClass<typename decltype(data_type_p)::type, typename decltype(driver_p)::type, typename decltype(action_p)::type>{};
// do stuff with t
}, data_choice.value(), driver_choice.value(), action_choice.value());
Complete example on Godbolt
You can build some machinery to do this for you, extracting it into a function call.
For example, here I build a tuple which contains strings and types, then I check a passed string against all of them:
#include <string_view>
#include <cstddef>
#include <tuple>
#include <utility>
#include <type_traits>
// Associates a string key with the type T. Converts to true in boolean contexts.
template<class T>
struct mapped_type {
    std::string_view const key;

    using type = T;

    explicit constexpr operator bool() const noexcept { return true; }
};
namespace detail {
    // Checks element I of the tuple `m`; on a key match calls `f` with that element,
    // otherwise continues with element I + 1. Past the end, calls `f` with
    // std::false_type{}.
    template<class K, class F, class M, std::size_t I>
    constexpr void lookup_impl(const K& key, F&& f, M&& m, std::integral_constant<std::size_t, I>) {
        using mapping_tuple = std::remove_cv_t<std::remove_reference_t<M>>;
        constexpr bool exhausted = !(I < std::tuple_size<mapping_tuple>::value);

        if constexpr (exhausted) {
            std::forward<F>(f)(std::false_type{});
        } else {
            const auto& candidate = std::get<I>(m);
            if (candidate.key == key) {
                std::forward<F>(f)(candidate);
            } else {
                lookup_impl(key, std::forward<F>(f), std::forward<M>(m), std::integral_constant<std::size_t, I + 1>{});
            }
        }
    }
} // namespace detail

// Calls `f` with the first element of the tuple `m` whose `.key` equals `key`,
// or with `std::false_type{}` if no element matches.
template<class K, class F, class M>
constexpr void lookup(const K& key, F&& f, M&& m) {
    using start_index = std::integral_constant<std::size_t, 0>;
    detail::lookup_impl(key, std::forward<F>(f), std::forward<M>(m), start_index{});
}
// This is our mapping for the first argument
// Mapping for the first argument: the strings "float" and "double" select the
// corresponding floating-point types.
inline constexpr auto data_type_map = std::make_tuple(
mapped_type<float>{ "float" },
mapped_type<double>{ "double" }
);
// Example usage
#include <iostream>
// Looks up the string "float" in data_type_map and prints the name of the mapped
// type, or "Invalid type" when the string matches no key.
int main() {
    const char* s = "float";

    auto report = [](const auto& arg) {
        if constexpr (arg) {
            // A mapping was found: recover the type it stands for.
            using mapping_t = std::remove_cv_t<std::remove_reference_t<decltype(arg)>>;
            using type = typename mapping_t::type;
            std::cout << "Got type: " << typeid(type).name() << '\n';
        } else {
            std::cout << "Invalid type\n";
        }
    };

    lookup(s, report, data_type_map);
}
And then you can call this recursively inside the lambda.
You could also create a version that takes a tuple of keys and a tuple of values to call one function with many arguments:
#include <string_view>
#include <tuple>
#include <utility>
#include <type_traits>
/// One entry of a key -> type table: `key` is the runtime text, `type` the
/// compile-time type it stands for. Aggregate-initialised from the key alone.
template <typename T>
struct mapped_type {
    using type = T;

    std::string_view const key;

    /// Unconditionally true; `std::false_type` (the "no match" marker) converts to false.
    constexpr explicit operator bool() const noexcept { return true; }
};
namespace detail {

/// Single-key search: calls `f` with the first entry of tuple `m` whose `key`
/// equals `key`, or with `std::false_type{}` once every entry has been tried.
template <class K, class F, class M, std::size_t I>
constexpr void lookup_impl(F&& f, const K& key, M&& m, std::integral_constant<std::size_t, I>) {
    using entries_t = std::remove_cv_t<std::remove_reference_t<M>>;
    if constexpr (I >= std::tuple_size_v<entries_t>) {
        std::forward<F>(f)(std::false_type{});
    } else {
        const auto& candidate = std::get<I>(m);
        if (!(candidate.key == key)) {
            lookup_impl(std::forward<F>(f), key, std::forward<M>(m),
                        std::integral_constant<std::size_t, I + 1>{});
        } else {
            std::forward<F>(f)(candidate);
        }
    }
}

/// Resolves key `I` against mapping `I`, then recurses for the remaining keys.
/// Each level prepends its own result, so `f` finally receives one argument
/// per key, in key order.
template <class F, class K, class M, std::size_t I>
constexpr void multilookup_impl(F&& f, const K& keys, M&& mappings, std::integral_constant<std::size_t, I>) {
    using keys_t = std::remove_cv_t<std::remove_reference_t<K>>;
    if constexpr (I < std::tuple_size_v<keys_t>) {
        lookup_impl(
            [&](const auto& resolved) {
                multilookup_impl(
                    [&](const auto&... later) { std::forward<F>(f)(resolved, later...); },
                    keys, mappings, std::integral_constant<std::size_t, I + 1>{});
            },
            std::get<I>(keys), std::get<I>(mappings), std::integral_constant<std::size_t, 0>{});
    } else {
        // No keys left: fire the accumulated call.
        std::forward<F>(f)();
    }
}

} // namespace detail

/// Looks up `keys[i]` in `mappings[i]` for every `i` and calls `f` once with
/// all results (an entry, or `std::false_type{}` for a key that did not match).
/// `keys` and `mappings` must be tuple-like and of equal size.
template <class F, class K, class M>
constexpr void lookup(F&& f, const K& keys, M&& mappings) {
    constexpr std::size_t key_count = std::tuple_size_v<std::remove_cv_t<std::remove_reference_t<K>>>;
    constexpr std::size_t map_count = std::tuple_size_v<std::remove_cv_t<std::remove_reference_t<M>>>;
    static_assert(key_count == map_count, "Wrong number of keys for given number of maps");
    detail::multilookup_impl(std::forward<F>(f), keys, mappings, std::integral_constant<std::size_t, 0>{});
}
Which looks almost the same, but there's one more level of calls.
It would be used like this:
#include <array>
#include <iostream>
#include <typeinfo>
// One key -> type table per command-line argument.
// Keys accepted for the data type.
inline constexpr auto data_type_map = std::make_tuple(
mapped_type<float>{ "float" },
mapped_type<double>{ "double" }
);
// Keys accepted for the driver.
inline constexpr auto driver_type_map = std::make_tuple(
mapped_type<DriverX>{ "driver-x" },
mapped_type<DriverY>{ "driver-y" }
);
// Keys accepted for the action.
inline constexpr auto action_type_map = std::make_tuple(
mapped_type<Load>{ "load" },
mapped_type<Unload>{ "unload" }
);
// Resolves three keys against the three type maps and constructs the matching
// MyClass instantiation, or reports the first key that failed to resolve.
int main() {
    const char* a = "float";
    const char* b = "driver-x";
    const char* c = "load";
    lookup([](const auto& data, const auto& driver, const auto& action) {
        // Decide on the argument *types*. The parameters are references, and
        // naming them inside a constant expression (`if constexpr (!data)`) is
        // ill-formed on compilers that do not implement P2280.
        using data_entry = std::remove_cv_t<std::remove_reference_t<decltype(data)>>;
        using driver_entry = std::remove_cv_t<std::remove_reference_t<decltype(driver)>>;
        using action_entry = std::remove_cv_t<std::remove_reference_t<decltype(action)>>;
        if constexpr (std::is_same_v<data_entry, std::false_type>) {
            std::cout << "Could not parse data!\n";
        } else if constexpr (std::is_same_v<driver_entry, std::false_type>) {
            std::cout << "Could not parse driver!\n";
        } else if constexpr (std::is_same_v<action_entry, std::false_type>) {
            std::cout << "Could not parse action!\n";
        } else {
            using data_type = typename data_entry::type;
            using driver_type = typename driver_entry::type;
            using action_type = typename action_entry::type;
            MyClass<data_type, driver_type, action_type> t;
            std::cout << "Constructed a " << typeid(decltype(t)).name() << '\n';
        }
    },
    std::array<const char*, 3>{ a, b, c },
    std::forward_as_tuple(data_type_map, driver_type_map, action_type_map)
    );
}
I think you are looking for something like X-macros:
// X-macro table: one X(...) row per supported combination of
// (data type, driver type, driver name string, action type).
#define YOUR_TABLE \
X(float, DriverX, "driver-x", Load) \
X(int, DriverY, "driver-y", action2) \
X(int, DriverY, "driver-y", action3)
// Row expansion: an `if` comparing the stringized data type, the driver name
// and the stringized action against argv[1..3]; on a match it declares the
// corresponding MyClass instance. `t....` is a placeholder for the real work.
#define X(data_type, driver, driverName, action) if((0 == strcmp(#data_type,argv[1])) \
&& (0 == strcmp(driverName,argv[2])) && (0 == strcmp(#action,argv[3])))\
{ \
MyClass<data_type, driver, action> t; \
t.... \
}
// Expands to one `if` statement per table row.
YOUR_TABLE
#undef X
Prepare your puke-bag, here is a far-from-elegant solution but
simple enough to be easily adapted.
The main drawback I see is that all the rest of the application
that needs to work with the created instance must live inside a
lambda closure (this solution does not return the instance).
Every possible argument is considered only once in a
dedicated function (not X times Y times Z if/else).
/**
g++ -std=c++17 -o prog_cpp prog_cpp.cpp \
-pedantic -Wall -Wextra -Wconversion -Wno-sign-conversion \
-g -O0 -UNDEBUG -fsanitize=address,undefined
**/
#include <iostream>
#include <string>
#include <stdexcept>
//----------------------------------------------------------------------------
// Policy tag types; show() yields the name printed by MyClass::show().
struct DriverX
{
  const char *show() const { return "DriverX"; }
};

struct DriverY
{
  const char *show() const { return "DriverY"; }
};

struct Load
{
  const char *show() const { return "Load"; }
};

struct Unload
{
  const char *show() const { return "UnLoad"; }
};
// Aggregate combining a value-initialised real number with a driver and an
// action object; both of the latter must provide show().
template<typename RealType,
         typename DriverType,
         typename ActionType>
struct MyClass
{
  RealType real{};
  DriverType driver{};
  ActionType action{};

  // Describes the instance as "<sizeof real> bytes real, <driver>, <action>".
  std::string show() const
  {
    std::string text=std::to_string(sizeof(real));
    text+=" bytes real, ";
    text+=driver.show();
    text+=", ";
    text+=action.show();
    return text;
  }
};
//----------------------------------------------------------------------------
// Final dispatch step: turns the action name into Load/Unload and passes a
// value-initialised MyClass<RealType, DriverType, ...> to fnct.
// Throws std::runtime_error for any other action name.
template<typename RealType,
         typename DriverType,
         typename DoEverythingFunction>
void
with_MyClass_3(const std::string &action,
               DoEverythingFunction fnct)
{
  const bool wants_load=(action=="load");
  if(!wants_load && action!="unload")
  {
    throw std::runtime_error{"unexpected action: "+action};
  }
  if(wants_load)
  {
    return fnct(MyClass<RealType, DriverType, Load>{});
  }
  return fnct(MyClass<RealType, DriverType, Unload>{});
}
// Second dispatch step: turns the driver name into DriverX/DriverY and hands
// over to with_MyClass_3. Throws std::runtime_error for any other driver name.
template<typename RealType,
         typename DoEverythingFunction>
void
with_MyClass_2(const std::string &driver,
               const std::string &action,
               DoEverythingFunction fnct)
{
  const bool wants_x=(driver=="driver-x");
  if(!wants_x && driver!="driver-y")
  {
    throw std::runtime_error{"unexpected driver: "+driver};
  }
  if(wants_x)
  {
    return with_MyClass_3<RealType, DriverX>(action, fnct);
  }
  return with_MyClass_3<RealType, DriverY>(action, fnct);
}
// Entry point: turns the real-type name into float/double, then dispatches on
// driver and action. fnct receives the constructed MyClass instance.
// Throws std::runtime_error for any other real-type name.
template<typename DoEverythingFunction>
void
with_MyClass(const std::string &real,
             const std::string &driver,
             const std::string &action,
             DoEverythingFunction fnct)
{
  const bool wants_float=(real=="float");
  if(!wants_float && real!="double")
  {
    throw std::runtime_error{"unexpected real: "+real};
  }
  if(wants_float)
  {
    return with_MyClass_2<float>(driver, action, fnct);
  }
  return with_MyClass_2<double>(driver, action, fnct);
}
//----------------------------------------------------------------------------
// Demo driver: prints two hard-coded instances, then every combination of
// valid and invalid names, then (given three arguments) the combination
// named on the command line.
int
main(int argc,
     char **argv)
{
  // Builds the requested instance and prints it; a rejected name is reported
  // on std::cerr instead of ending the program.
  const auto attempt=
    [](const auto &real, const auto &driver, const auto &action)
    {
      try
      {
        with_MyClass(real, driver, action,
          [](const auto &instance)
          {
            std::cout << "working with: " << instance.show() << '\n';
          });
      }
      catch(const std::exception &failure)
      {
        std::cerr << "!!! " << failure.what() << " !!!\n";
      }
    };

  std::cout << "~~~~ hardcoded types ~~~~\n";
  const MyClass<float, DriverX, Load> mc1;
  std::cout << "mc1: " << mc1.show() << '\n';
  const MyClass<double, DriverY, Unload> mc2;
  std::cout << "mc2: " << mc2.show() << '\n';

  std::cout << "\n~~~~ many types ~~~~\n";
  for(const auto &real: {"float", "double", "int"})
  {
    for(const auto &driver: {"driver-x", "driver-y", "driver-z"})
    {
      for(const auto &action: {"load", "unload", "sleep"})
      {
        attempt(real, driver, action);
      }
    }
  }

  if(argc>3)
  {
    std::cout << "\n~~~~ from command line ~~~~\n";
    attempt(argv[1], argv[2], argv[3]);
  }
  return 0;
}
I was writing a CSV parser and I thought it would be a great idea to put some advanced C++ into practice. In particular, there's a useful function to split a line of a CSV file given a delimiter. Although it's a straightforward function to write, now I want that function to return a tuple with a varying number of arguments and types. For example:
// Intended call site: one template argument per field of the line.
int main() {
    auto [number, word, ratio] = extract<int, std::string, float>("42;hello;3.1415", ';');
    std::cout << number << ' ' << word << ' ' << ratio << std::endl;
}
Should print out :
42 hello 3.1415
So I thought of a variadic template function :
// Skeleton of the desired function: returns one value of each type in T...
// for the fields of `str` separated by `delimiter`.
// The string is taken by const reference and the delimiter by value, so both
// named variables and temporaries can be passed (const rvalue references
// would reject lvalues).
template <typename... T>
std::tuple<T...> extract(const std::string& str, char delimiter) {
    std::tuple<T...> splited_line;
    /* ... */
    return splited_line;
}
But I can't modify the tuple inside that function with a variable parameter, like so :
std::get<i>(splited_line) // doesn't work
That wasn't a big surprise; I'm quite new to this language. I'm now wondering how to implement this small function in an elegant way.
Thanks for any help.
You might do something like (I let you implement "parsing" part):
// Parsing parts
std::vector<std::string> split(const std::string& s, char delimiter);
template <typename T>
T ConvertTo(const std::string& s);
// Variadic part
// Converts fields[Is] to Ts for each index/type pair and packs the results
// into a tuple, in order.
template <typename... Ts, std::size_t... Is>
std::tuple<Ts...> extract_impl(std::index_sequence<Is...>,
                               const std::vector<std::string>& fields)
{
    return { ConvertTo<Ts>(fields[Is])... };
}
template <typename... Ts>
std::tuple<Ts...> extract(const std::string& s, char delimiter) {
const auto strings = split(s, delimiter);
if (strings.size() != sizeof...(Ts)) {
// Error handling
// ...
}
return extract_impl<Ts...>(std::index_sequence_for<Ts...>(), strings);
}
// Wraps `f` in a callable that invokes `f` once per argument, left to right,
// discarding whatever `f` returns.
template<class F>
auto foreach_argument( F&& f ) {
    return [f = std::forward<F>(f)](auto&&...elems) {
        ( (void)f(elems), ... );
    };
}

// Walks the delimiter-separated fields of `str`, visiting one field per tuple
// element. Elements with no corresponding field stay value-initialized.
template <class... Ts>
std::tuple<Ts...> extract(const std::string& str, const char delimiter) {
    std::tuple<Ts...> splited_line;
    std::size_t i = 0;      // position of the element being filled
    std::size_t index = 0;  // start of the next field, npos once input is used up
    auto operation = [&](auto&& elem){
        if (index == std::string::npos)
            return;
        const auto next = str.find( delimiter, index );
        const bool last_field = (next == std::string::npos);
        // substr() takes a length, not an end position.
        const std::string element =
            str.substr( index, last_field ? std::string::npos : next - index );
        // Step past the delimiter so the next field does not start on it.
        index = last_field ? std::string::npos : next + 1;
        // parse the string "element" into the argument "elem"
        (void)elem;
        (void)element;
        ++i;
    };
    std::apply(foreach_argument(operation), splited_line);
    return splited_line;
}
This results in default-constructed Ts first; if an element isn't found, it remains default-constructed.
The return value
std::optional<std::tuple<Ts...>>
or throw-if-not-matching options would have a
std::tuple<std::optional<Ts>...>
within the function, and the lambda in apply would .emplace the element when it was found. Then ensure that all elements are valid before returning, else throw or return the empty optional.
I.e., to turn a std::tuple<std::optional<Ts>...> into a std::tuple<Ts...>, something like:
// The lambda must take a parameter pack: std::apply passes one argument per tuple element.
return std::apply( [](auto&&...elems){ return std::make_tuple( *elems... ); }, splitted_line );
Okay, thanks to the help of the community, I got my problem solved. Maybe it'll help someone understand variadic template functions, so I'm going to share working code (based on Adam Nevraumont's code):
#include <iostream>
#include <string>
#include <tuple>
#include <string_view>
#include <sstream>
// Splits `str` on `delimiter` and stream-extracts field i into the i-th
// element of the returned tuple.
// If `str` has fewer fields than Ts..., the remaining elements keep their
// value-initialized state (the scan stops instead of wrapping around to the
// start of the input).
template <typename... Ts>
std::tuple<Ts...> extract(std::string_view str, char delimiter = ';') {
    std::size_t idx = 0;  // start of the next field, npos once input is used up
    auto pop = [&](auto&& elem) {
        if (idx == std::string_view::npos)
            return;
        const auto next = str.find(delimiter, idx);
        std::stringstream ss;
        // For the last field next is npos; substr clamps the oversized count.
        ss << str.substr(idx, next - idx);
        ss >> elem;
        // npos + 1 would wrap to 0 and restart parsing from the beginning.
        idx = (next == std::string_view::npos) ? next : next + 1;
    };
    std::tuple<Ts...> splited;
    std::apply([&](auto&&...elems) { (pop(elems), ...); }, splited);
    return splited;
}
// Parses one sample line into four differently typed fields and prints them.
int main() {
    const std::string dataline = "-42;hello;3.1415;c";
    auto [number, word, ratio, letter] = extract<int, std::string, float, char>(dataline);
    std::cout << number << " " << word << " " << ratio << " " << letter << std::endl;
}
As you can see, I convert each string into the type I want with a stringstream. If you need more control over how each type in the tuple is handled, you can instead implement another function template and specialize it per type (based on Jarod42's code):
#include <iostream>
#include <string>
#include <tuple>
#include <string_view>
// Per-type conversion of one field. The primary template ignores the text and
// returns a value-initialized T; the specializations parse it.
template <typename T> T convert_to(const std::string_view&) { return T(); } // default constructor
template <> std::string convert_to<std::string>(const std::string_view& s) { return std::string(s); }
template <> float convert_to<float>(const std::string_view& s) { return std::stof(std::string(s)); }
template <> int convert_to<int>(const std::string_view& s) { return std::stoi(std::string(s)); }

// Converts splited[Is] to Ts for each index/type pair and packs the results.
template <typename... Ts, std::size_t... Is>
std::tuple<Ts...> extract_impl(std::index_sequence<Is...>,
                               std::string_view splited[sizeof...(Ts)]) {
    return { convert_to<Ts>(splited[Is])... };
}

// Splits `str` on `delimiter` into sizeof...(Ts) fields and converts each.
// When `str` has fewer fields than Ts..., the missing ones are handed to
// convert_to as empty views (the scan stops instead of wrapping around to the
// start of the input).
template <typename... Ts>
std::tuple<Ts...> extract(std::string_view str, char delimiter = ';') {
    std::string_view splited[sizeof...(Ts)];
    std::size_t idx = 0;  // start of the next field, npos once input is used up
    for (std::size_t i = 0; i < sizeof...(Ts) && idx != std::string_view::npos; ++i) {
        const auto next = str.find(delimiter, idx);
        // For the last field next is npos; substr clamps the oversized count.
        splited[i] = str.substr(idx, next - idx);
        // npos + 1 would wrap to 0 and restart splitting from the beginning.
        idx = (next == std::string_view::npos) ? next : next + 1;
    }
    return extract_impl<Ts...>(std::index_sequence_for<Ts...>(), splited);
}
// Parses a three-field sample line and prints the typed values.
int main() {
    auto [number, word, ratio] = extract<int, std::string, float>("-42;hello;3.1415");
    std::cout << number << ' ' << word << ' ' << ratio;
}
I would like to split a string into parts:
input = "part1/part2/part3/also3"
and fill a structure that consists of three std::string members with these parts.
// Destination for the split: a <- part1, b <- part2, c <- part3/also3
// (the last member receives the whole remainder).
struct strings
{
    std::string a, b, c;
};
However my parser seems to merge the parts together and store it into the first std::string.
Here is the code on coliru
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
// Parse target: three string members to be filled from the '/'-separated input.
struct strings
{
std::string a;
std::string b;
std::string c;
};
// Adapts `strings` as a Boost.Fusion sequence with members a, b, c in that order.
BOOST_FUSION_ADAPT_STRUCT(strings,
(std::string, a) (std::string, b) (std::string, c))
// Qi grammar with a `strings` attribute: (parts-1) parts each followed by '/',
// then a final part that consumes the rest of the input.
// NOTE: as the output quoted below shows, the repeated parts end up
// concatenated into the first member rather than one per member.
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
// parts: total number of parts to split into; asserted to be positive.
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
// `>` is the expectation operator: a part must be followed by '/'.
split_string = repeat (parts-1) [part > '/'] > last_part;
// One or more characters other than '/'.
part = +(~char_ ("/"));
// One or more characters of any kind, '/' included.
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
}
The output is:
Parsed: 1
a:onetwo
b:three/four
c:
while I expected:
Parsed: 1
a:one
b:two
c:three/four
I'd like not to modify the grammar heavily and leave "repeat" statement in it, because the "real" grammar is much more complex of course and I will need to have it there. Just need to find the way to disable the concatenations. I tried
repeat (parts-1) [as_string[part] > '/']
but that does not compile.
The trouble here is specifically that qi::repeat is documented to expose a container of element-types.
Now, because the exposed attribute type of the rule (strings) is not a container-type, Spirit "knows" how to flatten the values.
Of course it's not what you wanted in this case, but usually this heuristic makes for really convenient accumulation of string values.
Fix 1: use a container attribute
You could witness the reverse fix by getting rid of the non-container (sequence) target attribute:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
// Container attribute: one std::string per parsed part.
using strings = std::vector<std::string>;
// Same grammar as in the question, but exposing the container attribute above.
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
// parts: total number of parts to split into; asserted to be positive.
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
// (parts-1) parts each followed by '/', then the final part.
split_string = repeat (parts-1) [part > '/']
> last_part
;
// One or more characters other than '/'.
part = +(~char_ ("/"))
;
// One or more characters of any kind, '/' included.
last_part = +char_
;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
for(auto i = 0ul; i<ss.size(); ++i)
std::cout << static_cast<char>('a'+i) << ":" << ss[i] << "\n";
}
}
What you really wanted:
Of course you want to keep the struct/sequence adaptation (?); In this case that's really tricky because as soon as you use any kind of Kleene operator (*,%) or qi::repeat you'll have the attribute transformation rules as outlined above, ruining your mood.
Luckily, I just remembered I have a hacky solution based on the auto_ parser. Note the caveat in this older answer though:
Read empty values with boost::spirit
CAVEAT Specializing for std::string directly like this might not be the best idea (it might not always be appropriate and might interact badly with other parsers).
By default create_parser<std::string> is not defined, so you might decide this usage is good enough for your case:
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Parse target: three string members, filled in declaration order.
struct strings {
    std::string a, b, c;
};
// Specialization of create_parser for std::string, so that qi::auto_ can
// produce a parser for string members (see the caveat quoted above).
// The parser is: one or more characters other than '/', taken as a lexeme;
// failing that, the attribute "(unspecified)".
namespace boost { namespace spirit { namespace traits {
template <> struct create_parser<std::string> {
// Type of a deep copy of the parser expression; must match call() below.
typedef proto::result_of::deep_copy<
BOOST_TYPEOF(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
)
>::type type;
// Returns the parser expression by value (deep-copied).
static type call() {
return proto::deep_copy(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
);
}
};
}}}
// Adapts `strings` as a Boost.Fusion sequence with members a, b, c in that order.
BOOST_FUSION_ADAPT_STRUCT(strings, (std::string, a)(std::string, b)(std::string, c))
// Grammar with a `strings` attribute whose single rule is auto_ under a '/' skipper.
template <typename Iterator>
struct google_parser : qi::grammar<Iterator, strings()> {
google_parser() : google_parser::base_type(entry, "contacts") {
using namespace qi;
// auto_ derives the parser from the attribute type; '/' is skipped between fields.
entry =
skip('/') [auto_]
;
}
private:
qi::rule<Iterator, strings()> entry;
};
int main() {
using It = std::string::const_iterator;
google_parser<It> p;
std::string const input = "part1/part2/part3/also3";
It f = input.begin(), l = input.end();
strings ss;
bool ok = qi::parse(f, l, p >> *qi::char_, ss, ss.c);
if (ok)
{
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
Prints
a:part1
b:part2
c:part3/also3
Update/Bonus
In response to the OP's own answer, I wanted to challenge myself to write it more generically.
The main thing is to write set_field_ in such a way that it doesn't know/assume more than required about the destination sequence type.
With a bit of Boost Fusion magic that became:
// Function object that assigns `src` to the idx-th element of the Fusion
// sequence `seq`, without knowing the sequence's concrete type.
struct set_field_
{
// seq: destination sequence; src: value to store; idx: zero-based element position.
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
// Fold over the elements with a running index as state (starting at 0).
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
// Fold step: holds the target position and the value to assign.
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
// i: index of `dest` within the sequence; returns the index of the next element.
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
// assign_to performs the assignment using Spirit's attribute handling.
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
It has the added flexibility of applying Spirit's attribute compatibility rules¹. So, you can use the same grammar with both the following types:
// Three string members.
struct strings {
    std::string a;
    std::string b;
    std::string c;
};

// Same shape, but the first member is a character vector instead of a string.
struct alternative {
    std::vector<char> first;
    std::string second, third;
};
To drive the point home, I made the adaptation of the second struct reverse the field order:
// Sequence order for `strings` follows the declaration order: a, b, c.
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
// Sequence order for `alternative` is deliberately reversed: element 0 is `third`.
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
Without further ado, the demo program:
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/algorithm/iteration.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace fus = boost::fusion;
// First parse target: three string members.
struct strings {
std::string a, b, c;
};
// Second parse target: same shape, but `first` is a character vector.
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
// Sequence order for `strings` follows the declaration order: a, b, c.
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
// Sequence order for `alternative` is deliberately reversed: element 0 is `third`.
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
// output helpers for demo:
namespace {
    // Prints the three members of `data` as a:"..." b:"..." c:"...".
    inline std::ostream& operator<<(std::ostream& os, strings const& data) {
        return os
            << "a:\"" << data.a << "\" "
            << "b:\"" << data.b << "\" "
            << "c:\"" << data.c << "\" ";
    }
    // Prints the members of `data` in declaration order: first, second, third.
    inline std::ostream& operator<<(std::ostream& os, alternative const& data) {
        // data() is valid for an empty vector, whereas &data.first[0] is
        // undefined behaviour when there are no elements.
        os << "first: vector<char> { \""; os.write(data.first.data(), data.first.size()); os << "\" } ";
        os << "second: \"" << data.second << "\" ";
        os << "third: \"" << data.third << "\" ";
        return os;
    }
}
// Function object that assigns `src` to the idx-th element of the Fusion
// sequence `seq`, without knowing the sequence's concrete type.
struct set_field_
{
// seq: destination sequence; src: value to store; idx: zero-based element position.
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
// Fold over the elements with a running index as state (starting at 0).
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
// Fold step: holds the target position and the value to assign.
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
// i: index of `dest` within the sequence; returns the index of the next element.
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
// assign_to performs the assignment using Spirit's attribute handling.
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
// Phoenix wrapper so set_field(...) can be used inside semantic actions.
boost::phoenix::function<set_field_> const set_field = {};
// Grammar that splits the input into `parts` pieces and stores piece k in
// element k of the Fusion-adapted `Target`, using a rule-local counter.
template <typename It, typename Target>
struct split_string_grammar: qi::grammar<It, Target(), qi::locals<unsigned> >
{
// parts: total number of parts to split into; asserted to be positive.
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
_a_type _current; // custom placeholder
// Reset the counter, then store each parsed part at the current position
// and advance the counter (post-increment).
split_string =
eps [ _current = 0u ]
> repeat (parts-1)
[part [ set_field(_val, _1, _current++) ] > '/']
> last_part [ set_field(_val, _1, _current++) ];
// One or more characters other than '/'.
part = +(~char_ ("/"));
// One or more characters of any kind, '/' included.
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, Target(), qi::locals<unsigned> > split_string;
qi::rule<It, std::string()> part, last_part;
};
template <size_t N = 3, typename Target>
void run_test(Target target) {
using It = std::string::const_iterator;
std::string const input { "one/two/three/four" };
It first = input.begin(), last = input.end();
split_string_grammar<It, Target> split_string(N);
bool ok = qi::parse (first, last, split_string, target);
if (ok) {
std::cout << target << '\n';
} else {
std::cout << "Parse failed\n";
}
if (first != last)
std::cout << "Remaining input left unparsed: '" << std::string(first, last) << "'\n";
}
int main ()
{
run_test(strings {});
run_test(alternative {});
}
Output:
a:"one" b:"two" c:"three/four"
first: vector<char> { "three/four" } second: "two" third: "one"
¹ as with BOOST_SPIRIT_ACTIONS_ALLOW_ATTR_COMPAT
Besides sehe's suggestions one more possible way is to use semantic actions (coliru):
// Function object that stores `src` in member a, b or c of `dst`, chosen by
// `idx`, and then advances `idx` so the next call fills the next member.
struct set_field_
{
    void operator() (strings& dst, std::string const& src, unsigned& idx) const
    {
        assert (idx < 3);
        unsigned const slot = idx++;
        if (slot == 0) {
            dst.a = src;
        } else if (slot == 1) {
            dst.b = src;
        } else if (slot == 2) {
            dst.c = src;
        }
    }
};
// Phoenix wrapper so set_field (...) can be used inside semantic actions.
boost::phoenix::function<set_field_> const set_field { set_field_ {} };
// Grammar that splits the input into `parts` pieces and stores them in the
// members of `strings` through the set_field semantic action; the rule-local
// _a is the counter handed to set_field.
template <typename It>
struct split_string_grammar: qi::grammar<It, strings (), qi::locals<unsigned> >
{
// parts: total number of parts to split into; asserted to be positive.
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
// Reset the counter, then hand each parsed part to set_field.
split_string = eps [ _a = val (0) ]
> repeat (parts-1) [part [ set_field (_val, _1, _a) ] > '/']
> last_part [ set_field (_val, _1, _a) ];
// One or more characters other than '/'.
part = +(~char_ ("/"));
// One or more characters of any kind, '/' included.
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings (), qi::locals<unsigned> > split_string;
qi::rule<It, std::string ()> part, last_part;
};