I'm pretty new to boost::spirit. I would like to parse a string of comma separated objects into an std::vector (similarly as in the tutorials). The string could be of different types (known at compile time): integers, like "1,2,3", strings "Apple, Orange, Banana", etc. etc.
I would like to have a unified interface for all types.
If I parse a single element I can use the auto_ expression.
Is it possible to have a similar interface with vectors?
Can I define a rule that, given a template parameter, can actually parse this vector?
Here is a simple sample code (which does not compile due to the last call to phrase_parse):
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <iostream>
#include <vector>
#include <boost/spirit/include/qi_auto.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
using qi::auto_;
using qi::phrase_parse;
using ascii::space;
using phoenix::push_back;
// Question sample: first parses a single int with qi::auto_, then attempts to
// parse a comma-separated list into a std::vector<int>.
int main()
{
std::string line1 = "3";
std::string line2 = "1, 2, 3";
int v;
std::vector<int> vector;
typedef std::string::iterator stringIterator;
stringIterator first = line1.begin();
stringIterator last = line1.end();
// These repeat the namespace aliases already declared at file scope.
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Single element: auto_ is handed the int attribute `v`.
bool r1 = qi::phrase_parse( first,
last,
qi::auto_,
ascii::space,
v );
first = line2.begin();
last = line2.end();
//The following call is wrong!
// Note that `vector` appears twice: the semantic actions push_back into it,
// and it is also passed as the attribute of the whole expression.
bool r2 = qi::phrase_parse( first,
last,
// Begin grammar
(
qi::auto_[push_back(phoenix::ref(vector), qi::_1)]
>> *(',' >> qi::auto_[push_back(phoenix::ref(vector),qi::_1)])
),
// End grammar
ascii::space,
vector);
// Neither r1 nor r2 is inspected before returning.
return 0;
}
UPDATE
I found a solution, in the case the size of the vector is known before parsing. On the other hand I cannot use the syntax *( ',' >> qi::auto_ ).
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Question sample: contrasts a fixed-length auto_ sequence with the
// Kleene-star form when the attribute is a std::vector<int>.
int main()
{
    std::string s = "1, 2, 3";
    std::vector<int> vector;
    //This works
    qi::phrase_parse(s.begin(), s.end(), qi::auto_ >> ',' >> qi::auto_ >> ',' >> qi::auto_ , qi::blank, vector);
    //This does not compile
    qi::phrase_parse(s.begin(), s.end(), qi::auto_ >> *( ',' >> qi::auto_ ) , qi::blank, vector);
    // Use the container's own size_type for the index so the comparison with
    // size() is not signed/unsigned; '\n' avoids flushing on every line.
    for (std::vector<int>::size_type i = 0; i < vector.size(); ++i)
        std::cout << i << ": " << vector[i] << '\n';
    return 0;
}
Moreover, using auto_, I cannot parse a string. Is it possible to define a template function where the grammar can be deduced from the template parameter?
// Sketch of the desired interface (not compilable as written): the grammar
// argument below is only a comment placeholder asking how to choose a parser
// from the element type T.
// NOTE(review): `vec` is taken by value, so anything parsed into it would not
// be visible to the caller.
template< typename T >
void MyParse(std::string& line, std::vector<T> vec)
{
qi::phrase_parse( line.begin(),
line.end(),
/*
How do I define a grammar based on T
such as:
double_ >> *( ',' >> double_ ) for T = double
+qi::alnum >> *( ',' >> +qi::alnum ) for T = std::string
*/,
qi::blank,
vec);
}
auto_ has support for container attributes out of the box:
Live On Coliru
// Two ';'-terminated number lists read from a stream; the first goes into a
// vector<int>, the second into a vector<double>, both via qi::auto_.
std::istringstream iss("1 2 3 4 5; 6 7 8 9;");
// Turn off the stream's own whitespace skipping; skipping is specified by the
// qi::space argument of phrase_match instead.
iss.unsetf(std::ios::skipws);

std::vector<int>    i;
std::vector<double> d;

if (iss >> qi::phrase_match(qi::auto_ >> ";" >> qi::auto_, qi::space, i, d))
{
    for (int e : i) {
        std::cout << "int: " << e << "\n";
    }
    for (double e : d) {
        std::cout << "double: " << e << "\n";
    }
}
Prints
int: 1
int: 2
int: 3
int: 4
int: 5
double: 6
double: 7
double: 8
double: 9
So you could basically write your template function by using ',' as the skipper. I'd prefer the operator% variant though.
Simple Take
/// Parses a ','-separated list from `line` into `container`, letting
/// qi::auto_ pick the element parser; blanks are skipped.
/// Throws a std::string describing the unparsed remainder when the parse
/// fails or does not consume the whole line.
template<typename Container>
void MyParse(std::string const& line, Container& container)
{
    auto cursor = line.begin();
    auto finish = line.end();

    bool const matched = qi::phrase_parse(
            cursor, finish,
            qi::auto_ % ',', qi::blank, container);

    if (matched && cursor == finish)
        return;

    throw "parser error: '" + std::string(cursor, finish) + "'"; // FIXME
}
Variant 2
/// Variant that treats ',' as part of the skipper: the grammar is a bare
/// qi::auto_ and the skipper is `qi::blank | ','`.
/// Throws a std::string describing the unparsed remainder when the parse
/// fails or does not consume the whole line.
template<typename Container>
void MyParse(std::string const& line, Container& container)
{
    auto cursor = line.begin();
    auto finish = line.end();

    bool const matched = qi::phrase_parse(
            cursor, finish,
            qi::auto_, qi::blank | ',', container);

    if (matched && cursor == finish)
        return;

    throw "parser error: '" + std::string(cursor, finish) + "'"; // FIXME
}
Solving the string case (and others):
If the element type is not 'deducible' by Spirit (anything could be parsed into a string), just take an optional parser/grammar that knows how to parse the element type?
/// Parses a ","-separated list from `line` into `container`.
/// `elementParser` parses one element; it defaults to qi::auto_.
/// Throws a std::string describing the unparsed remainder when the parse
/// fails or does not consume the whole line.
template<typename Container, typename ElementParser = qi::auto_type>
void MyParse(std::string const& line, Container& container, ElementParser const& elementParser = ElementParser())
{
    auto cursor = line.begin();
    auto finish = line.end();

    bool const matched = qi::phrase_parse(
            cursor, finish,
            elementParser % ",", qi::blank, container);

    if (matched && cursor == finish)
        return;

    throw "parser error: '" + std::string(cursor, finish) + "'"; // FIXME
}
Now, it parses strings just fine:
// Same input parsed twice: once as ints (default auto_ element parser), once
// as strings using an explicit "anything but a comma" element parser.
std::vector<int> i;
std::set<std::string> s;
MyParse("1,22,33,44,15", i);
MyParse("1,22,33,44,15", s, *~qi::char_(","));
for (int e : i) std::cout << "i: " << e << "\n";
// Bind by const reference so each std::string is not copied just to print it.
for (auto const& e : s) std::cout << "s: " << e << "\n";
Prints
i: 1
i: 22
i: 33
i: 44
i: 15
s: 1
s: 15
s: 22
s: 33
s: 44
Full Listing
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iostream>
namespace qi = boost::spirit::qi;
/// Parses a ","-separated list from `line` into `container`.
/// `elementParser` parses one element; it defaults to qi::auto_.
/// Throws a std::string describing the unparsed remainder when the parse
/// fails or does not consume the whole line.
template<typename Container, typename ElementParser = qi::auto_type>
void MyParse(std::string const& line, Container& container, ElementParser const& elementParser = ElementParser())
{
    auto pos = line.begin();
    auto end = line.end();

    bool const parsed = qi::phrase_parse(
            pos, end,
            elementParser % ",", qi::blank, container);

    bool const complete = parsed && (pos == end);
    if (!complete)
        throw "parser error: '" + std::string(pos, end) + "'"; // FIXME
}
#include <set>
// Parses the same input once as ints (default auto_ element parser) and once
// as strings (explicit "anything but a comma" element parser), then prints both.
int main()
{
    std::vector<int> i;
    std::set<std::string> s;
    MyParse("1,22,33,44,15", i);
    MyParse("1,22,33,44,15", s, *~qi::char_(","));
    for (int e : i) std::cout << "i: " << e << "\n";
    // Bind by const reference so each std::string is not copied just to print it.
    for (auto const& e : s) std::cout << "s: " << e << "\n";
}
Related
The string content is like:
20 10 5 3...
It is a list of pairs of ints. How can I use Spirit to parse it into a std::vector<std::pair<int, int>>?
// Question sketch: `???` marks the grammar being asked for.
// Note that `v` is declared but not passed to phrase_parse in this sketch.
std::string line;
std::vector<std::pair<int, int>> v;
boost::spirit::qi::phrase_parse(
line.cbegin(),
line.cend(),
(
???
),
boost::spirit::qi::space
);
You could do a simple parser expression like *(int_ >> int_) (see the tutorial and these documentation pages).
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
namespace qi = boost::spirit::qi;
// Parses whitespace-separated ints two at a time into a vector of pairs and
// prints each pair on its own line.
int main() {
    std::vector<std::pair<int, int>> v;
    std::string line = "20 10 5 3";

    qi::phrase_parse(line.cbegin(), line.cend(),
                     *(qi::int_ >> qi::int_),
                     qi::space,
                     v);

    for (auto it = v.begin(); it != v.end(); ++it) {
        std::cout << "(" << it->first << ", " << it->second << ")\n";
    }
}
Prints
(20, 10)
(5, 3)
Pro Tip 1: Validity
If you want to make sure there's no unwanted/unexpected input, check for remaining data:
check the iterators after parsing
// Named iterators let us see where parsing stopped.
auto f = line.cbegin();
auto l = line.cend();
qi::phrase_parse(f, l, *(qi::int_ >> qi::int_), qi::space, v);
// Anything between f and l was not consumed by the grammar.
if (f != l) {
    std::cout << "Unparsed input '" << std::string(f,l) << "'\n";
}
or simply require qi::eoi as part of the parser expression and check the return value:
bool ok = qi::phrase_parse(line.cbegin(), line.cend(), *(qi::int_ >> qi::int_) >> qi::eoi, qi::space, v);
Pro Tip 2: "Look ma, no hands"
Since the grammar is trivially the simplest thing that could parse into this datastructure, you can let Spirit do all the guesswork:
Live On Coliru
qi::phrase_parse(line.begin(), line.end(), qi::auto_, qi::space, v);
That is, a grammar consisting of nothing but a single qi::auto_. Output is still:
(20, 10)
(5, 3)
I have the following working Qi code:
// Qi grammar for '&'-separated entries of the form key or key=value, exposed
// as a string map.
// NOTE(review): `Iterator` and `string_map` are not declared in this excerpt.
struct query_grammar
: public boost::spirit::qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
// one entry, then any number of '&'-prefixed entries
query = pair >> *(boost::spirit::qi::lit('&') >> pair);
// key is one or more qchar; the "=value" part is optional
pair = +qchar >> -(boost::spirit::qi::lit('=') >> +qchar);
// any character except '&' and '='
qchar = ~boost::spirit::qi::char_("&=");
}
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>()> query;
// The attribute here is the map's value_type, i.e. a pair whose key is const.
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>::value_type()> pair;
boost::spirit::qi::rule<Iterator, char()> qchar;
};
I tried porting it to x3:
// Attempted X3 port of the Qi query grammar: three rules (character, pair,
// map) declared first, then their definitions, then registered with
// BOOST_SPIRIT_DEFINE.
namespace x3 = boost::spirit::x3;
const x3::rule<class query_char_, char> query_char_ = "query_char";
// The attribute is the map's value_type, i.e. a pair whose key is const.
const x3::rule<class string_pair_, std::map<std::string,std::string>::value_type> string_pair_ = "string_pair";
const x3::rule<class string_map_, std::map<std::string,std::string>> string_map_ = "string_map";
// any character except '&' and '='
const auto query_char__def = ~boost::spirit::x3::char_("&=");
// key, optionally followed by "=value"
const auto string_pair__def = +query_char_ >> -(boost::spirit::x3::lit('=') >> +query_char_);
// one pair, then any number of '&'-prefixed pairs
const auto string_map__def = string_pair_ >> *(boost::spirit::x3::lit('&') >> string_pair_);
BOOST_SPIRIT_DEFINE(string_map_)
BOOST_SPIRIT_DEFINE(string_pair_)
BOOST_SPIRIT_DEFINE(query_char_)
but I am getting the following error when trying to parse a string with string_map_ :
/usr/include/boost/spirit/home/x3/support/traits/move_to.hpp:209: erreur : no matching function for call to move_to(const char*&, const char*&, std::pair<std::__cxx11::basic_string<char>, std::__cxx11::basic_string<char> >&, boost::mpl::identity<boost::spirit::x3::traits::plain_attribute>::type)
detail::move_to(first, last, dest, typename attribute_category<Dest>::type());
~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I saw this answer: Parsing pair of strings fails. Bad spirit x3 grammar and tried to make my string_pair raw but to no avail.
Edit:
this example code from the spirit examples does not compile either so I guess the problem is a bit deeper:
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Reduced example the asker reports as failing to compile: two string-valued
// sub-parsers separated by a literal space, parsed into a pair of strings.
int main()
{
std::string input( "cosmic pizza " );
auto iter = input.begin();
auto end_iter = input.end();
std::pair<std::string, std::string> result;
// first: everything up to a space; then the space; second: the rest
x3::parse( iter, end_iter, *(~x3::char_(' ')) >> ' ' >> *x3::char_, result);
}
Qi Fixes
First off, I had to fix the rule declaration with the Qi variant before it could work:
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
For the simple reason that value_type is pair<key_type const, mapped_type>, which is never assignable.
Here's a Qi SSCCE:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <map>
namespace qi = boost::spirit::qi;
template <typename T> using string_map = std::map<T, T>;
/// Qi grammar for '&'-separated entries of the form key or key=value,
/// exposed as string_map<std::string>.
template <typename Iterator>
struct query_grammar : qi::grammar<Iterator, string_map<std::string>()>
{
    query_grammar() : query_grammar::base_type(query)
    {
        using qi::char_;
        using qi::lit;

        qchar = ~char_("&=");                           // anything except '&' and '='
        pair  = +qchar >> -(lit('=') >> +qchar);        // key with optional "=value"
        query = pair >> *(lit('&') >> pair);            // one or more entries
    }

  private:
    template <typename Attr>
    using rule_of = qi::rule<Iterator, Attr()>;

    rule_of<std::map<std::string,std::string>>  query;
    // non-const key type, unlike the map's value_type
    rule_of<std::pair<std::string,std::string>> pair;
    rule_of<char>                               qchar;
};
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, query_grammar<It>{}, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Prints
======= foo=bar&baz=boo
Parsed 2 pairs
Qi Improvements
The following simpler grammar seems better:
Live On Coliru
/// Simplified query grammar: entries are "key=value" separated by '&'.
/// The '=' is required; the value may be empty and may itself contain '='.
template <typename Iterator, typename T = std::string>
struct query_grammar : qi::grammar<Iterator, string_map<T>()>
{
    query_grammar() : query_grammar::base_type(query) {
        using qi::char_;

        pair  = +~char_("&=") >> '=' >> *~char_("&");
        query = pair % '&';
    }

  private:
    template <typename Attr>
    using rule_of = qi::rule<Iterator, Attr()>;

    rule_of<std::pair<T,T>> pair;
    rule_of<std::map<T,T>>  query;
};
It accepts empty values (e.g. &q=&x=) and values containing additional =: &q=7==8&rt=bool. It could be significantly more efficient (untested).
X3 version
Without looking at your code, I translated it directly into an X3 version:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <map>
namespace x3 = boost::spirit::x3;
template <typename T> using string_map = std::map<T, T>;
namespace grammar {
using namespace x3;
auto pair = +~char_("&=") >> '=' >> *~char_("&");
auto query = pair % '&';
}
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, grammar::query, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Which, obviously ( --- ) prints
======= foo=bar&baz=boo
Parsed 2 pairs
X3 Improvements
You will probably want to coerce the attribute types for the rules, because automatic attribute propagation can have surprising heuristics.
namespace grammar {
    /// Returns a reference to a function-local static X3 rule that parses
    /// '&'-separated "key[=value]" entries into std::map<T, T>.
    /// Both rules state their attribute types explicitly.
    template <typename T = std::string> auto& query() {
        using namespace x3;

        // key, optionally followed by '=' and a (possibly empty) value
        static const auto pair_rule
            = rule<struct pair_, std::pair<T, T> > {"pair"}
            = +~char_("&=") >> -(lit('=') >> *~char_("&"));

        // one or more pairs separated by '&'
        static const auto query_rule
            = rule<struct query_, std::map<T, T> > {"query"}
            = pair_rule % lit('&');

        return query_rule;
    }
}
See it Live On Coliru
What Went wrong?
The X3 version suffered the same problem with const key type in std::map<>::value_type
I am new to using qi, and have run into a difficulty. I wish to parse an input like:
X + Y + Z , A + B
Into two vectors of strings.
I have code that does this, but only if the grammar parses single characters. Ideally, the following line should be readable:
Xi + Ye + Zou , Ao + Bi
Using a simple replacement such as elem = +(char_ - '+') % '+' fails to parse, because it will consume the ',' on the first elem, but I've not discovered a simple way around this.
Here is my single-character code, for reference:
#include <bits/stdc++.h>
#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// A list of element strings.
using element_array = std::vector<std::string>;

// Parse result: the two element lists of one reaction line.
struct reaction_t
{
    element_array reactants;   // first list (adapted first for Fusion)
    element_array products;    // second list
};
BOOST_FUSION_ADAPT_STRUCT(reaction_t, (element_array, reactants)(element_array, products))
/// Single-character version of the reaction grammar: two '+'-separated
/// lists of characters, divided by ','. Blanks are skipped.
template<typename Iterator>
struct reaction_parser : qi::grammar<Iterator,reaction_t(),qi::blank_type>
{
    reaction_parser() : reaction_parser::base_type(reaction)
    {
        using namespace qi;

        elem     = char_ % lit('+');
        reaction = elem >> lit(',') >> elem;

        BOOST_SPIRIT_DEBUG_NODES((reaction)(elem));
    }

    qi::rule<Iterator, reaction_t(), qi::blank_type> reaction;
    qi::rule<Iterator, element_array(), qi::blank_type> elem;
};
// Parses one hard-coded reaction line and reports success/failure and any
// unconsumed input.
int main()
{
    const std::string input = "X + Y + Z, A + B";
    reaction_parser<std::string::const_iterator> p;
    reaction_t data;

    auto f = input.begin();
    auto l = input.end();

    bool const ok = qi::phrase_parse(f, l, p, qi::blank, data);
    std::cout << (ok ? "success\n" : "failed\n");

    if (f != l) {
        std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    }
}
Using a simple replacement such as elem = +(char_ - '+') % '+' fails to parse, because it will consume the ',' on the first elem, but I've not discovered a simple way around this.
Well, the complete (braindead) simple solution would be to use +(char_ - '+' - ',') or +~char_("+,").
Really, though, I'd make the rule for element more specific, e.g.:
elem = qi::lexeme [ +alpha ] % '+';
See Boost spirit skipper issues about lexeme and skippers
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// A list of element names.
using element_array = std::vector<std::string>;

// Parse result: the element lists on either side of the ',' in a reaction.
struct reaction_t
{
    element_array reactants;   // list before the ','
    element_array products;    // list after the ','
};
BOOST_FUSION_ADAPT_STRUCT(reaction_t, (element_array, reactants)(element_array, products))
/// Reaction grammar: two '+'-separated lists of alphabetic names, divided by
/// ','. Blanks are skipped between tokens; lexeme[] keeps each name contiguous.
template<typename Iterator>
struct reaction_parser : qi::grammar<Iterator,reaction_t(),qi::blank_type>
{
    reaction_parser() : reaction_parser::base_type(reaction) {
        using namespace qi;

        elem     = lexeme[ +alpha ] % lit('+');
        reaction = elem >> lit(',') >> elem;

        BOOST_SPIRIT_DEBUG_NODES((reaction)(elem));
    }

    qi::rule<Iterator, reaction_t(), qi::blank_type> reaction;
    qi::rule<Iterator, element_array(), qi::blank_type> elem;
};
// Parses each sample reaction line, printing the reactants and products on
// success and any unconsumed input afterwards.
int main()
{
    reaction_parser<std::string::const_iterator> p;
    for (std::string const input : {
            "X + Y + Z, A + B",
            "Xi + Ye + Zou , Ao + Bi",
        })
    {
        std::cout << "----- " << input << "\n";
        auto f = begin(input), l = end(input);
        reaction_t data;
        bool ok = qi::phrase_parse(f, l, p, qi::blank, data);
        if (ok) {
            std::cout << "success\n";
            // Iterate by const reference (no per-element string copy) and use
            // names that do not shadow the parser `p` declared above.
            for (auto const& reactant : data.reactants) { std::cout << "reactant: " << reactant << "\n"; }
            for (auto const& product : data.products) { std::cout << "product: " << product << "\n"; }
        }
        else
            std::cout << "failed\n";
        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f, l) << "'\n";
    }
}
Printing:
----- X + Y + Z, A + B
success
reactant: X
reactant: Y
reactant: Z
product: A
product: B
----- Xi + Ye + Zou , Ao + Bi
success
reactant: Xi
reactant: Ye
reactant: Zou
product: Ao
product: Bi
Can you help me understand the difference between the a % b parser and its expanded a >> *(b >> a) form in Boost.Spirit? Even though the reference manual states that they are equivalent,
The list operator, a % b, is a binary operator that matches a list of one or more repetitions of a separated by occurrences of b. This is equivalent to a >> *(b >> a).
the following program produces different results depending on which is used:
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
// Parse target: an id followed by a list of integer values.
struct Record {
// the integer before the ':'
int id;
// the comma-separated integers after the ':'
std::vector<int> values;
};
BOOST_FUSION_ADAPT_STRUCT(Record,
(int, id)
(std::vector<int>, values)
)
// Parses the same input with two grammars that differ only in how the list
// is written (a % b versus a >> *(b >> a)) and prints each resulting Record.
int main() {
namespace qi = boost::spirit::qi;
const auto str = std::string{"1: 2, 3, 4"};
// NOTE(review): both grammars are Qi expressions held in `auto` without
// qi::copy; this is reported to leave dangling references (undefined
// behaviour) — confirm before reusing this pattern.
const auto rule1 = qi::int_ >> ':' >> (qi::int_ % ',') >> qi::eoi;
const auto rule2 = qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_)) >> qi::eoi;
// list-operator form
Record record1;
if (qi::phrase_parse(str.begin(), str.end(), rule1, qi::space, record1)) {
std::cout << record1.id << ": ";
for (const auto& value : record1.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
// expanded form
Record record2;
if (qi::phrase_parse(str.begin(), str.end(), rule2, qi::space, record2)) {
std::cout << record2.id << ": ";
for (const auto& value : record2.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
Live on Coliru
1: 2, 3, 4,
1: 2,
rule1 and rule2 are different only in that rule1 uses the list operator ((qi::int_ % ',')) and rule2 uses its expanded form ((qi::int_ >> *(',' >> qi::int_))). However, rule1 produced 1: 2, 3, 4, (as expected) and rule2 produced 1: 2,. I cannot understand the result of rule2: 1) why is it different from that of rule1 and 2) why were 3 and 4 not included in record2.values even though phrase_parse returned true somehow?
Update X3 version added
First off, you've fallen into a deep trap here:
Qi rules don't work with auto. Use qi::copy or just use qi::rule<>. Your program has undefined behaviour and indeed it crashed for me (valgrind pointed out where the dangling references originated).
So, first off:
const auto rule = qi::copy(qi::int_ >> ':' >> (qi::int_ % ',') >> qi::eoi);
Now, when you delete the redundancy in the program, you get:
Reproducing the problem
Live On Coliru
// Runs the list-operator form and the expanded form through the same test.
int main() {
    // qi::copy yields self-contained copies, so they can be held in variables
    auto const list_form     = qi::copy(qi::int_ >> ':' >> (qi::int_ % ','));
    auto const expanded_form = qi::copy(qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_)));

    test(list_form);
    test(expanded_form);
}
Printing
1: 2, 3, 4,
1: 2,
The cause and the fix
What happened to 3, 4 which was successfully parsed?
Well, the attribute propagation rules indicate that qi::int_ >> *(',' >> qi::int_) exposes a tuple<int, vector<int> >. In a bid to magically DoTheRightThing(TM) Spirit accidentally misfires and "assigns" the int into the attribute reference, ignoring the remaining vector<int>.
If you want to make container attributes parse as "an atomic group", use qi::as<>:
test(qi::copy(qi::int_ >> ':' >> qi::as<Record::values_t>() [ qi::int_ >> *(',' >> qi::int_)]));
Here as<> acts as a barrier for the attribute compatibility heuristics and the grammar knows what you meant:
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
// Parse target: an id followed by a list of integer values.
struct Record {
// the integer before the ':'
int id;
// named so callers can refer to the container type (e.g. qi::as<Record::values_t>)
using values_t = std::vector<int>;
// the comma-separated integers after the ':'
values_t values;
};
BOOST_FUSION_ADAPT_STRUCT(Record, id, values)
namespace qi = boost::spirit::qi;
template <typename T>
void test(T const& rule) {
const std::string str = "1: 2, 3, 4";
Record record;
if (qi::phrase_parse(str.begin(), str.end(), rule >> qi::eoi, qi::space, record)) {
std::cout << record.id << ": ";
for (const auto& value : record.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
// Runs three spellings of the same grammar through test(): the list operator,
// its expanded form, and the expanded form wrapped in qi::as<>.
int main() {
    // qi::copy yields self-contained copies, so they can be held in variables
    auto const list_form     = qi::copy(qi::int_ >> ':' >> (qi::int_ % ','));
    auto const expanded_form = qi::copy(qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_)));
    auto const grouped_form  = qi::copy(qi::int_ >> ':' >> qi::as<Record::values_t>() [ qi::int_ >> *(',' >> qi::int_)]);

    test(list_form);
    test(expanded_form);
    test(grouped_form);
}
Prints
1: 2, 3, 4,
1: 2,
1: 2, 3, 4,
Because it's time to get people started with X3 (the new version of Spirit), and because I like to challenge myself to do the corresponding tasks in Spirit X3, here is the Spirit X3 version.
There's no problem with auto in X3.
The "broken" case also behaves much better, triggering this static assertion:
// If you got an error here, then you are trying to pass
// a fusion sequence with the wrong number of elements
// as that expected by the (sequence) parser.
static_assert(
fusion::result_of::size<Attribute>::value == (l_size + r_size)
, "Attribute does not have the expected size."
);
That's nice, right?
The workaround seems a bit less readable:
test(int_ >> ':' >> (rule<struct _, Record::values_t>{} = (int_ >> *(',' >> int_))));
But it would be trivial to write your own as<> "directive" (or just a function), if you wanted:
namespace {
// Ad-hoc `as<T>[expr]` helper: operator[] wraps the given expression in an
// x3::rule named "as" whose attribute type is T.
template <typename T>
struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
// as_parser converts the argument into an X3 parser before it is assigned to the rule
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
// Variable template so the helper is spelled as<T>[...]
template <typename T> static const as_type<T> as = {};
}
DEMO
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
// Parse target: an id followed by a list of integer values.
struct Record {
// the integer before the ':'
int id;
// named so callers can refer to the container type (e.g. as<Record::values_t>)
using values_t = std::vector<int>;
// the comma-separated integers after the ':'
values_t values;
};
namespace x3 = boost::spirit::x3;
template <typename T>
void test(T const& rule) {
const std::string str = "1: 2, 3, 4";
Record record;
auto attr = std::tie(record.id, record.values);
if (x3::phrase_parse(str.begin(), str.end(), rule >> x3::eoi, x3::space, attr)) {
std::cout << record.id << ": ";
for (const auto& value : record.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
namespace {
// Ad-hoc `as<T>[expr]` helper: operator[] wraps the given expression in an
// x3::rule named "as" whose attribute type is T.
template <typename T>
struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
// as_parser converts the argument into an X3 parser before it is assigned to the rule
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
// Variable template so the helper is spelled as<T>[...]
template <typename T> static const as_type<T> as = {};
}
// Exercises test() with the list operator and with two ways of forcing the
// expanded form to be treated as a single Record::values_t attribute.
int main() {
using namespace x3;
test(int_ >> ':' >> (int_ % ','));
//test(int_ >> ':' >> (int_ >> *(',' >> int_))); // COMPILER asserts "Attribute does not have the expected size."
// "clumsy" x3 style workaround
// (an inline rule with an explicit attribute type wraps the expanded form)
test(int_ >> ':' >> (rule<struct _, Record::values_t>{} = (int_ >> *(',' >> int_))));
// using an ad-hoc `as<>` implementation:
test(int_ >> ':' >> as<Record::values_t>[int_ >> *(',' >> int_)]);
}
Prints
1: 2, 3, 4,
1: 2, 3, 4,
1: 2, 3, 4,
I have a working boost spirit parser and was thinking if it is possible to do iterative update of an abstract syntax tree with boost spirit?
I have a struct similar to:
struct ast;
// A child node; the recursive_wrapper lets the variant hold the (still
// incomplete) ast type.
using node = boost::variant< boost::recursive_wrapper<ast> >;

// One tree level: its own integer values plus its child nodes.
struct ast
{
    std::vector<int>  value;
    std::vector<node> children;
};
Which is being parsed by use of:
bool r = phrase_parse(begin, end, grammar, space, ast);
Would it be possible to do iterative update of abstract syntax tree with boost spirit? I have not found any documentation on this, but I was thinking if the parsers semantic actions could push_back on an already existing AST. Has anyone tried this?
This would allow for parsing like this:
bool r = phrase_parse(begin, end, grammar, space, ast); //initial parsing
//the second parse will be called at a later state given some event/timer/io/something
bool r = phrase_parse(begin, end, grammar, space, ast); //additional parsing which will update the already existing AST
How would you know which nodes to merge? Or would you always add ("graft") at the root level? In that case, why don't you just parse another and merge moving the elements into the existing ast?
/// Appends `other`'s values and children to this node by moving them.
/// `other` is left holding moved-from elements.
ast& operator+=(ast&& other) {
    // Range insert with move iterators: the vector can grow once for the
    // whole range instead of once per element as with back_inserter.
    value.insert(value.end(),
                 std::make_move_iterator(other.value.begin()),
                 std::make_move_iterator(other.value.end()));
    children.insert(children.end(),
                    std::make_move_iterator(other.children.begin()),
                    std::make_move_iterator(other.children.end()));
    return *this;
}
Demo Time
Let's devise the simplest grammar I can think of for this AST:
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
Note I didn't even make the ; optional. Oh well. Samples. Exercises for readers. ☡ You know the drill.
We implement the trivial function ast parse(It f, It l), and then we can simply merge the asts:
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
merged += parse(input.begin(), input.end());
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <iterator>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
struct ast;
//typedef boost::make_recursive_variant<boost::recursive_wrapper<ast> >::type node;
typedef boost::variant<boost::recursive_wrapper<ast> > node;
struct ast {
std::vector<int> value;
std::vector<node> children;
ast& operator+=(ast&& other) {
std::move(other.value.begin(), other.value.end(), back_inserter(value));
std::move(other.children.begin(), other.children.end(), back_inserter(children));
return *this;
}
};
BOOST_FUSION_ADAPT_STRUCT(ast,
(std::vector<int>,value)
(std::vector<node>,children)
)
/// Grammar for the demo AST: '{' optional int list ';' optional list of
/// nested nodes '}', with both lists comma-separated.
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, ast(), Skipper>
{
    grammar() : grammar::base_type(start) {
        using namespace qi;

        // `start` refers to itself for the nested children
        start = lit('{')
             >> -(int_ % lit(','))
             >> lit(';')
             >> -(start % lit(','))
             >> lit('}');

        BOOST_SPIRIT_DEBUG_NODES((start));
    }

  private:
    qi::rule<It, ast(), Skipper> start;
};
// for output:
static inline std::ostream& operator<<(std::ostream& os, ast const& v) {
using namespace karma;
rule<boost::spirit::ostream_iterator, ast()> r;
r = '{' << -(int_ % ',') << ';' << -((r|eps) % ',') << '}';
return os << format(r, v);
}
/// Parses [f, l) into a fresh ast using a function-local static grammar,
/// skipping whitespace. On failure or leftover input, prints a diagnostic
/// with the unparsed remainder and terminates the process with exit(255).
template <typename It> ast parse(It f, It l)
{
    static grammar<It> g;
    ast parsed;

    bool const ok = qi::phrase_parse(f, l, g, qi::space, parsed);
    if (ok && f == l)
        return parsed;

    std::cout << "Parse failure\n"
              << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    exit(255);
}
// Parses each sample input into its own ast and merges it into `merged`,
// printing the accumulated tree after every step.
int main() {
    char const* const samples[] = {
        "{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
        "{10,20,30;{40;{90, 80;}},{50,60;}}",
    };

    ast merged;
    for (std::string const input : samples)
    {
        merged += parse(input.begin(), input.end());
        std::cout << "merged + " << input << " --> " << merged << "\n";
    }
}
Of course, it prints:
merged + {1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}} --> {1,2,3;{4;{9,8;}},{5,6;}}
merged + {10,20,30;{40;{90, 80;}},{50,60;}} --> {1,2,3,10,20,30;{4;{9,8;}},{5,6;},{40;{90,80;}},{50,60;}}
UPDATE
In this - trivial - example, you can just bind the collections to the attributes in the parse call. The same thing will happen without the operator+= call needed to move the elements, because the rules are written to automatically append to the bound container attribute.
CAVEAT: A distinct disadvantage of modifying the target value in-place is what happens if parsing fails. In this version, the merged value will then be "undefined" (it has received partial information from the failed parse).
So if you want to parse inputs "atomically", the first, more explicit approach is a better fit.
So the following is a slightly shorter way to write the same:
Live On Coliru
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
struct ast;
//typedef boost::make_recursive_variant<boost::recursive_wrapper<ast> >::type node;
// A child node; the recursive_wrapper lets the variant hold the (still
// incomplete) ast type.
using node = boost::variant<boost::recursive_wrapper<ast> >;

// One tree level: its own integer values plus its child nodes.
struct ast {
    std::vector<int>  value;
    std::vector<node> children;
};
BOOST_FUSION_ADAPT_STRUCT(ast,
(std::vector<int>,value)
(std::vector<node>,children)
)
/// Grammar for the demo AST: '{' optional int list ';' optional list of
/// nested nodes '}', with both lists comma-separated.
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, ast(), Skipper>
{
    grammar() : grammar::base_type(start) {
        using namespace qi;

        // `start` refers to itself for the nested children
        start = lit('{')
             >> -(int_ % lit(','))
             >> lit(';')
             >> -(start % lit(','))
             >> lit('}');

        BOOST_SPIRIT_DEBUG_NODES((start));
    }

  private:
    qi::rule<It, ast(), Skipper> start;
};
// for output:
static inline std::ostream& operator<<(std::ostream& os, ast const& v) {
using namespace karma;
rule<boost::spirit::ostream_iterator, ast()> r;
r = '{' << -(int_ % ',') << ';' << -((r|eps) % ',') << '}';
return os << format(r, v);
}
/// Parses [f, l) directly into `into` using a function-local static grammar,
/// skipping whitespace. On failure or leftover input, prints a diagnostic
/// with the unparsed remainder and terminates the process with exit(255).
template <typename It> void parse(It f, It l, ast& into)
{
    static grammar<It> g;

    bool const ok = qi::phrase_parse(f, l, g, qi::space, into);
    if (ok && f == l)
        return;

    std::cout << "Parse failure\n"
              << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    exit(255);
}
// Parses each sample input straight into the same `merged` ast, printing the
// accumulated tree after every step.
int main() {
    char const* const samples[] = {
        "{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
        "{10,20,30;{40;{90, 80;}},{50,60;}}",
    };

    ast merged;
    for (std::string const input : samples)
    {
        parse(input.begin(), input.end(), merged);
        std::cout << "merged + " << input << " --> " << merged << "\n";
    }
}
Still prints