C++ Boost Spirit, parsing data and storing the maximum - c++

I'm trying the code sehe gave here : Boolean expression (grammar) parser in c++
I would like to create a string variable max, that would store the maximum variable encountered at each parsing (on the lexicographic order, for example).
I tried things like :
var_ = qi::lexeme[ +alpha ] [_val = _1, if_(phx::ref(m) < _1) [phx::ref(m) = _1]];, but there is a (really long) compilation error
var_ = qi::lexeme[ +alpha [_val = _1, if_(phx::ref(m) < _1) [phx::ref(m) = _1]]]; but with this one I only get the first caracter of a variable, which is restrincting.
I also tried to simplify things using integers instead of string for variables, but var_ = int_ [...] didn't work either, because int_ is already a parser (I think).
Do you have any ideas ?
Thanks in advance

I'd say that
start = *word [ if_(_1>_val) [_val=_1] ];
should be fine. However, due to a bug (?) Phoenix statements in a single-statement semantic action do not compile. You can easily work around it using a no-op statement, like e.g. _pass=true in this context:
start = *word [ if_(_1>_val) [_val=_1], _pass = true ];
Now, for this I assumed a
rule<It, std::string()> word = +alpha;
If you insist you can cram it all into one rule though:
start = *as_string[lexeme[+alpha]] [ if_(_1>_val) [_val=_1], _pass = true ];
I don't recommend that.
Demo
Live On Colir
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
template <typename It, typename Skipper>
struct max_parser : qi::grammar<It, std::string(), Skipper> {
max_parser() : max_parser::base_type(start) {
using namespace qi;
using phx::if_;
#if 1
word = lexeme [ +alpha ];
start = *word [ if_(_1>_val) [_val=_1], _pass = true ];
#else
start = *as_string[lexeme[+alpha]] [ if_(_1>_val) [_val=_1], _pass = true ];
#endif
}
private:
qi::rule<It, std::string(), Skipper> start, word;
};
int main() {
std::string const input("beauty shall be in ze eye of the beholder");
using It = std::string::const_iterator;
max_parser<It, qi::space_type> parser;
std::string data;
It it = input.begin(), end = input.end();
bool ok = qi::phrase_parse(it, end, parser, qi::space, data);
if (ok) {
std::cout << "Parse success: " << data << "\n";
} else {
std::cout << "Parse failed\n";
}
if (it != end)
std::cout << "Remaining unparsed: '" << std::string(it,end) << "'\n";
}
Prints:
Parse success: ze

Re: comment:
Thanks for your answers. I wanted to do both usual parsing and keeping the maximum encountered string, and it worked with : var_ = *as_string[qi::lexeme[ +digit ]] [if_(phx::ref(m) < _1) [phx::ref(m) = _1], _val = _1];
For even more fun, and in the interest of complete overkill, I've come up with something that I think is close to useful:
Live On Coliru
int main() {
do_test<int>(" 1 99 -1312 4 1014", -9999);
do_test<double>(" 1 NaN -4 7e3 7e4 -31e9");
do_test<std::string>("beauty shall be in ze eye of the beholder", "", qi::as_string[qi::lexeme[+qi::graph]]);
}
The sample prints:
Parse success: 5 elements with maximum of 1014
values: 1 99 -1312 4 1014
Parse success: 6 elements with maximum of 70000
values: 1 nan -4 7000 70000 -3.1e+10
Parse success: 9 elements with maximum of ze
values: beauty shall be in ze eye of the beholder
As you can see, with string we need to help the Spirit a bit because it doesn't know how you would like to "define" a single "word". The test driver is completely generic:
template <typename T, typename ElementParser = typename boost::spirit::traits::create_parser<T>::type>
void do_test(std::string const& input,
T const& start_value = std::numeric_limits<T>::lowest(),
ElementParser const& element_parser = boost::spirit::traits::create_parser<T>::call())
{
using It = std::string::const_iterator;
vector_and_max<T> data;
It it = input.begin(), end = input.end();
bool ok = qi::phrase_parse(it, end, max_parser<It, T>(start_value, element_parser), qi::space, data);
if (ok) {
std::cout << "Parse success: " << data.first.size() << " elements with maximum of " << data.second << "\n";
std::copy(data.first.begin(), data.first.end(), std::ostream_iterator<T>(std::cout << "\t values: ", " "));
std::cout << "\n";
} else {
std::cout << "Parse failed\n";
}
if (it != end)
std::cout << "Remaining unparsed: '" << std::string(it,end) << "'\n";
}
The start-element and element-parser are passed to the constructor of our grammar:
template <typename T>
using vector_and_max = std::pair<std::vector<T>, T>;
template <typename It, typename T, typename Skipper = qi::space_type>
struct max_parser : qi::grammar<It, vector_and_max<T>(), Skipper> {
template <typename ElementParser>
max_parser(T const& start_value, ElementParser const& element_parser) : max_parser::base_type(start) {
using namespace qi;
using phx::if_;
_a_type running_max;
vector_with_max %=
eps [ running_max = start_value ]
>> *boost::proto::deep_copy(element_parser)
[ if_(_1>running_max) [running_max=_1], _pass = true ]
>> attr(running_max)
;
start = vector_with_max;
}
private:
qi::rule<It, vector_and_max<T>(), Skipper> start;
qi::rule<It, vector_and_max<T>(), Skipper, qi::locals<T> > vector_with_max;
};
Full Listing
For reference
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
template <typename T>
using vector_and_max = std::pair<std::vector<T>, T>;
template <typename It, typename T, typename Skipper = qi::space_type>
struct max_parser : qi::grammar<It, vector_and_max<T>(), Skipper> {
template <typename ElementParser>
max_parser(T const& start_value, ElementParser const& element_parser) : max_parser::base_type(start) {
using namespace qi;
using phx::if_;
_a_type running_max;
vector_with_max %=
eps [ running_max = start_value ]
>> *boost::proto::deep_copy(element_parser)
[ if_(_1>running_max) [running_max=_1], _pass = true ]
>> attr(running_max)
;
start = vector_with_max;
}
private:
qi::rule<It, vector_and_max<T>(), Skipper> start;
qi::rule<It, vector_and_max<T>(), Skipper, qi::locals<T> > vector_with_max;
};
template <typename T, typename ElementParser = typename boost::spirit::traits::create_parser<T>::type>
void do_test(std::string const& input,
T const& start_value = std::numeric_limits<T>::lowest(),
ElementParser const& element_parser = boost::spirit::traits::create_parser<T>::call())
{
using It = std::string::const_iterator;
vector_and_max<T> data;
It it = input.begin(), end = input.end();
bool ok = qi::phrase_parse(it, end, max_parser<It, T>(start_value, element_parser), qi::space, data);
if (ok) {
std::cout << "Parse success: " << data.first.size() << " elements with maximum of " << data.second << "\n";
std::copy(data.first.begin(), data.first.end(), std::ostream_iterator<T>(std::cout << "\t values: ", " "));
std::cout << "\n";
} else {
std::cout << "Parse failed\n";
}
if (it != end)
std::cout << "Remaining unparsed: '" << std::string(it,end) << "'\n";
}
int main() {
do_test<int>(" 1 99 -1312 4 1014");
do_test<double>(" 1 NaN -4 7e3 7e4 -31e9");
do_test<std::string>("beauty shall be in ze eye of the beholder", "", qi::as_string[qi::lexeme[+qi::graph]]);
}

Just for fun, here's how to do roughly¹ the same as in my other answer, and more, but without using boost spirit at all:
Live On Coliru
#include <algorithm>
#include <sstream>
#include <iterator>
#include <iostream>
int main() {
std::istringstream iss("beauty shall be in ze eye of the beholder");
std::string top2[2];
auto end = std::partial_sort_copy(
std::istream_iterator<std::string>(iss), {},
std::begin(top2), std::end(top2),
std::greater<std::string>());
for (auto it=top2; it!=end; ++it)
std::cout << "(Next) highest word: '" << *it << "'\n";
}
Output:
(Next) highest word: 'ze'
(Next) highest word: 'the'
¹ we're not nearly as specific about isalpha and isspace character types here

Related

Boost.Spirit: porting string pairs from Qi to X3

I have the following working Qi code:
struct query_grammar
: public boost::spirit::qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
query = pair >> *(boost::spirit::qi::lit('&') >> pair);
pair = +qchar >> -(boost::spirit::qi::lit('=') >> +qchar);
qchar = ~boost::spirit::qi::char_("&=");
}
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>()> query;
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>::value_type()> pair;
boost::spirit::qi::rule<Iterator, char()> qchar;
};
I tried porting it to x3:
namespace x3 = boost::spirit::x3;
const x3::rule<class query_char_, char> query_char_ = "query_char";
const x3::rule<class string_pair_, std::map<std::string,std::string>::value_type> string_pair_ = "string_pair";
const x3::rule<class string_map_, std::map<std::string,std::string>> string_map_ = "string_map";
const auto query_char__def = ~boost::spirit::x3::char_("&=");
const auto string_pair__def = +query_char_ >> -(boost::spirit::x3::lit('=') >> +query_char_);
const auto string_map__def = string_pair_ >> *(boost::spirit::x3::lit('&') >> string_pair_);
BOOST_SPIRIT_DEFINE(string_map_)
BOOST_SPIRIT_DEFINE(string_pair_)
BOOST_SPIRIT_DEFINE(query_char_)
but I am getting the following error when trying to parse a string with string_map_ :
/usr/include/boost/spirit/home/x3/support/traits/move_to.hpp:209: erreur : no matching function for call to move_to(const char*&, const char*&, std::pair<std::__cxx11::basic_string<char>, std::__cxx11::basic_string<char> >&, boost::mpl::identity<boost::spirit::x3::traits::plain_attribute>::type)
detail::move_to(first, last, dest, typename attribute_category<Dest>::type());
~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I saw this answer: Parsing pair of strings fails. Bad spirit x3 grammar and tried to make my string_pair raw but to no avail.
Edit:
this example code from the spirit examples does not compile either so I guess the problem is a bit deeper:
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
int main()
{
std::string input( "cosmic pizza " );
auto iter = input.begin();
auto end_iter = input.end();
std::pair<std::string, std::string> result;
x3::parse( iter, end_iter, *(~x3::char_(' ')) >> ' ' >> *x3::char_, result);
}
Qi Fixes
First off, I had to fix the rule declaration with the Qi variant before it could work:
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
For the simple reason that value_type has pair<key_type const, mapped_type> which is never assignable.
Here's a Qi SSCCE:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <map>
namespace qi = boost::spirit::qi;
template <typename T> using string_map = std::map<T, T>;
template <typename Iterator>
struct query_grammar : public qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
qchar = ~qi::char_("&=");
pair = +qchar >> -(qi::lit('=') >> +qchar);
query = pair >> *(qi::lit('&') >> pair);
}
private:
qi::rule<Iterator, std::map<std::string,std::string>()> query;
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
qi::rule<Iterator, char()> qchar;
};
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, query_grammar<It>{}, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Prints
======= foo=bar&baz=boo
Parsed 2 pairs
Qi Improvements
The following simpler grammar seems better:
Live On Coliru
template <typename Iterator, typename T = std::string>
struct query_grammar : public qi::grammar<Iterator, string_map<T>()>
{
query_grammar() : query_grammar::base_type(query) {
using namespace qi;
pair = +~char_("&=") >> '=' >> *~char_("&");
query = pair % '&';
}
private:
qi::rule<Iterator, std::pair<T,T>()> pair;
qi::rule<Iterator, std::map<T,T>()> query;
};
It accepts empty values (e.g. &q=&x=) and values containing additional =: &q=7==8&rt=bool. It could be significantly more efficient (untested).
X3 version
Without looking at your code, I translated it directly into an X3 version:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <map>
namespace x3 = boost::spirit::x3;
template <typename T> using string_map = std::map<T, T>;
namespace grammar {
using namespace x3;
auto pair = +~char_("&=") >> '=' >> *~char_("&");
auto query = pair % '&';
}
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, grammar::query, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Which, obviously ( --- ) prints
======= foo=bar&baz=boo
Parsed 2 pairs
X3 Improvements
You should probably want to coerce the attribute types for the rules because automatic attribute propagation can have surprising heuristics.
namespace grammar {
template <typename T = std::string> auto& query() {
using namespace x3;
static const auto s_pair
= rule<struct pair_, std::pair<T, T> > {"pair"}
= +~char_("&=") >> -('=' >> *~char_("&"));
static const auto s_query
= rule<struct query_, std::map<T, T> > {"query"}
= s_pair % '&';
return s_query;
};
}
See it Live On Coliru
What Went wrong?
The X3 version suffered the same problem with const key type in std::map<>::value_type

parse std::vector<int> from comma separated integers

I'm trying to implement a very specific grammar, which requires me at a certain point to parse a list of comma separated integers. The qi rule looks like the following:
qi::rule<Iterator, ascii::space_type> ident;
qi::rule<Iterator, ascii::space_type> nlist;
...
ident = char_ >> nlist;
nlist = ("(" >> int_ % "," >> ")");
...
I need to pass the values up to the ident rule (The expression ident has to create a syntax tree node, where the parsed values from nlist are required for the constructor). I thought about creating and filling a std::vector and use the semantic action like _val = vector<int>.... What is now unclear to me is how do I create a vector of arbitrary length from this rule, since I do not make any assumptions on how long the input will be or using a predefined vector like the examples.
Is this even possible or does is there a better way to do it?
This is the bread and butter of Spirit Qi.
Just use any compatible attribute type and profit:
using nlist_t = std::vector<int>;
using ident_t = std::pair<char, nlist_t>;
qi::rule<Iterator, ident_t(), qi::ascii::space_type> ident;
qi::rule<Iterator, nlist_t(), qi::ascii::space_type> nlist;
Note: For std::pair attribute compatibility, include the relevant fusion header:
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
int main()
{
using nlist_t = std::vector<int>;
using ident_t = std::pair<char, nlist_t>;
using Iterator = std::string::const_iterator;
qi::rule<Iterator, ident_t(), qi::ascii::space_type> ident;
qi::rule<Iterator, nlist_t(), qi::ascii::space_type> nlist;
ident = qi::char_ >> nlist;
nlist = '(' >> qi::int_ % ',' >> ')';
for (std::string const input : { "a (1,2,3)", "+(881,-2,42) \n", "?(0)" }) {
ident_t data;
if (qi::phrase_parse(input.begin(), input.end(), ident, qi::ascii::space, data)) {
std::cout << "Parsed: " << data.first << "(";
for (auto i : data.second) std::cout << i << ",";
std::cout << ")\n";
} else
std::cout << "Parse failed: '" << input << "'\n";
}
}
Prints
Parsed: a(1,2,3,)
Parsed: +(881,-2,42,)
Parsed: ?(0,)
BONUS
Version with imagined Ast type using phoenix::construct:
Also Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace OoShinyAst {
using MyName = char;
using MyArgument = int;
using MyArgumentList = std::vector<MyArgument>;
struct MyIdent {
MyName name;
MyArgumentList args;
MyIdent() = default;
MyIdent(MyName name, MyArgumentList args)
: name(std::move(name)), args(std::move(args)) { }
};
}
int main()
{
using Iterator = std::string::const_iterator;
qi::rule<Iterator, OoShinyAst::MyIdent(), qi::ascii::space_type> ident;
qi::rule<Iterator, OoShinyAst::MyArgumentList(), qi::ascii::space_type> nlist;
nlist = '(' >> qi::int_ % ',' >> ')';
ident = (qi::char_ >> nlist) [ qi::_val = px::construct<OoShinyAst::MyIdent>(qi::_1, qi::_2) ];
for (std::string const input : { "a (1,2,3)", "+(881,-2,42) \n", "?(0)" }) {
OoShinyAst::MyIdent data;
if (qi::phrase_parse(input.begin(), input.end(), ident, qi::ascii::space, data)) {
std::cout << "Parsed: " << data.name << "(";
for (auto i : data.args) std::cout << i << ",";
std::cout << ")\n";
} else
std::cout << "Parse failed: '" << input << "'\n";
}
}

Treat escaped newline as line continuation

Here is an example of the syntax -- two groups of items:
I_name m_name parameter1=value parameter2=value
I_name m_name parameter1=value \
parameter2=value
My question is how to define the skip-type.
It is not just space_type but space_type minus newline.
But newline followed by backslash is a skip-type.
E.g.
I define name like that:
qi::rule<Iterator, std::string(), ascii::space_type> m_sName;
m_sName %= qi::lexeme[ascii::alpha >> *ascii::alnum];
This is obviously not correct, as the space_type must include newline-backslash.
The following grammar works for me.
*("\\\n" | ~qi::char_('\n')) % '\n'
It will ignore any newline after the backslash. And the following is a simple test.
#include <vector>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
typedef std::vector<std::string> Lines;
inline auto ParseLines(std::string const& str) {
Lines lines;
namespace qi = boost::spirit::qi;
if (qi::parse(
str.begin(), str.end(),
*("\\\n" | ~qi::char_('\n')) % '\n',
lines)) {
return lines;
}
else {
throw std::invalid_argument("Parse error at ParseLines");
}
}
BOOST_AUTO_TEST_CASE(TestParseLines) {
std::string const str =
"I_name m_name parameter1=value parameter2=value\n"
"I_name m_name parameter1 = value \\\n"
"parameter2 = value";
Lines const expected{
"I_name m_name parameter1=value parameter2=value",
"I_name m_name parameter1 = value parameter2 = value"
};
BOOST_TEST(ParseLines(str) == expected);
}
You should use "-std=c++14 -lboost_unit_test_framework" for compilation. Anyway, it is easy to convert the code for c++03.
qi::blank is exactly that. It's qi::space without newlines.
You can do this too: ("\\\n" | qi::blank)
To be able to declare a rule with such a skipper, define a skipper grammar:
template <typename It>
struct my_skipper : qi::grammar<It> {
my_skipper() : my_skipper::base_type(start) {}
qi::rule<It> start = ("\\\n" | qi::blank);
};
Full Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace ast {
struct record {
std::string iname, mname;
std::map<std::string, std::string> params;
};
using records = std::vector<record>;
}
BOOST_FUSION_ADAPT_STRUCT(ast::record, iname, mname, params)
template <typename It>
struct my_parser : qi::grammar<It, ast::records()> {
using Skipper = qi::rule<It>;
my_parser() : my_parser::base_type(start) {
skipper = ("\\\n" | qi::blank);
name = +qi::graph;
key = +(qi::graph - '=');
param = key >> '=' >> name;
record = name >> name >> *param;
records = *(record >> +qi::eol);
start = qi::skip(qi::copy(skipper)) [ records ];
}
private:
Skipper skipper;
qi::rule<It, ast::records(), Skipper> records;
qi::rule<It, ast::record(), Skipper> record;
qi::rule<It, ast::records()> start;
qi::rule<It, std::pair<std::string, std::string>()> param;
qi::rule<It, std::string()> name, key;
};
int main() {
#if 1
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
#else
using It = std::string::const_iterator;
std::string const input = "something here a=1\n";
It f = input.begin(), l = input.end();
#endif
ast::records data;
bool ok = qi::parse(f, l, my_parser<It>(), data);
if (ok) {
std::cout << "Parsed:\n";
for (auto& r : data) {
std::cout << "\t" << r.iname << " " << r.mname;
for (auto& p : r.params)
std::cout << " [" << p.first << ": " << p.second << "]";
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints (for the input in your question):
Parsed:
I_name m_name [parameter1: value] [parameter2: value]
I_name m_name [parameter1: value] [parameter2: value]

Iterative update of abstract syntax tree with boost spirit

I have a working boost spirit parser and was thinking if it is possible to do iterative update of an abstract syntax tree with boost spirit?
I have a struct similar to:
struct ast;
typedef boost::variant< boost::recursive_wrapper<ast> > node;
struct ast
{
std::vector<int> value;
std::vector<node> children;
};
Which is being parsed by use of:
bool r = phrase_parse(begin, end, grammar, space, ast);
Would it be possible to do iterative update of abstract syntax tree with boost spirit? I have not found any documentation on this, but I was thinking if the parsers semantic actions could push_back on an already existing AST. Has anyone tried this?
This would allow for parsing like this:
bool r = phrase_parse(begin, end, grammar, space, ast); //initial parsing
//the second parse will be called at a later state given some event/timer/io/something
bool r = phrase_parse(begin, end, grammar, space, ast); //additional parsing which will update the already existing AST
How would you know which nodes to merge? Or would you always add ("graft") at the root level? In that case, why don't you just parse another and merge moving the elements into the existing ast?
ast& operator+=(ast&& other) {
std::move(other.value.begin(), other.value.end(), back_inserter(value));
std::move(other.children.begin(), other.children.end(), back_inserter(children));
return *this;
}
Demo Time
Let's devise the simplest grammar I can think of for this AST:
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
Note I didn't even make the ; optional. Oh well. Samples. Exercises for readers. ☡ You know the drill.
We implement the trivial function ast parse(It f, It l), and then we can simply merge the asts:
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
merged += parse(input.begin(), input.end());
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
struct ast;
//typedef boost::make_recursive_variant<boost::recursive_wrapper<ast> >::type node;
typedef boost::variant<boost::recursive_wrapper<ast> > node;
struct ast {
std::vector<int> value;
std::vector<node> children;
ast& operator+=(ast&& other) {
std::move(other.value.begin(), other.value.end(), back_inserter(value));
std::move(other.children.begin(), other.children.end(), back_inserter(children));
return *this;
}
};
BOOST_FUSION_ADAPT_STRUCT(ast,
(std::vector<int>,value)
(std::vector<node>,children)
)
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, ast(), Skipper>
{
grammar() : grammar::base_type(start) {
using namespace qi;
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start));
}
private:
qi::rule<It, ast(), Skipper> start;
};
// for output:
static inline std::ostream& operator<<(std::ostream& os, ast const& v) {
using namespace karma;
rule<boost::spirit::ostream_iterator, ast()> r;
r = '{' << -(int_ % ',') << ';' << -((r|eps) % ',') << '}';
return os << format(r, v);
}
template <typename It> ast parse(It f, It l)
{
ast parsed;
static grammar<It> g;
bool ok = qi::phrase_parse(f,l,g,qi::space,parsed);
if (!ok || (f!=l)) {
std::cout << "Parse failure\n";
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
exit(255);
}
return parsed;
}
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
merged += parse(input.begin(), input.end());
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Of course, it prints:
merged + {1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}} --> {1,2,3;{4;{9,8;}},{5,6;}}
merged + {10,20,30;{40;{90, 80;}},{50,60;}} --> {1,2,3,10,20,30;{4;{9,8;}},{5,6;},{40;{90,80;}},{50,60;}}
UPDATE
In this - trivial - example, you can just bind the collections to the attributes in the parse call. The same thing will happen without the operator+= call needed to move the elements, because the rules are written to automatically append to the bound container attribute.
CAVEAT: A distinct disadvantage of modifying the target value in-place is what happens if parsing fails. In the version the merged value will then be "undefined" (has received partial information from the failed parse).
So if you want to parse inputs "atomically", the first, more explicit approach is a better fit.
So the following is a slightly shorter way to write the same:
Live On Coliru
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
struct ast;
//typedef boost::make_recursive_variant<boost::recursive_wrapper<ast> >::type node;
typedef boost::variant<boost::recursive_wrapper<ast> > node;
struct ast {
std::vector<int> value;
std::vector<node> children;
};
BOOST_FUSION_ADAPT_STRUCT(ast,
(std::vector<int>,value)
(std::vector<node>,children)
)
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, ast(), Skipper>
{
grammar() : grammar::base_type(start) {
using namespace qi;
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start));
}
private:
qi::rule<It, ast(), Skipper> start;
};
// for output:
static inline std::ostream& operator<<(std::ostream& os, ast const& v) {
using namespace karma;
rule<boost::spirit::ostream_iterator, ast()> r;
r = '{' << -(int_ % ',') << ';' << -((r|eps) % ',') << '}';
return os << format(r, v);
}
template <typename It> void parse(It f, It l, ast& into)
{
static grammar<It> g;
bool ok = qi::phrase_parse(f,l,g,qi::space,into);
if (!ok || (f!=l)) {
std::cout << "Parse failure\n";
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
exit(255);
}
}
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
parse(input.begin(), input.end(), merged);
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Still prints

Getting number of time a rule is matched as unsigned int

So I have this, rather ugly rule in my boost::spirit parser;
qi::rule<Iterator, bool()> empty_brackets;
...
empty_brackets = (qi::token(LEFT_BRACKET) >> qi::token(RIGHT_BRACKET))
[ qi::_val = true ]
;
The rule is used as;
array_typeexp = (primitive_typeexp >> +(empty_brackets))
So what I really want, for the second argument (qi::_2), isn't a std::vector<bool>, but rather an unsigned int, describing the number of times 'empty_brackets' was matched.
How would I go about achieving this?
I'm really unsure what you could need this for, but, hey :/
I'd suggest
empty_brackets = lit('(') >> lit(')');
n_empty_brackets = eps [ _val = 0 ] >> +empty_brackets [ ++_val ];
array_typeexp = primitive_typeexp >> n_empty_brackets;
For simplicity I have made primitive_typeexp a "no-op", and therefore the outermost rules return just the unsigned value in this simple setup:
qi::rule<It, unsigned(), Skipper> array_typeexp, n_empty_brackets;
qi::rule<It, Skipper> empty_brackets, primitive_typeexp;
See a complete sample Live on Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, unsigned(), Skipper>
{
parser() : parser::base_type(array_typeexp)
{
using namespace qi;
empty_brackets = lit('(') >> lit(')');
n_empty_brackets = eps [ _val = 0 ] >> +empty_brackets [ ++_val ];
array_typeexp = primitive_typeexp >> n_empty_brackets;
// TODO
primitive_typeexp = eps;
BOOST_SPIRIT_DEBUG_NODES((array_typeexp)(n_empty_brackets)(empty_brackets)(primitive_typeexp));
}
private:
qi::rule<It, unsigned(), Skipper> array_typeexp, n_empty_brackets;
qi::rule<It, Skipper> empty_brackets, primitive_typeexp;
};
bool doParse(const std::string& input)
{
typedef std::string::const_iterator It;
auto f(begin(input)), l(end(input));
parser<It, qi::space_type> p;
unsigned data;
try
{
bool ok = qi::phrase_parse(f,l,p,qi::space,data);
if (ok)
{
std::cout << "parse success\n";
std::cout << "data: " << data << "\n";
}
else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
return ok;
} catch(const qi::expectation_failure<It>& e)
{
std::string frag(e.first, e.last);
std::cerr << e.what() << "'" << frag << "'\n";
}
return false;
}
int main()
{
doParse("() () ( ) ()");
doParse("()");
}