unhandled exception using Boost Spirit to parse grammar

unhandled exception using Boost Spirit to parse grammar - c++

I am trying to use Boost Spirit to parse the following grammar:
sentence:
noun verb
sentence conjunction sentence
conjunction:
"and"
noun:
"birds"
"cats"
verb:
"fly"
"meow"
Parsing succeeds when the grammar only includes the noun >> verb rule.
When the grammar is modified to include the sentence >> conjunction >> sentence rule and I supply an invalid input such as "birds fly" instead of "birdsfly", I get an unhandled exception when the program runs.
Here is the code, which is modified from examples found in the Boost documentation:
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
// Token-based grammar for a "sentence", built from the token definitions of
// the lexer object passed to the constructor.
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
// tok must expose the members noun, verb and conjunction used below.
template <typename TokenDef>
Grammar(TokenDef const& tok)
: Grammar::base_type(sentence)
{
// Alternative 1: a noun followed by a verb.
// Alternative 2: sentence, conjunction, sentence, then end of input. Because
// >> binds tighter than |, eoi belongs to alternative 2 only.
// NOTE: alternative 2 begins with `sentence` itself (left recursion), so it
// re-enters this rule before consuming any token.
sentence = (tok.noun>>tok.verb)
|
(sentence>>tok.conjunction>>sentence)>>eoi
;
}
// Start rule handed to base_type above.
qi::rule<Iterator> sentence;
};
int main()
{
typedef lex::lexertl::token<char const*, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g (word_count);
std::string str = "birdsfly";
//std::string str = "birds fly"; this input caused unhandled exception
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
system("PAUSE");
return 0;
}

You have left-recursion in the second branch of the sentence rule.
sentence = sentence >> ....
will always recurse on sentence before consuming any input, so you're seeing a stack overflow.
I suggest writing the rule like, e.g:
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
Now the result reads
g++ -Wall -pedantic -std=c++0x -g -O0 test.cpp -o test
Parsing failed
stopped at: " fly"
(and the inevitable "sh: PAUSE: command not found" of course...)
PS. Please don't use "using namespace". Instead:
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
Here's a cleaned up version with some other stuff removed/fixed: http://coliru.stacked-crooked.com/view?id=1fb26ca3e8c207979eaaf4592c319316-e223fd4a885a77b520bbfe69dda8fb91
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
// #include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
/// Lexer definition with three token kinds: nouns, verbs and the conjunction.
/// Each token carries a std::string attribute.
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
    token_list()
    {
        // Token patterns are regular expressions.
        noun        = "birds|cats";
        verb        = "fly|meow";
        conjunction = "and";

        // Register the tokens with the lexer in this order.
        this->self.add(noun)(verb)(conjunction);
    }

    lex::token_def<std::string> noun;
    lex::token_def<std::string> verb;
    lex::token_def<std::string> conjunction;
};
/// Token-level grammar: a `noun verb` pair, optionally followed by any number
/// of `conjunction sentence` continuations, and then end of input.
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
    /// @param tokens  lexer definition exposing noun, verb and conjunction.
    template <typename Tokens>
    Grammar(Tokens const& tokens) : Grammar::base_type(sentence)
    {
        // The recursion sits on the right of a consumed noun/verb pair, so
        // every re-entry into `sentence` has already consumed tokens.
        sentence =
               tokens.noun >> tokens.verb
            >> *(tokens.conjunction >> sentence)
            >> qi::eoi;
    }

    qi::rule<Iterator> sentence;
};
int main()
{
typedef std::string::const_iterator It;
typedef lex::lexertl::token<It, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g(word_count);
//std::string str = "birdsfly";
const std::string str = "birds fly";
It first = str.begin();
It last = str.end();
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \"" << rest << "\"\n";
}
}

Related

Spirit Qi First Parser

What did I mess up here? I'm getting 'start': undeclared identifier but I stuck pretty closely to the tutorial, so I'm not sure where I made a typo, or what I did wrong. Any hints? You all see the same thing, right?
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
#include <boost/array.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi_no_skip.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
using qi::lit;
using qi::int_;
using qi::double_;
using ascii::char_;
using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::no_skip;
using qi::eoi;
struct LETTER
{
char hi;
// int fourtytwo;
// char mom;
};
BOOST_FUSION_ADAPT_STRUCT(
LETTER,
(char, hi)
// (int, fourtytwo)
// (char, mom)
)
template <typename Iterator>
struct LETTERParser : qi::grammar<Iterator, LETTER(), ascii::space_type>
{
LETTERParser(): LETTERParser::base_type(start)
{
start %= lit("LETTER") >> char_;
// >> char_
// >> int_
// >> char_
// >> eoi
// ;
}
};
const std::string wat("Z");
int main()
{
LETTERParser<std::string::const_iterator> f;
LETTER example;
phrase_parse(wat.begin(), wat.end(), f, no_skip, example);
return 0;
}

There are a number of issues, one of which is non obvious
where's no_skip? Why are you passing it to a grammar that requires ascii::space_type?
where is the start rule declared?
don't pollute global namespace - it creates hard problems in generic code
handle errors
the grammar starts with a mandatory character sequence, which doesn't match the input
the non-obvious one: single-element structs interfere in unfortunate ways in Spirit/Fusion land.
Simplify:
Fixing the above and modernizing (c++11) the fusion adaptation:
live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
namespace qi = boost::spirit::qi;
struct LETTER {
char hi;
int fourtytwo;
char mom;
};
BOOST_FUSION_ADAPT_STRUCT(LETTER, hi, fourtytwo, mom)
/// Grammar that fills a LETTER from the literal "LETTER" followed by a
/// character, an integer and another character; ASCII whitespace is skipped.
template <typename Iterator>
struct LETTERParser : qi::grammar<Iterator, LETTER(), qi::ascii::space_type>
{
    LETTERParser() : LETTERParser::base_type(start)
    {
        start = "LETTER" >> qi::char_ >> qi::int_ >> qi::char_;
    }

private:
    // Start rule; its signature matches the grammar's.
    qi::rule<Iterator, LETTER(), qi::ascii::space_type> start;
};
/// Parses a sample input into a LETTER and prints the result plus any
/// input that was left unconsumed.
int main()
{
    using It = std::string::const_iterator;

    const std::string input("LETTER Z 42m");
    LETTERParser<It> parser;
    LETTER example;

    It cursor = input.begin();
    It stop   = input.end();

    const bool parsed = phrase_parse(cursor, stop, parser, qi::ascii::space, example);
    if (parsed) {
        std::cout << "parsed: " << boost::fusion::as_vector(example) << "\n";
    } else {
        std::cout << "couldn't parse '" << input << "'\n";
    }

    // A successful parse may still stop before the end of the input.
    if (cursor != stop) {
        std::cout << "Remaining unparsed input: '" << std::string(cursor, stop) << "'\n";
    }
}
Prints
parsed: (Z 42 m)
Single Element:
You're in luck, it doesn't bite in your case:
Live On Coliru
Prints
parsed: (Z)
Remaining unparsed input: '42m'
as expected. If it strikes in the future, refer here e.g. Size of struct with a single element
Bonus
Consider encapsulating the choice of skipper. The caller should probably never be able to override it Live On Coliru - see also Boost spirit skipper issues

Learning Boost.Spirit: parsing INI

I started to learn Boost.Spirit and finished reading the "Qi - Writing Parsers" section. While reading, everything was easy and understandable. But when I try to do something myself, I get a lot of errors, because there are so many includes and namespaces and I need to know when to include/use them. As practice, I want to write a simple INI parser.
Here is the code (includes are from one of examples inside Spirit lib as almost everything else):
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <map>
namespace client
{
typedef std::map<std::string, std::string> key_value_map_t;
struct mini_ini
{
std::string name;
key_value_map_t key_values_map;
};
} // client
BOOST_FUSION_ADAPT_STRUCT(
client::mini_ini,
(std::string, name)
(client::key_value_map_t, key_values_map)
)
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
template <typename Iterator>
struct ini_grammar : qi::grammar<Iterator, mini_ini(), ascii::space_type>
{
ini_grammar() : ini_grammar::base_type(section_, "section")
{
using qi::char_;
using qi::on_error;
using qi::fail;
using namespace qi::labels;
using phoenix::construct;
using phoenix::val;
key_ = +char_("a-zA-Z_0-9");
pair_ = key_ >> '=' >> *char_;
section_ = '[' >> key_ >> ']' >> '\n' >> *(pair_ >> '\n');
key_.name("key");
pair_.name("pair");
section_.name("section");
on_error<fail>
(
section_
, std::cout
<< val("Error! Expecting ")
<< _4 // what failed?
<< val(" here: \"")
<< construct<std::string>(_3, _2) // iterators to error-pos, end
<< val("\"")
<< std::endl
);
}
qi::rule<Iterator, std::string(), ascii::space_type> key_;
qi::rule<Iterator, mini_ini(), ascii::space_type> section_;
qi::rule<Iterator, std::pair<std::string, std::string>(), ascii::space_type> pair_;
};
} // client
int
main()
{
std::string storage =
"[section]\n"
"key1=val1\n"
"key2=val2\n";
client::mini_ini ini;
typedef client::ini_grammar<std::string::const_iterator> ini_grammar;
ini_grammar grammar;
using boost::spirit::ascii::space;
std::string::const_iterator iter = storage.begin();
std::string::const_iterator end = storage.end();
bool r = phrase_parse(iter, end, grammar, space, ini);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
return 0;
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
std::cout << std::string(iter, end) << "\n";
return 1;
}
return 0;
}
As you can see, I want to parse the following text into the mini_ini struct:
"[section]"
"key1=val1"
"key2=val2";
The parse fails, and std::string(iter, end) is the full input string.
My questions:
Why do I see the failure but no output from the on_error<fail> handler?
Do you have any recommendations on how to learn Boost.Spirit (I understand the documentation well in theory, but in practice I have a lot of WHY ???)?
Thanks

Q. Why I see fail but don't see on_error handler
The on_error handler is only fired for the registered rule (section_) and if an expectation point is failed.
Your grammar doesn't contain expectation points (only >> are used, not >).
Q. Have you any recommendations how to learn Boost.Spirit (I have good understanding of documentation in theory, but in practice I have a lot of WHY ???) ?
Just build the parsers you need. Copy good conventions from the docs and SO answers. There are a lot of them. As you have seen, quite a number contain full examples of Ini parsers with varying levels of error reporting too.
Bonus hints:
Do more detailed status reporting:
bool ok = phrase_parse(iter, end, grammar, space, ini);
if (ok) {
std::cout << "Parse success\n";
} else {
std::cout << "Parse failure\n";
}
if (iter != end) {
std::cout << "Remaining unparsed: '" << std::string(iter, end) << "'\n";
}
return ok && (iter==end)? 0 : 1;
Use BOOST_SPIRIT_DEBUG:
#define BOOST_SPIRIT_DEBUG
// and later
BOOST_SPIRIT_DEBUG_NODES((key_)(pair_)(section_))
Prints:
<section_>
<try>[section]\nkey1=val1\n</try>
<key_>
<try>section]\nkey1=val1\nk</try>
<success>]\nkey1=val1\nkey2=val</success>
<attributes>[[s, e, c, t, i, o, n]]</attributes>
</key_>
<fail/>
</section_>
Parse failure
Remaining unparsed: '[section]
key1=val1
key2=val2
'
You'll notice that the section header isn't parsed because the newline is not matched. Your skipper (space_type) skips the newline, hence it will never match: Boost spirit skipper issues
Fix skipper
When using blank_type as the skipper you'll get a successful parse:
<section_>
<try>[section]\nkey1=val1\n</try>
<key_>
<try>section]\nkey1=val1\nk</try>
<success>]\nkey1=val1\nkey2=val</success>
<attributes>[[s, e, c, t, i, o, n]]</attributes>
</key_>
<pair_>
<try>key1=val1\nkey2=val2\n</try>
<key_>
<try>key1=val1\nkey2=val2\n</try>
<success>=val1\nkey2=val2\n</success>
<attributes>[[k, e, y, 1]]</attributes>
</key_>
<success></success>
<attributes>[[[k, e, y, 1], [v, a, l, 1,
, k, e, y, 2, =, v, a, l, 2,
]]]</attributes>
</pair_>
<success>key1=val1\nkey2=val2\n</success>
<attributes>[[[s, e, c, t, i, o, n], []]]</attributes>
</section_>
Parse success
Remaining unparsed: 'key1=val1
key2=val2
NOTE: The parse succeeds but doesn't do what you want. This is because *char_ includes newlines. So make that
pair_ = key_ >> '=' >> *(char_ - qi::eol); // or
pair_ = key_ >> '=' >> *~char_("\r\n"); // etc
Full code
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <map>
namespace client
{
typedef std::map<std::string, std::string> key_value_map_t;
struct mini_ini
{
std::string name;
key_value_map_t key_values_map;
};
} // client
BOOST_FUSION_ADAPT_STRUCT(
client::mini_ini,
(std::string, name)
(client::key_value_map_t, key_values_map)
)
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
/// Grammar for one INI section: a "[name]" header line followed by zero or
/// more "key=value" lines. Fills a mini_ini (section name + key/value map).
/// The skipper is blank_type so that line ends stay visible to the grammar.
template <typename Iterator>
struct ini_grammar : qi::grammar<Iterator, mini_ini(), ascii::blank_type>
{
    ini_grammar() : ini_grammar::base_type(section_, "section")
    {
        using qi::char_;
        using qi::eol;
        using qi::on_error;
        using qi::fail;
        using namespace qi::labels;
        using phoenix::construct;
        using phoenix::val;

        key_ = +char_("a-zA-Z_0-9");

        // The value stops at the end of the line. A bare *char_ would also
        // consume the line end and every following line, leaving nothing for
        // the line terminator that section_ expects after each pair.
        pair_ = key_ >> '=' >> *(char_ - eol);

        // eol is used as the line terminator so that it agrees with the
        // exclusion in pair_ above.
        section_ = '[' >> key_ >> ']' >> eol >> *(pair_ >> eol);

        BOOST_SPIRIT_DEBUG_NODES((key_)(pair_)(section_))

        // Only triggered when an expectation point (operator >) fails; this
        // grammar uses operator >> throughout, so the handler stays idle
        // unless expectation points are added.
        on_error<fail>
        (
            section_
          , std::cout
                << val("Error! Expecting ")
                << _4                               // what failed?
                << val(" here: \"")
                << construct<std::string>(_3, _2)   // iterators to error-pos, end
                << val("\"")
                << std::endl
        );
    }

    qi::rule<Iterator, std::string(), ascii::blank_type> key_;
    qi::rule<Iterator, mini_ini(), ascii::blank_type> section_;
    qi::rule<Iterator, std::pair<std::string, std::string>(), ascii::blank_type> pair_;
};
} // client
/// Parses a fixed three-line INI sample and reports success, failure and any
/// unconsumed input. Exit code is 0 only for a successful, complete parse.
int main()
{
    using ini_grammar = client::ini_grammar<std::string::const_iterator>;

    std::string storage =
        "[section]\n"
        "key1=val1\n"
        "key2=val2\n";

    client::mini_ini ini;
    ini_grammar grammar;

    std::string::const_iterator cursor = storage.begin();
    std::string::const_iterator stop   = storage.end();

    const bool parsed =
        phrase_parse(cursor, stop, grammar, boost::spirit::ascii::blank, ini);

    std::cout << (parsed ? "Parse success\n" : "Parse failure\n");

    if (cursor != stop) {
        std::cout << "Remaining unparsed: '" << std::string(cursor, stop) << "'\n";
    }

    if (parsed && cursor == stop) {
        return 0;
    }
    return 1;
}

How can I extract std::string object via boost spirit

I have the following code:
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
struct function
{
std::string ret_type;
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(
::function,
(std::string, ret_type)
(std::string, name)
)
// Grammar meant to fill a `function` (ret_type, name) from the input, with
// ASCII whitespace as the skipper.
template <typename Iterator>
struct function_parser : boost::spirit::qi::grammar<Iterator, function(), boost::spirit::qi::ascii::space_type>
{
function_parser() : function_parser::base_type(start)
{
using boost::spirit::qi::ascii::char_;
using boost::spirit::qi::int_;
// NOTE: +char_ matches any character one or more times, so the first +char_
// consumes all remaining input and the second one has nothing left to match.
start %= +char_ >> +char_;
}
// Start rule; its signature matches the grammar's.
boost::spirit::qi::rule<Iterator, function(), boost::spirit::qi::ascii::space_type> start;
};
int main()
{
std::string input_data("void foo");
function fn;
auto itr = input_data.begin();
auto end = input_data.end();
function_parser<decltype(itr)> g;
bool res = boost::spirit::qi::phrase_parse(itr, end, g, boost::spirit::ascii::space, fn);
if (res && itr == end)
{
std::cout << boost::fusion::tuple_open('[');
std::cout << boost::fusion::tuple_close(']');
std::cout << boost::fusion::tuple_delimiter(", ");
std::cout << "Parsing succeeded\n";
std::cout << "got: " << boost::fusion::as_vector(fn) << std::endl;
}
else
{
std::cout << "Parsing failed \n";
}
}
Output
Parsing failed
What am I doing wrong? How can I fix it?

+char_
eats all input! Now, the next
+char_
requires at least a single character, which isn't there (the first Kleene plus ate it), so the parse fails.
I suggest instead:
using namespace boost::spirit::qi;
start = lexeme[+graph] >> lexeme[+graph];
The documentation should be able to tell you what that does (I hope. No time to elaborate)

Zero-filled results for unused (but matching) rules [duplicate]

I want to parse special constructs and throw the rest away. But I don't want to use a skipper.
I want to get a vector of these constructs, so I use a Kleene star parser as the main rule. But every time something gets thrown away, a default-constructed element is inserted into the vector.
Here is a made-up example. It just looks for the string Test and throws the rest away; at least, this is the plan. But every time the rule garbage succeeds, it adds a default-constructed item to the vector in the rule all, giving an output of 7 instead of 1. How can I tell Spirit to add to the vector only if the rule item succeeds?
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
struct container {
std::string name;
bool dummy;
};
BOOST_FUSION_ADAPT_STRUCT(::container,
(std::string, name)
(bool, dummy))
int main() {
typedef std::string::const_iterator iterator;
qi::rule<iterator, std::vector<container>()> all;
qi::rule<iterator, container()> item;
qi::rule<iterator, std::string()> string_rule;
qi::rule<iterator> garbage;
all = *(garbage | item);
garbage = qi::char_ - qi::lit("Test");
string_rule = qi::string("Test");
item = string_rule >> qi::attr(true);
std::vector<container> ast;
std::string input = "blaTestbla";
iterator first = input.begin();
iterator last = input.end();
bool result = qi::parse(first, last, all, ast);
if (result) {
result = first == last;
}
if (result) {
std::cout << "Parsed " << ast.size() << " element(s)" << std::endl;
} else {
std::cout << "failure" << std::endl;
}
}

Since sehe's answer was more or less for educational purposes, we have now several solutions:
*garbage >> -(item % *garbage) >> *garbage
*garbage >> *(item >> *garbage)
all = *(garbage | item[phx::push_back(qi::_val,qi::_1)]);
And the solution from cv_and_he:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
struct container {
std::string name;
bool dummy;
};
BOOST_FUSION_ADAPT_STRUCT(::container,
(std::string, name)
(bool, dummy))
struct container_vector { //ADDED
std::vector<container> data;
};
// Specializations of Spirit's container traits for container_vector, so that
// elements arrive as optional<container> and empty ones are not stored.
namespace boost{ namespace spirit{ namespace traits //ADDED
{
// Mark container_vector as a container type.
template <>
struct is_container<container_vector> : boost::mpl::true_ {};
// Declare optional<container> as its element type.
template <>
struct container_value<container_vector> {
typedef optional<container> type;
};
// Insertion hook: store the value only when the optional holds one, and
// always report success.
template <>
struct push_back_container<container_vector,optional<container> > {
static bool call(container_vector& cont, const optional<container>& val) {
if(val)
cont.data.push_back(*val);
return true;
}
};
}}}
// Parses a sample string with rules that discard everything except "Test"
// and prints how many items ended up in the result.
int main() {
    typedef std::string::const_iterator It;

    qi::rule<It, container_vector()> all; //CHANGED
    qi::rule<It, container()> item;
    qi::rule<It, std::string()> string_rule;
    qi::rule<It> garbage;

    // Rules refer to each other by reference, so they are defined here from
    // the leaves up to the top-level rule.
    string_rule = qi::string("Test");
    garbage     = qi::char_ - qi::lit("Test");
    item        = string_rule >> qi::attr(true);
    all         = *(garbage | item);

    container_vector ast; //CHANGED
    std::string input = "blaTestbla";
    It first = input.begin();
    It last  = input.end();

    // Success requires both a match and consumption of the whole input.
    const bool complete = qi::parse(first, last, all, ast) && first == last;

    if (!complete) {
        std::cout << "failure" << std::endl;
    } else {
        std::cout << "Parsed " << ast.data.size() << " element(s)" << std::endl; //CHANGED
    }
}
Although I didn't want to use a skipper I ended up with:
start = qi::skip(garbage.alias())[*item];
This last solution was the fastest (by 1-2%) in my unscientific tests using the c-files of the Linux kernel with my production rules.

A quick fix (not necessarily most performant) would be
all = -(item - garbage) % +garbage;
It prints:
Parsed 3 element(s)
See it Live on Coliru

Boost Spirit Qi: Omit element in Kleene Star parser

I want to parse special constructs and throw the rest away. But I don't want to use a skipper.
I want to get a vector of these constructs, so I use a Kleene star parser as the main rule. But every time something gets thrown away, a default-constructed element is inserted into the vector.
Here is a made-up example. It just looks for the string Test and throws the rest away; at least, this is the plan. But every time the rule garbage succeeds, it adds a default-constructed item to the vector in the rule all, giving an output of 7 instead of 1. How can I tell Spirit to add to the vector only if the rule item succeeds?
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
struct container {
std::string name;
bool dummy;
};
BOOST_FUSION_ADAPT_STRUCT(::container,
(std::string, name)
(bool, dummy))
int main() {
typedef std::string::const_iterator iterator;
qi::rule<iterator, std::vector<container>()> all;
qi::rule<iterator, container()> item;
qi::rule<iterator, std::string()> string_rule;
qi::rule<iterator> garbage;
all = *(garbage | item);
garbage = qi::char_ - qi::lit("Test");
string_rule = qi::string("Test");
item = string_rule >> qi::attr(true);
std::vector<container> ast;
std::string input = "blaTestbla";
iterator first = input.begin();
iterator last = input.end();
bool result = qi::parse(first, last, all, ast);
if (result) {
result = first == last;
}
if (result) {
std::cout << "Parsed " << ast.size() << " element(s)" << std::endl;
} else {
std::cout << "failure" << std::endl;
}
}

Since sehe's answer was more or less for educational purposes, we have now several solutions:
*garbage >> -(item % *garbage) >> *garbage
*garbage >> *(item >> *garbage)
all = *(garbage | item[phx::push_back(qi::_val,qi::_1)]);
And the solution from cv_and_he:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
struct container {
std::string name;
bool dummy;
};
BOOST_FUSION_ADAPT_STRUCT(::container,
(std::string, name)
(bool, dummy))
struct container_vector { //ADDED
std::vector<container> data;
};
// Specializations of Spirit's container traits for container_vector, so that
// elements arrive as optional<container> and empty ones are not stored.
namespace boost{ namespace spirit{ namespace traits //ADDED
{
// Mark container_vector as a container type.
template <>
struct is_container<container_vector> : boost::mpl::true_ {};
// Declare optional<container> as its element type.
template <>
struct container_value<container_vector> {
typedef optional<container> type;
};
// Insertion hook: store the value only when the optional holds one, and
// always report success.
template <>
struct push_back_container<container_vector,optional<container> > {
static bool call(container_vector& cont, const optional<container>& val) {
if(val)
cont.data.push_back(*val);
return true;
}
};
}}}
// Parses a sample string with rules that discard everything except "Test"
// and prints how many items ended up in the result.
int main() {
    typedef std::string::const_iterator It;

    qi::rule<It, container_vector()> all; //CHANGED
    qi::rule<It, container()> item;
    qi::rule<It, std::string()> string_rule;
    qi::rule<It> garbage;

    // Rules refer to each other by reference, so they are defined here from
    // the leaves up to the top-level rule.
    string_rule = qi::string("Test");
    garbage     = qi::char_ - qi::lit("Test");
    item        = string_rule >> qi::attr(true);
    all         = *(garbage | item);

    container_vector ast; //CHANGED
    std::string input = "blaTestbla";
    It first = input.begin();
    It last  = input.end();

    // Success requires both a match and consumption of the whole input.
    const bool complete = qi::parse(first, last, all, ast) && first == last;

    if (!complete) {
        std::cout << "failure" << std::endl;
    } else {
        std::cout << "Parsed " << ast.data.size() << " element(s)" << std::endl; //CHANGED
    }
}
Although I didn't want to use a skipper I ended up with:
start = qi::skip(garbage.alias())[*item];
This last solution was the fastest (by 1-2%) in my unscientific tests using the c-files of the Linux kernel with my production rules.

A quick fix (not necessarily most performant) would be
all = -(item - garbage) % +garbage;
It prints:
Parsed 3 element(s)
See it Live on Coliru

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

unhandled exception using Boost Spirit to parse grammar - c++

Related

Spirit Qi First Parser

Learning Boost.Spirit: parsing INI

How can I extract std::string object via boost spirit

Zero-filled results for unused (but matching) rules [duplicate]

Boost Spirit Qi: Omit element in Kleene Star parser

Categories

Resources