Why is boost::recursive_wrapper not working in this case - c++

I have the following three rules:
// A parenthesised expression or a bare integer.
unary_expression =
( '(' > expression > ')' )
| int_;
// A unary expression followed by zero or more (operator, expression) pairs.
operator_expression =
unary_expression >> *(operators > expression);
// A parenthesised expression or an operator expression; the rule refers to
// itself, so the grammar is recursive.
expression =
( '(' > expression > ')' )
| operator_expression;
Obviously this is recursive, so I use boost::recursive_wrapper and created the following AST:
// Forward declaration so the variant below can name the recursive type.
struct expression;
// Either a nested expression (held through recursive_wrapper) or an int.
// NOTE: the recursive alternative is listed first.
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;
// Wrapper struct around the unary variant.
struct unary_expression
{
unary_expression_node m_unary_expression;
};
// Comparison operators recognised by the grammar.
enum operators { op_eq, op_ne };
struct expression;
// A leading unary expression plus a list of (operator, expression) pairs.
struct operator_expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
// Either a nested expression or an operator expression.
// NOTE: again the first alternative is recursive_wrapper<expression>, i.e. the
// very type that contains this variant.
using expression_node =
boost::variant<boost::recursive_wrapper<expression>, operator_expression>;
// Top-level AST node; its only member is the variant above.
struct expression
{
expression_node m_expression;
};
This compiles (see full example below), but when the code attempts to construct an expression object the constructor gets into an infinite loop of calling these three constructors:
#11 0x0000000000466066 in ast::expression::expression ...
#12 0x00000000004682e0 in boost::recursive_wrapper<ast::expression>::recursive_wrapper ...
#13 0x000000000046718d in boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression>::variant
...
Thus, creating an expression creates a boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression> (aka an expression_node), which creates a boost::recursive_wrapper<ast::expression>, which creates an expression, which creates... and so on.
How can I solve this?
Here is a full example that compiles, but segfaults when the stack runs full:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
// AST node types for the expression grammar, plus stream output for them.
namespace ast {
struct expression;
// Either a nested expression (through recursive_wrapper) or an int.
// NOTE: the recursive alternative is listed first.
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;
struct unary_expression
{
unary_expression_node m_unary_expression;
};
// Comparison operators recognised by the grammar.
enum operators { op_eq, op_ne };
struct expression;
// A leading unary expression plus a list of (operator, expression) pairs.
struct operator_expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
// NOTE: the first alternative is recursive_wrapper<expression>, the type that
// itself holds this variant as its only member.
using expression_node = boost::variant<boost::recursive_wrapper<expression>, operator_expression>;
struct expression
{
expression_node m_expression;
};
// Streams an expression by streaming the variant it holds.
std::ostream& operator<<(std::ostream& os, expression const& expression)
{
return os << expression.m_expression;
}
// Streams a unary expression by streaming the variant it holds.
std::ostream& operator<<(std::ostream& os, unary_expression const& unary_expression)
{
return os << unary_expression.m_unary_expression;
}
// Streams the first operand, then each " <operator> <expression>" pair.
std::ostream& operator<<(std::ostream& os, operator_expression const& operator_expression)
{
os << operator_expression.first;
for (auto& l : operator_expression.second)
{
// l.first is an `operators` value; no operator<< for that enum is defined in
// this namespace (presumably it is written as its integer value).
os << ' ' << l.first << ' ' << l.second;
}
return os;
}
} // namespace ast
// Adapt the AST structs as Fusion sequences; each entry lists a member's type
// and name.
BOOST_FUSION_ADAPT_STRUCT(
ast::expression,
(ast::expression_node, m_expression)
)
BOOST_FUSION_ADAPT_STRUCT(
ast::unary_expression,
(ast::unary_expression_node, m_unary_expression)
)
BOOST_FUSION_ADAPT_STRUCT(
ast::operator_expression,
(ast::unary_expression, first),
(ast::operator_expression::second_type, second)
)
// Qi grammar that produces ast::expression, skipping whitespace (qi::space_type).
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression(), qi::space_type>
{
private:
// Symbol table mapping operator spellings to ast::operators values.
qi::symbols<char, ast::operators> operators;
// One rule per AST node type.
qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
qi::rule<Iterator, ast::operator_expression(), qi::space_type> operator_expression;
qi::rule<Iterator, ast::expression(), qi::space_type> expression;
public:
// `expression` is the start rule.
expression_grammar() : expression_grammar::base_type(expression, "expression_grammar")
{
// Only int_ is used below; double_ and char_ are not referenced in this constructor.
using qi::double_;
using qi::char_;
using qi::int_;
operators.add
("==", ast::op_eq)
("!=", ast::op_ne)
;
// Parenthesised expression or an integer literal.
unary_expression =
( '(' > expression > ')' )
| int_;
// A unary expression followed by any number of (operator, expression) pairs.
operator_expression =
unary_expression >> *(operators > expression);
// Parenthesised expression or an operator expression.
expression =
( '(' > expression > ')' )
| operator_expression;
}
};
} // namespace client
// Parses a fixed sample input with client::expression_grammar and prints the
// resulting AST. Returns 1 when parsing fails or input is left unconsumed.
int main()
{
    namespace qi = boost::spirit::qi;
    using iterator_type = std::string::const_iterator;
    using expression_grammar = client::expression_grammar<iterator_type>;

    std::string const input{"1 == 1 != 0"};
    expression_grammar program;

    iterator_type iter{input.begin()};
    iterator_type const end{input.end()};
    ast::expression out;

    bool const parsed = qi::phrase_parse(iter, end, program, qi::space, out);
    bool const consumed_everything = (iter == end);
    if (parsed && consumed_everything)
    {
        std::cout << "Parsed: " << out << std::endl;
        return 0;
    }

    // Either the parser rejected the input or stopped before the end.
    std::cerr << "Parsing failed." << std::endl;
    return 1;
}
EDIT:
I tried simplifying things to just two rules (and two 'ast's):
// Simplified AST: a single expression struct plus one variant.
struct expression;
// Either a nested expression (through recursive_wrapper) or an int.
// NOTE: the recursive alternative is still listed first.
using unary_expression = boost::variant<boost::recursive_wrapper<expression>, int>;
enum operators { op_eq, op_ne };
// A leading unary expression plus a list of (operator, expression) pairs.
struct expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
// Adapt the struct as a Fusion sequence (member type, member name).
BOOST_FUSION_ADAPT_STRUCT(
ast::expression,
(ast::unary_expression, first),
(ast::expression::second_type, second)
)
[...]
// Parenthesised expression or an integer literal.
unary_expression =
( '(' > expression > ')' )
| int_;
// A unary expression followed by any number of (operator, expression) pairs.
expression =
unary_expression >> *(operators > expression);
but this also results in an infinite loop:
#18 0x00000000004646f2 in ast::expression::expression
#19 0x00000000004669ac in boost::recursive_wrapper<ast::expression>::recursive_wrapper
#20 0x0000000000465821 in boost::variant<boost::recursive_wrapper<ast::expression>, int>::variant
...

Variants default-construct to their first element type.
This indeed directly leads to an infinite loop. (Demo)
The way to solve it is to make the default variant element not re-entrant or to make it lazily constructed. In this case, you can simply re-arrange to make int the first element.
Better yet, there doesn't seem to be a need to reflect the operator precedence hierarchy (as it is expressed in the rules) in the resultant tree, so why not simplify to:
// Forward declarations for the recursive alternatives of the variant below.
struct unary_expression;
struct binary_expression;
enum operators { op_eq, op_ne };
// The expression node is the variant itself; `int` is the first alternative,
// and both struct alternatives are held through recursive_wrapper.
using expression = boost::variant<
int,
boost::recursive_wrapper<unary_expression>,
boost::recursive_wrapper<binary_expression>
>;
// Wraps a single sub-expression.
struct unary_expression {
expression expr;
};
// A first operand followed by (operator, operand) pairs.
struct binary_expression {
expression first;
std::vector<std::pair<operators, expression>> other;
};
This no longer crashes and seems a bit simpler in adaptation and usage.
Simplified Full Demo
This full demo uses that AST, but adds a true unary expression. A few style things have been fixed:
don't expose the skipper unless you intend for the caller to change it
make the parser const
show unparsed trailing data (or instead assert >> qi::eoi)
Note: I might have changed the precedence rules (specifically, associativity of binary operators). I'm not sure which version you require.
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
// AST for the simplified demo.
namespace ast {
// Forward declarations for the recursive alternatives of the variant below.
struct unary_expression;
struct binary_expression;
enum operators { op_eq, op_ne };
// The expression node is the variant itself; `int` is the first alternative,
// and both struct alternatives are held through recursive_wrapper.
using expression = boost::variant<
int,
boost::recursive_wrapper<unary_expression>,
boost::recursive_wrapper<binary_expression>
>;
// An optionally negated sub-expression; `negated` defaults to false.
struct unary_expression {
bool negated = false;
expression expr;
};
// A first operand followed by (operator, operand) pairs.
struct binary_expression {
expression first;
std::vector<std::pair<operators, expression>> other;
};
}
// Adapt both structs as Fusion sequences, listing members in declaration order.
BOOST_FUSION_ADAPT_STRUCT(ast::unary_expression, negated, expr)
BOOST_FUSION_ADAPT_STRUCT(ast::binary_expression, first, other)
// Stream output for the AST node types.
namespace ast {
    // Writes an operator as its source spelling: "==" for op_eq, "!=" otherwise.
    static inline std::ostream& operator<<(std::ostream& os, operators op) {
        if (op == op_eq) {
            return os << "==";
        }
        return os << "!=";
    }

    // Writes the first operand followed by each " <operator> <operand>" pair.
    static inline std::ostream& operator<<(std::ostream& os, binary_expression const& e) {
        os << e.first;
        for (auto it = e.other.begin(); it != e.other.end(); ++it) {
            os << " " << it->first << " " << it->second;
        }
        return os;
    }

    // Writes the sub-expression in parentheses, prefixed with "!" when negated.
    static inline std::ostream& operator<<(std::ostream& os, unary_expression const& e) {
        char const* const prefix = e.negated ? "!" : "";
        os << prefix;
        os << "(";
        os << e.expr;
        os << ")";
        return os;
    }
}
// Qi grammar producing ast::expression. The grammar itself is declared without
// a skipper; the start rule applies skip(space) internally.
namespace client
{
namespace qi = boost::spirit::qi;
template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression()> {
private:
// Symbol table mapping operator spellings to ast::operators values.
qi::symbols<char, ast::operators> operators;
// Skipper-less entry rule; all other rules use qi::space_type.
qi::rule<Iterator, ast::expression()> start;
qi::rule<Iterator, ast::expression(), qi::space_type> simple_expression;
qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
qi::rule<Iterator, ast::binary_expression(), qi::space_type> binary_expression;
qi::rule<Iterator, ast::expression(), qi::space_type> expression;
public:
expression_grammar() : expression_grammar::base_type(start, "expression") {
using namespace qi;
operators.add
("==", ast::op_eq)
("!=", ast::op_ne)
;
// Parenthesised expression or an integer literal.
simple_expression =
( '(' > expression > ')' )
| int_;
// matches['!'] yields a bool for the optional '!' prefix (the `negated` member).
unary_expression =
matches['!'] >> simple_expression;
// A unary expression followed by any number of (operator, expression) pairs.
binary_expression =
unary_expression >> *(operators > expression);
expression = binary_expression;
// Whitespace skipping is established here rather than by the caller.
start = skip(space) [ expression ];
BOOST_SPIRIT_DEBUG_NODES((expression)(binary_expression)(unary_expression)(simple_expression))
}
};
} // namespace client
// Parses a fixed sample input, prints the AST, and reports any unparsed tail.
// Returns 1 when parsing fails.
int main() {
    using It = std::string::const_iterator;

    client::expression_grammar<It> const program;
    std::string const input{"1 == !(1 != 0)"};

    It iter = input.begin();
    It end = input.end();
    ast::expression out;

    bool const ok = parse(iter, end, program, out);
    if (!ok) {
        std::cerr << "Parsing failed." << std::endl;
        return 1;
    }
    std::cout << "Parsed: " << out << std::endl;

    // A successful parse may still stop short of the end of the input.
    if (iter != end) {
        std::string const rest(iter, end);
        std::cout << "Remaining unparsed input: '" << rest << "'\n";
    }
}
Prints
Parsed: (1) == !((1) != (0))

Related

Using lexer token attributes in grammar rules with Lex and Qi from Boost.Spirit

Let's consider following code:
#include <boost/phoenix.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
// Scope holder for the arithmetic operator enumeration.
struct operation
{
    // The four binary arithmetic operators, numbered from zero.
    enum type { add = 0, sub = 1, mul = 2, div = 3 };
};
// Lexer for arithmetic expressions: operators, numeric values, %-prefixed
// variables, two kinds of brackets and whitespace.
template<typename Lexer>
class expression_lexer
: public lex::lexer<Lexer>
{
public:
// Token definition types, named by the attribute each token carries.
typedef lex::token_def<operation::type> operator_token_type;
typedef lex::token_def<double> value_token_type;
typedef lex::token_def<std::string> variable_token_type;
typedef lex::token_def<lex::omit> parenthesis_token_type;
// (opening, closing) token pair for one bracket style.
typedef std::pair<parenthesis_token_type, parenthesis_token_type> parenthesis_token_pair_type;
typedef lex::token_def<lex::omit> whitespace_token_type;
expression_lexer()
: operator_add('+'),
operator_sub('-'),
// 'x' or '*' for multiplication, ':' or '/' for division.
operator_mul("[x*]"),
operator_div("[:/]"),
// Digits with an optional fractional part.
value("\\d+(\\.\\d+)?"),
// '%' followed by one or more word characters.
variable("%(\\w+)"),
parenthesis({
std::make_pair(parenthesis_token_type('('), parenthesis_token_type(')')),
std::make_pair(parenthesis_token_type('['), parenthesis_token_type(']'))
}),
whitespace("[ \\t]+")
{
// Register the tokens; the semantic actions assign each token's value.
this->self
+= operator_add [lex::_val = operation::add]
| operator_sub [lex::_val = operation::sub]
| operator_mul [lex::_val = operation::mul]
| operator_div [lex::_val = operation::div]
| value
// Build the variable name from the match minus its first character.
| variable [lex::_val = phoenix::construct<std::string>(lex::_start + 1, lex::_end)]
// Whitespace is matched but flagged pass_ignore.
| whitespace [lex::_pass = lex::pass_flags::pass_ignore]
;
// Register the opening and closing token of every bracket pair.
std::for_each(parenthesis.cbegin(), parenthesis.cend(),
[&](parenthesis_token_pair_type const& token_pair)
{
this->self += token_pair.first | token_pair.second;
}
);
}
operator_token_type operator_add;
operator_token_type operator_sub;
operator_token_type operator_mul;
operator_token_type operator_div;
value_token_type value;
variable_token_type variable;
std::vector<parenthesis_token_pair_type> parenthesis;
whitespace_token_type whitespace;
};
// Attribute-less grammar over the lexer's token stream; it only validates
// that the tokens form an arithmetic expression.
template<typename Iterator>
class expression_grammar
: public qi::grammar<Iterator>
{
public:
// Builds the rules from the token definitions exposed by `tokens`.
template<typename Tokens>
explicit expression_grammar(Tokens const& tokens)
: expression_grammar::base_type(start)
{
// The whole input must be one expression.
start %= expression >> qi::eoi;
// Sums: an operand optionally followed by a sum operator and another expression.
expression %= sum_operand >> -(sum_operator >> expression);
sum_operator %= tokens.operator_add | tokens.operator_sub;
// Products: same shape one level down.
sum_operand %= fac_operand >> -(fac_operator >> sum_operand);
fac_operator %= tokens.operator_mul | tokens.operator_div;
// Only reference `parenthesised` when at least one bracket pair is configured.
if(!tokens.parenthesis.empty())
fac_operand %= parenthesised | terminal;
else
fac_operand %= terminal;
terminal %= tokens.value | tokens.variable;
if(!tokens.parenthesis.empty())
{
// Start with the first bracket pair ...
parenthesised %= tokens.parenthesis.front().first >> expression >> tokens.parenthesis.front().second;
// ... then add one alternative per remaining pair, building on a copy of the
// rule as defined so far.
std::for_each(tokens.parenthesis.cbegin() + 1, tokens.parenthesis.cend(),
[&](typename Tokens::parenthesis_token_pair_type const& token_pair)
{
parenthesised %= parenthesised.copy() | (token_pair.first >> expression >> token_pair.second);
}
);
}
}
private:
qi::rule<Iterator> start;
qi::rule<Iterator> expression;
qi::rule<Iterator> sum_operand;
qi::rule<Iterator> sum_operator;
qi::rule<Iterator> fac_operand;
qi::rule<Iterator> fac_operator;
qi::rule<Iterator> terminal;
qi::rule<Iterator> parenthesised;
};
// Reads expressions from standard input, one per line, and reports for each
// whether it tokenizes and parses, together with any unconsumed remainder.
int main()
{
    typedef lex::lexertl::token<std::string::const_iterator, boost::mpl::vector<operation::type, double, std::string>> token_type;
    typedef expression_lexer<lex::lexertl::actor_lexer<token_type>> expression_lexer_type;
    typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
    typedef expression_grammar<expression_lexer_iterator_type> expression_grammar_type;
    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);
    std::string line;
    // Test the result of getline itself: checking the stream state before
    // reading would run one extra iteration on an empty line once input ends.
    while(std::getline(std::cin, line))
    {
        std::string::const_iterator first = line.begin();
        std::string::const_iterator const last = line.end();
        bool const result = lex::tokenize_and_parse(first, last, lexer, grammar);
        if(!result)
            std::cout << "Parsing failed! Remainder: >" << std::string(first, last) << "<" << std::endl;
        else
        {
            if(first != last)
                std::cout << "Parsing succeeded! Remainder: >" << std::string(first, last) << "<" << std::endl;
            else
                std::cout << "Parsing succeeded!" << std::endl;
        }
    }
}
It is a simple parser for arithmetic expressions with values and variables. It is built using expression_lexer to extract tokens and expression_grammar to parse them.
Using a lexer for such a small case might seem like overkill, and it probably is; that is the cost of a simplified example. Also note that a lexer makes it easy to define tokens with regular expressions, which in turn makes it easy to define them from external code (and from user-provided configuration in particular). With the example provided it would be no problem at all to read the token definitions from an external config file and, for example, let the user change variables from %name to $name.
The code seems to be working fine (checked on Visual Studio 2013 with Boost 1.61).
The expression_lexer has attributes attached to tokens. I guess they work since they compile. But I don't really know how to check.
Ultimately I would like the grammar to build me a std::vector holding the expression in reverse Polish notation. (Every element would be a boost::variant over either operation::type, double or std::string.)
The problem is however that I failed to use token attributes in my expression_grammar. For example if you try to change sum_operator following way:
qi::rule<Iterator, operation::type ()> sum_operator;
you will get compilation error. I expected this to work since operation::type is the attribute for both operator_add and operator_sub and so also for their alternative. And still it doesn't compile. Judging from the error in assign_to_attribute_from_iterators it seems that parser tries to build the attribute value directly from input stream range. Which means it ignores the [lex::_val = operation::add] I specified in my lexer.
Changing that to
qi::rule<Iterator, operation::type (operation::type)> sum_operator;
didn't help either.
Also I tried changing definition to
sum_operator %= (tokens.operator_add | tokens.operator_sub) [qi::_val = qi::_1];
didn't help either.
How can I work around that? I know I could use symbols from Qi, but I want to keep the lexer so that the regexes for the tokens are easy to configure. I could also extend assign_to_attribute_from_iterators as described in the documentation, but this kind of doubles the work. I guess I could also skip the attributes in the lexer and just have them in the grammar. But this again doesn't work well with the flexibility of the variable token (in my actual case there is slightly more logic there, so that which part of the token forms the actual name of the variable is also configurable, while here it is fixed to just skipping the first character). Anything else?
Also a side question - maybe anyone knows. Is there a way to get to capture groups of the regular expression of the token from tokens action? So that instead of having
variable [lex::_val = phoenix::construct<std::string>(lex::_start + 1, lex::_end)]
instead I would be able to make a string from the capture group and so easily handle formats like $var$.
Edited! I have improved whitespace skipping along conclusions from Whitespace skipper when using Boost.Spirit Qi and Lex. It is a simplification that does not affect questions asked here.
Ok, here's my take on the RPN 'requirement'. I heavily favor natural (automatic) attribute propagation over semantic actions (see Boost Spirit: "Semantic actions are evil"?)
I consider the other options (uglifying) optimizations. You might do them if you're happy with the overall design and don't mind making it harder to maintain :)
Live On Coliru
Beyond the sample from my comment that you've already studied, I added that RPN transformation step:
// Flattens an AST::expression into a sequence of cells in postfix order.
namespace RPN {
// One output element: an operator, a numeric value or a variable name.
using cell = boost::variant<AST::operation, AST::value, AST::variable>;
using rpn_stack = std::vector<cell>;
// Visitor that appends the postfix form of an expression to `stack`.
struct transform : boost::static_visitor<> {
// Dispatch on the active alternative, binding `stack` as the first argument.
void operator()(rpn_stack& stack, AST::expression const& e) const {
boost::apply_visitor(boost::bind(*this, boost::ref(stack), ::_1), e);
}
// Post-order: left operand, right operand, then the operator.
void operator()(rpn_stack& stack, AST::bin_expr const& e) const {
(*this)(stack, e.lhs);
(*this)(stack, e.rhs);
stack.push_back(e.op);
}
// Leaves are appended as they are.
void operator()(rpn_stack& stack, AST::value const& v) const { stack.push_back(v); }
void operator()(rpn_stack& stack, AST::variable const& v) const { stack.push_back(v); }
};
}
That's all! Use it like so, e.g.:
// Fill `program` with the postfix form of `expr`, then print each cell
// followed by a space.
RPN::transform compiler;
RPN::rpn_stack program;
compiler(program, expr);
for (auto& instr : program) {
std::cout << instr << " ";
}
Which makes the output:
Parsing success: (3 + (8 * 9))
3 8 9 * +
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
// Scope holder for the arithmetic operator enumeration and its stream output.
struct operation
{
    enum type { add, sub, mul, div };

    // Writes the operator's symbol; any value other than the four enumerators
    // is written as its integer value in angle brackets.
    friend std::ostream& operator<<(std::ostream& os, type op) {
        // Indexed by enumerator value: add, sub, mul, div are 0..3.
        static char const* const symbols[] = { "+", "-", "*", "/" };
        int const index = static_cast<int>(op);
        bool const known = (op == type::add) || (op == type::sub)
                        || (op == type::mul) || (op == type::div);
        if (known) {
            return os << symbols[index];
        }
        return os << "<" << index << ">";
    }
};
// Lexer for arithmetic expressions: operators, numeric values, %-prefixed
// variables, two kinds of brackets and whitespace.
template<typename Lexer>
class expression_lexer
: public lex::lexer<Lexer>
{
public:
// Operator tokens carry no attribute here (lex::omit); the previous attribute
// type is kept as a comment.
//typedef lex::token_def<operation::type> operator_token_type;
typedef lex::token_def<lex::omit> operator_token_type;
typedef lex::token_def<double> value_token_type;
typedef lex::token_def<std::string> variable_token_type;
typedef lex::token_def<lex::omit> parenthesis_token_type;
// (opening, closing) token pair for one bracket style.
typedef std::pair<parenthesis_token_type, parenthesis_token_type> parenthesis_token_pair_type;
typedef lex::token_def<lex::omit> whitespace_token_type;
expression_lexer()
: operator_add('+'),
operator_sub('-'),
// 'x' or '*' for multiplication, ':' or '/' for division.
operator_mul("[x*]"),
operator_div("[:/]"),
// Digits with an optional fractional part.
value("\\d+(\\.\\d+)?"),
// '%' followed by one or more word characters.
variable("%(\\w+)"),
parenthesis({
std::make_pair(parenthesis_token_type('('), parenthesis_token_type(')')),
std::make_pair(parenthesis_token_type('['), parenthesis_token_type(']'))
}),
whitespace("[ \\t]+")
{
// Register the tokens.
// NOTE(review): the operator actions still assign lex::_val although the
// operator tokens are declared with lex::omit — confirm they are still needed.
this->self
+= operator_add [lex::_val = operation::add]
| operator_sub [lex::_val = operation::sub]
| operator_mul [lex::_val = operation::mul]
| operator_div [lex::_val = operation::div]
| value
// Build the variable name from the match minus its first character.
| variable [lex::_val = phoenix::construct<std::string>(lex::_start + 1, lex::_end)]
// Whitespace is matched but flagged pass_ignore.
| whitespace [lex::_pass = lex::pass_flags::pass_ignore]
;
// Register the opening and closing token of every bracket pair.
std::for_each(parenthesis.cbegin(), parenthesis.cend(),
[&](parenthesis_token_pair_type const& token_pair)
{
this->self += token_pair.first | token_pair.second;
}
);
}
operator_token_type operator_add;
operator_token_type operator_sub;
operator_token_type operator_mul;
operator_token_type operator_div;
value_token_type value;
variable_token_type variable;
std::vector<parenthesis_token_pair_type> parenthesis;
whitespace_token_type whitespace;
};
// AST produced by the grammar: a value, a variable or a binary expression.
namespace AST {
using operation = operation::type;
using value = double;
using variable = std::string;
struct bin_expr;
// bin_expr is held through recursive_wrapper because it contains expressions itself.
using expression = boost::variant<value, variable, boost::recursive_wrapper<bin_expr> >;
// Binary operation on two sub-expressions.
struct bin_expr {
expression lhs, rhs;
operation op;
// Streams as "(lhs op rhs)".
friend std::ostream& operator<<(std::ostream& os, bin_expr const& be) {
return os << "(" << be.lhs << " " << be.op << " " << be.rhs << ")";
}
};
}
// Adapted in the order lhs, op, rhs — this differs from the member declaration order.
BOOST_FUSION_ADAPT_STRUCT(AST::bin_expr, lhs, op, rhs)
// Grammar over the lexer's token stream that produces an AST::expression.
template<typename Iterator>
class expression_grammar : public qi::grammar<Iterator, AST::expression()>
{
public:
// Builds the rules from the token definitions exposed by `tokens`.
template<typename Tokens>
explicit expression_grammar(Tokens const& tokens)
: expression_grammar::base_type(start)
{
// The whole input must be one expression.
start = expression >> qi::eoi;
// Binary forms; each falls back to its plain operand in the alternatives below.
bin_sum_expr = sum_operand >> sum_operator >> expression;
bin_fac_expr = fac_operand >> fac_operator >> sum_operand;
expression = bin_sum_expr | sum_operand;
sum_operand = bin_fac_expr | fac_operand;
// The operator tokens are followed by qi::attr, which supplies the enumerator
// as the attribute.
sum_operator = tokens.operator_add >> qi::attr(AST::operation::add) | tokens.operator_sub >> qi::attr(AST::operation::sub);
fac_operator = tokens.operator_mul >> qi::attr(AST::operation::mul) | tokens.operator_div >> qi::attr(AST::operation::div);
// Only reference `parenthesised` when at least one bracket pair is configured.
if(tokens.parenthesis.empty()) {
fac_operand = terminal;
}
else {
fac_operand = parenthesised | terminal;
// Start with the first bracket pair ...
parenthesised = tokens.parenthesis.front().first >> expression >> tokens.parenthesis.front().second;
// ... then add one alternative per remaining pair, building on a copy of the
// rule as defined so far.
std::for_each(tokens.parenthesis.cbegin() + 1, tokens.parenthesis.cend(),
[&](typename Tokens::parenthesis_token_pair_type const& token_pair)
{
parenthesised = parenthesised.copy() | (token_pair.first >> expression >> token_pair.second);
});
}
terminal = tokens.value | tokens.variable;
BOOST_SPIRIT_DEBUG_NODES(
(start) (expression) (bin_sum_expr) (bin_fac_expr)
(fac_operand) (terminal) (parenthesised) (sum_operand)
(sum_operator) (fac_operator)
);
}
private:
qi::rule<Iterator, AST::expression()> start;
qi::rule<Iterator, AST::expression()> expression;
qi::rule<Iterator, AST::expression()> sum_operand;
qi::rule<Iterator, AST::expression()> fac_operand;
qi::rule<Iterator, AST::expression()> terminal;
qi::rule<Iterator, AST::expression()> parenthesised;
// NOTE(review): declared with an int attribute although the values supplied
// are AST::operation enumerators — confirm this is intended.
qi::rule<Iterator, int()> sum_operator;
qi::rule<Iterator, int()> fac_operator;
// extra rules to help with AST creation
qi::rule<Iterator, AST::bin_expr()> bin_sum_expr;
qi::rule<Iterator, AST::bin_expr()> bin_fac_expr;
};
// Flattens an AST::expression into a sequence of cells in postfix order.
namespace RPN {
// One output element: an operator, a numeric value or a variable name.
using cell = boost::variant<AST::operation, AST::value, AST::variable>;
using rpn_stack = std::vector<cell>;
// Visitor that appends the postfix form of an expression to `stack`.
struct transform : boost::static_visitor<> {
// Dispatch on the active alternative, binding `stack` as the first argument.
void operator()(rpn_stack& stack, AST::expression const& e) const {
boost::apply_visitor(boost::bind(*this, boost::ref(stack), ::_1), e);
}
// Post-order: left operand, right operand, then the operator.
void operator()(rpn_stack& stack, AST::bin_expr const& e) const {
(*this)(stack, e.lhs);
(*this)(stack, e.rhs);
stack.push_back(e.op);
}
// Leaves are appended as they are.
void operator()(rpn_stack& stack, AST::value const& v) const { stack.push_back(v); }
void operator()(rpn_stack& stack, AST::variable const& v) const { stack.push_back(v); }
};
}
// Reads expressions from standard input until end of input or an empty line.
// For each line it prints the parsed AST and its postfix (RPN) form, or a
// failure message, followed by any unconsumed remainder of the line.
int main()
{
    typedef lex::lexertl::token<std::string::const_iterator, boost::mpl::vector<operation::type, double, std::string>> token_type;
    typedef expression_lexer<lex::lexertl::actor_lexer<token_type>> expression_lexer_type;
    typedef expression_lexer_type::iterator_type expression_lexer_iterator_type;
    typedef expression_grammar<expression_lexer_iterator_type> expression_grammar_type;
    expression_lexer_type lexer;
    expression_grammar_type grammar(lexer);
    RPN::transform compiler;
    std::string line;
    while(std::getline(std::cin, line) && !line.empty())
    {
        std::string::const_iterator first = line.begin();
        std::string::const_iterator const last = line.end();
        AST::expression expr;
        bool const result = lex::tokenize_and_parse(first, last, lexer, grammar, expr);
        if(!result)
            std::cout << "Parsing failed!\n";
        else
        {
            std::cout << "Parsing success: " << expr << "\n";
            RPN::rpn_stack program;
            compiler(program, expr);
            for (auto const& instr : program) {
                std::cout << instr << " ";
            }
            // Terminate the RPN line so that the output for the next input
            // (or the remainder message below) starts on a fresh line.
            std::cout << "\n";
        }
        if(first != last)
            std::cout << "Remainder: >" << std::string(first, last) << "<\n";
    }
}

Segfault when parsing with Boost::Spirit

I am trying to write a language parser with Boost::Spirit. I read the tutorial and tried the following code to parse a function with the syntax: def myfunc(arg1 type1, arg2 type2 ...) return_type:
AST:
// AST types for a function definition.
namespace ast {
enum Type { BOOL, INT32, FLOAT32 };
using Identifier = std::string;
// An identifier paired with its type.
using TypedIdentifier = std::tuple<Identifier, Type>;
using ArgList = std::vector<TypedIdentifier>;
// Function name, argument list, return type.
using FunctionDef = std::tuple<Identifier, ArgList, Type>;
}
Parser:
// One grammar (or symbol table) per production of the function-definition syntax.
namespace parser {
// A character from the first set followed by any number from the second.
// NOTE(review): this rule is declared with the space skipper — confirm that
// skipping between the characters of an identifier is intended.
struct Identifier
: qi::grammar<string::iterator, ast::Identifier(), ascii::space_type> {
Identifier() : Identifier::base_type(start) {
start = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]");
}
qi::rule<string::iterator, ast::Identifier(), ascii::space_type> start;
};
// Symbol table mapping type names to ast::Type values.
struct Type : qi::symbols<char, ast::Type> {
Type() {
add("int32", ast::INT32)("float32", ast::FLOAT32)("bool", ast::BOOL);
}
};
// An identifier followed by a type name.
struct TypedIdentifier
: qi::grammar<string::iterator, ast::TypedIdentifier(), ascii::space_type> {
TypedIdentifier() : TypedIdentifier::base_type(start) {
// Identifier() and Type() are temporaries that are destroyed at the end of
// this statement.
// NOTE(review): if the rule keeps references to them, parsing would go
// through dangling references — confirm.
start = Identifier() >> Type();
}
qi::rule<string::iterator, ast::TypedIdentifier(), ascii::space_type> start;
};
// "def" name "(" comma-separated typed identifiers ")" return-type ":".
struct FunctionDef
: qi::grammar<string::iterator, ast::FunctionDef(), ascii::space_type> {
FunctionDef() : FunctionDef::base_type(start) {
// The sub-grammars here are temporaries as well (see the note in TypedIdentifier).
start = "def" >> Identifier() >> "(" >> (TypedIdentifier() % ",") >> ")" >>
Type() >> ":";
}
qi::rule<string::iterator, ast::FunctionDef(), ascii::space_type> start;
};
}
Then I get a segfault when trying to parse a simple code snippet. The segfault happens when trying to parse a function definition, but I debugged a bit and the segfault already happens when trying to parse a typed identifier.
int main() {
string foo("foo int32");
auto begin = foo.begin();
auto end = foo.end();
ast::TypedIdentifier id;
bool result = qi::phrase_parse(begin, end, parser::TypedIdentifier(),
ascii::space, id);
cout << "Parse " << (result ? "successful " : "failed ") << endl;
return 0;
}
I tested the Identifier and Type parsers and they work fine on their own.
I also tried defining global grammars instead of instantiating new ones but I also get the segfault.
What am I doing wrong here?
The linked answer¹ indeed shows what's wrong (there are references to temporaries in the grammar rules).
I suggest that you don't need to create grammar<> instances for every single production. Instead, it's way more efficient (and elegant) to group them as rules into a grammar:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// AST types for a function definition, plus stream output for them.
namespace ast {
    enum Type { BOOL, INT32, FLOAT32 };

    using Identifier = std::string;
    using TypedIdentifier = std::tuple<Identifier, Type>;  // name, type
    using ArgList = std::vector<TypedIdentifier>;
    using FunctionDef = std::tuple<Identifier, ArgList, Type>;  // name, arguments, return type

    // Writes the enumerator's name, or "?" for any other value.
    std::ostream& operator<<(std::ostream& os, Type v) {
        char const* label = "?";
        if (v == BOOL) {
            label = "BOOL";
        } else if (v == INT32) {
            label = "INT32";
        } else if (v == FLOAT32) {
            label = "FLOAT32";
        }
        return os << label;
    }

    // Writes a tuple through its Fusion vector form.
    template <typename... Ts>
    std::ostream& operator<<(std::ostream& os, std::tuple<Ts...> const& v) {
        return os << boost::fusion::as_vector(v);
    }

    // Writes the elements inside braces, each followed by a space.
    template <typename T>
    std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
        os << "{";
        for (auto it = v.begin(); it != v.end(); ++it) {
            os << *it << " ";
        }
        return os << "}";
    }
}
// A single grammar whose productions are member rules.
namespace parser {
template <typename Iterator>
struct MyGrammarImpl : qi::grammar<Iterator, ast::FunctionDef(), ascii::space_type> {
// `functionDef` is the start rule.
MyGrammarImpl() : MyGrammarImpl::base_type(functionDef)
{
// A character from the first set followed by any number from the second.
identifier = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]");
typedIdentifier = identifier >> type;
// "def" name "(" comma-separated typed identifiers ")" return-type ":".
functionDef = "def" >> identifier >> '(' >> (typedIdentifier % ",") >> ')' >> type >> ":";
// Wrap the symbol table in a rule.
type = type_;
BOOST_SPIRIT_DEBUG_NODES((identifier)(typedIdentifier)(type)(functionDef))
}
private:
qi::rule<Iterator, ast::TypedIdentifier(), ascii::space_type> typedIdentifier;
qi::rule<Iterator, ast::FunctionDef(), ascii::space_type> functionDef;
qi::rule<Iterator, ast::Type(), ascii::space_type> type;
// lexemes
// Declared without a skipper, unlike the rules above.
qi::rule<Iterator, ast::Identifier()> identifier;
// Symbol table mapping type names to ast::Type values.
struct Type : qi::symbols<char, ast::Type> {
Type() {
add("int32", ast::INT32)
("float32", ast::FLOAT32)
("bool", ast::BOOL)
;
}
};
// Kept as a member rather than created as a temporary inside a rule.
Type type_;
};
using MyGrammar = MyGrammarImpl<std::string::const_iterator>;
}
int main() {
std::string const foo("def bar(foo int32 ) bool:");
auto begin = foo.begin();
auto end = foo.end();
ast::FunctionDef def;
bool result = qi::phrase_parse(begin, end, parser::MyGrammar(), ascii::space, def);
std::cout << "Parse " << (result ? "successful " : "failed ") << std::endl;
if (result)
std::cout << def << "\n";
}
Prints:
Parse successful
(bar {(foo INT32) } BOOL)
With debug info:
<functionDef>
<try>def bar(foo int32 ) </try>
<identifier>
<try>bar(foo int32 ) bool</try>
<success>(foo int32 ) bool:</success>
<attributes>[[b, a, r]]</attributes>
</identifier>
<typedIdentifier>
<try>foo int32 ) bool:</try>
<identifier>
<try>foo int32 ) bool:</try>
<success> int32 ) bool:</success>
<attributes>[[f, o, o]]</attributes>
</identifier>
<type>
<try> int32 ) bool:</try>
<success> ) bool:</success>
<attributes>[INT32]</attributes>
</type>
<success> ) bool:</success>
<attributes>[[[f, o, o], INT32]]</attributes>
</typedIdentifier>
<type>
<try> bool:</try>
<success>:</success>
<attributes>[BOOL]</attributes>
</type>
<success></success>
<attributes>[[[b, a, r], [[[f, o, o], INT32]], BOOL]]</attributes>
</functionDef>
¹ Internal Boost::Spirit code segfaults when parsing a composite grammar

Declaration of cross-recursive rules

I declared rules of my grammar as static const. That worked fine till I tried to use cross-recursive rules (rule1 is defined using rule2 which is defined using rule1). The source code still can be built, but segfaults on parsing source containing such cross-recursive case.
Here's a simplified code of the grammar:
// Skipper grammar: whitespace and comments. Its rules are static const data
// members, defined out of class below.
template < typename Iterator >
class Skipper : public qi::grammar<Iterator> {
public:
// `_skip_rule` is the start rule.
Skipper ( ) : Skipper::base_type(_skip_rule) { }
private:
static qi::rule<Iterator> const
_comment,
_skip_rule;
};
// Comment rule: either comment form, delimited with confix.
template < typename Iterator >
typename qi::rule<Iterator> const
Skipper<Iterator>::_comment(
boost::spirit::repository::confix("/*", "*/")[*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol)[*(qi::char_ - qi::eol)] // Single-line
);
// Skip rule: ASCII whitespace or a comment.
template < typename Iterator >
typename qi::rule<Iterator> const
Skipper<Iterator>::_skip_rule(qi::ascii::space | _comment);
// Expression grammar whose rules are static const data members, defined out
// of class below.
template < typename Iterator, typename Skipper >
class Grammar : public qi::grammar<Iterator, Skipper > {
public:
// `expression` is the start rule.
Grammar ( ) : Grammar::base_type(expression) { }
private:
static qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal,
identifier,
// Rules
operand,
expression;
};
// Unsigned or signed integer literal.
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::scalar_literal(qi::uint_ | qi::int_);
// A letter or underscore followed by letters, digits or underscores, as a lexeme.
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::identifier(qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')]);
// operand is constructed from an expression that mentions `expression` ...
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::operand((scalar_literal | identifier | ('(' >> expression >> ')')));
// ... and expression is constructed from `operand`, so the two definitions
// refer to each other.
// NOTE(review): whichever is initialised first presumably sees the other
// before it has been constructed — confirm.
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::expression(operand);
(The expression rule is made identical to operand to keep the code easy to understand; of course it should be more complicated, yet still based on operand.) The operand declaration uses the expression one and vice versa. That segfaults when trying to phrase_parse, for example, (123). I suppose that it's because of the "forward" use of expression; the same happens if I put the expression declaration before the operand one. So in what way should these rules be declared to avoid the runtime error?
First off, the static has nothing to do with it:
Live On Coliru fails just as badly:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
template <typename Iterator>
struct Skipper : qi::grammar<Iterator> {
Skipper() : Skipper::base_type(_skip_rule) { }
private:
qi::rule<Iterator> const
_comment {
boost::spirit::repository::confix("/*", "*/") [*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)] // Single-line
},
_skip_rule {
qi::ascii::space | _comment
};
};
template <typename Iterator, typename Skipper>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) { }
private:
qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal { qi::uint_ | qi::int_ },
identifier { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
// Rules
operand { (scalar_literal | identifier | ('(' >> expression >> ')')) },
expression { operand };
};
int main() {
using It = std::string::const_iterator;
Skipper<It> s;
Grammar<It, Skipper<It> > p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,s);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Secondly, the skipper has nothing to do with things:
Live On Coliru fails just as badly:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) { }
private:
qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal { qi::uint_ | qi::int_ },
identifier { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
// Rules
operand { (scalar_literal | identifier | ('(' >> expression >> ')')) },
expression { operand };
};
int main() {
using It = std::string::const_iterator;
Grammar<It> p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Thirdly, the timing of initialization has to do with it:
Live On Coliru succeeds:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) {
scalar_literal = qi::uint_ | qi::int_;
identifier = (qi::alpha | '_') >> *(qi::alnum | '_');
// Rules
operand = (scalar_literal | identifier | ('(' >> expression >> ')'));
expression = operand;
}
private:
qi::rule<Iterator> scalar_literal, identifier; // Tokens
qi::rule<Iterator, Skipper> operand, expression; // Rules
};
int main() {
using It = std::string::const_iterator;
Grammar<It> p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints
Parse success
Finally, you can have all the cake and eat it too:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace parsing {
namespace detail {
template <typename Iterator>
struct Skipper : qi::grammar<Iterator> {
Skipper() : Skipper::base_type(_skip_rule) {
_comment = boost::spirit::repository::confix("/*", "*/") [*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)] // Single-line
;
_skip_rule = qi::ascii::space | _comment;
}
private:
qi::rule<Iterator> _comment, _skip_rule;
};
template <typename Iterator, typename Skipper = Skipper<Iterator> >
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) {
scalar_literal = qi::uint_ | qi::int_;
identifier = (qi::alpha | '_') >> *(qi::alnum | '_');
// Rules
operand = (scalar_literal | identifier | ('(' >> expression >> ')'));
expression = operand;
}
private:
qi::rule<Iterator> scalar_literal, identifier; // Tokens
qi::rule<Iterator, Skipper> operand, expression; // Rules
};
}
// Stateless front end: holds one shared, const parser and one shared, const
// skipper per <Iterator, Skipper> instantiation and exposes a single parse().
template <typename Iterator, typename Skipper = detail::Skipper<Iterator> >
struct facade {
    // Parses the whole of `input`; returns phrase_parse's result and reports
    // any input left unconsumed on std::cout.
    template <typename Range> static bool parse(Range const& input) {
        Iterator f = boost::begin(input), l = boost::end(input);
        bool ok = qi::phrase_parse(f, l, _parser, _skipper);
        if (f!=l)
            std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
        return ok;
    }
  private:
    // The skipper instance uses the Skipper template parameter, so it always
    // has the same type the grammar below was instantiated with.
    static const Skipper _skipper;
    static const detail::Grammar<Iterator, Skipper> _parser;
};
template <class I, class S> const S facade<I,S>::_skipper = {};
template <class I, class S> const detail::Grammar<I, S> facade<I,S>::_parser = {};
}
int main() {
using It = std::string::const_iterator;
std::string const input = "(123)";
bool ok = parsing::facade<It>::parse(input);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
}
Note that the result is the same, the parser/skipper are every bit as static and const as in the original code, the code is a lot easier to maintain (and has a bit more structure to it at the same time).
This is basically where the Singletons-are-bad theme meets the inner-const-is-problematic theme. You don't need to make the fields const. You don't need to make the instances static.
Just, create only one instance if you prefer. Also, it's not a problem that the parser is now copyable (you don't have to copy it; but now you can).

Spirit qi parsing to an Abstract Syntax Tree for nested functions

I am trying to create a parser using boost's spirit qi parser. It is parsing a string that contains three types of values. A constant, a variable, or a function. The functions can be nested inside of each other. The test string is f(a, b) = f(g(z, x), g(x, h(x)), c), where a-e are constants, f-r are functions, and s-z are variables. I successfully created a rule that can correctly parse the expression. The trouble arose when I changed the function parsing the rule into a grammar. There were several errors that I was able to fix. I almost got the grammar to parse the expression and turn it into an abstract syntax tree I created. However I got this error about a file contained in the boost library and I could not figure out where it is coming from because I don't understand the compiler message. I was following the example put up on the website for putting data from a parser to a struct using the employee example: http://www.boost.org/doc/libs/1_41_0/libs/spirit/example/qi/employee.cpp
main.cpp
#include "Parser.h"
#include "Term.h"
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iostream>
#include <list>
using std::string;
using std::cout;
using std::endl;
int main()
{
cout << "Unification Algorithm" << endl << endl;
string phrase = "f(a, b) = f(g(z, x), g(x, h(x)), c)";
string::const_iterator itr = phrase.begin();
string::const_iterator last = phrase.end();
cout << phrase << endl;
// Parser grammar
Parser<string::const_iterator> g;
// Output data
Expression expression;
if (phrase_parse(itr, last, g, boost::spirit::ascii::space, expression))
{
cout << "Expression parsed." << endl;
}
else
{
cout << "Could not parse expression." << endl;
}
}
Parser.h
#ifndef _Parser_h_
#define _Parser_h_
#include "Term.h"
#include <boost/spirit/include/qi.hpp>
#include <vector>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Grammar for input of the form `fn(...) = fn(...)`, skipping ASCII whitespace between tokens.
// NOTE(review): the grammar is declared with attribute signature `Expression()` while the
// start rule `expression` below is declared with `Expression` (no parentheses); the rule
// handed to base_type has to use the same template arguments as the grammar itself.
template <typename Iterator>
struct Parser : qi::grammar<Iterator, Expression(), ascii::space_type>
{
Parser() : Parser::base_type(expression)
{
using qi::char_;
const_char = char_("a-eA-E");
fn_char = char_("f-rF-R");
var_char = char_("s-zS-Z");
// Each rule below refers only to the previous one; none of them refers to itself.
basic_fn = fn_char >> char_('(') >> (const_char | var_char) % char_(',') >> char_(')');
first_fn_wrapper = fn_char >> char_('(') >> (basic_fn | const_char | var_char) % char_(',') >> char_(')');
nested_fn = fn_char >> char_('(') >> (first_fn_wrapper | const_char | var_char) % char_(',') >> char_(')');
expression = nested_fn >> char_("=") >> nested_fn;
}
// Constant character a - e
qi::rule<Iterator, T_Cons, ascii::space_type> const_char;
// Function character f - r
qi::rule<Iterator, char(), ascii::space_type> fn_char;
// Variable character s - z
qi::rule<Iterator, T_Var, ascii::space_type> var_char;
// Allows for basic function parsing eg. f(x, y, z)
qi::rule<Iterator, T_Fn, ascii::space_type> basic_fn;
// Allows for single nested functions eg. f(g(x), y, z)
qi::rule<Iterator, T_Fn, ascii::space_type> first_fn_wrapper;
// Allows for one more level of nesting eg. f(g(x, h(y)), z); the rules are not
// recursive, so anything nested deeper than that is not matched
qi::rule<Iterator, T_Fn, ascii::space_type> nested_fn;
// Full rule for a nested function expression
qi::rule<Iterator, Expression, ascii::space_type> expression;
};
#endif // _Parser_h_
Term.h
#ifndef _Term_h_
#define _Term_h_
#include <boost/fusion/include/adapt_struct.hpp>
#include <vector>
struct Term
{
char name;
};
BOOST_FUSION_ADAPT_STRUCT(Term, (char, name))
struct T_Cons : Term
{
};
BOOST_FUSION_ADAPT_STRUCT(T_Cons, (char, name))
struct T_Var : Term
{
};
BOOST_FUSION_ADAPT_STRUCT(T_Var, (char, name))
struct T_Fn : Term
{
std::vector<Term> * params;
T_Fn() { params = new std::vector<Term>(); }
~T_Fn() { delete params; }
};
BOOST_FUSION_ADAPT_STRUCT(T_Fn, (std::vector<Term>*, params))
struct Expression
{
Term lh_term;
Term rh_term;
};
BOOST_FUSION_ADAPT_STRUCT(Expression, (char, name) (Term, lh_term) (Term, rh_term))
#endif // _Term_h_
I cannot link the entire error message from the compiler because it is extremely long, but here are the last few. These are the compile errors that it gave:
boost_1_46_0\boost\mpl\assert.hpp|360|error: no matching function for call to 'assertion_failed(mpl_::failed************ (boost::spirit::qi::grammar<Iterator, T1, T2, T3, T4>::grammar(const boost::spirit::qi::rule<Iterator_, T1_, T2_, T3_, T4_>&, const string&) [with Iterator_ = __gnu_cxx::__normal_iterator<const char*, std::basic_string<char> >; T1_ = Expression; T2_ = boost::proto::exprns_::expr<boost::proto::tag::terminal, boost::proto::argsns_::term<boost::spirit::tag::char_code<boost::spirit::tag::space, boost::spirit::char_encoding::asci|
boost_1_46_0\boost\proto\extends.hpp|540|error: use of deleted function 'boost::proto::exprns_::expr<boost::proto::tag::terminal, boost::proto::argsns_::term<boost::spirit::qi::reference<const boost::spirit::qi::rule<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char> >, Expression(), boost::proto::exprns_::expr<boost::proto::tag::terminal, boost::proto::argsns_::term<boost::spirit::tag::char_code<boost::spirit::tag::space, boost::spirit::char_encoding::ascii> >, 0l>, boost::spirit::unused_type, boost::spirit::unused_type> > >, 0l>:|
boost_1_46_0\boost\proto\detail\expr0.hpp|165|error: no matching function for call to 'boost::spirit::qi::reference<const boost::spirit::qi::rule<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char> >, Expression(), boost::proto::exprns_::expr<boost::proto::tag::terminal, boost::proto::argsns_::term<boost::spirit::tag::char_code<boost::spirit::tag::space, boost::spirit::char_encoding::ascii> >, 0l>, boost::spirit::unused_type, boost::spirit::unused_type> >::reference()'|
UPDATE: Showing a simplified parser with a recursive AST that parses the sample expression shown
As always, the assertion message leads to exactly the problem:
// If you see the assertion below failing then the start rule
// passed to the constructor of the grammar is not compatible with
// the grammar (i.e. it uses different template parameters).
BOOST_SPIRIT_ASSERT_MSG(
(is_same<start_type, rule<Iterator_, T1_, T2_, T3_, T4_> >::value)
, incompatible_start_rule, (rule<Iterator_, T1_, T2_, T3_, T4_>));
So it tells you you should match the grammar with the start rule: you have
struct Parser : qi::grammar<Iterator, Expression(), ascii::space_type>
but
qi::rule<Iterator, Expression, ascii::space_type> expression;
Clearly you forgot parentheses there:
qi::rule<Iterator, Expression(), ascii::space_type> expression;
Guidelines when using generic libraries:
Some of these "rules" are generically applicable, with the exception of no. 2 which is specifically related to Boost Spirit:
baby steps; start small (empty, even)
start with the AST to match the grammar exactly
build gradually,
compiling every step along the way
UPDATE
Here's a much simplified grammar. As mentioned, in the "first rules of spirit" just before, start with the AST to match the grammar exactly:
namespace ast {
namespace tag {
struct constant;
struct variable;
struct function;
}
template <typename Tag> struct Identifier { char name; };
using Constant = Identifier<tag::constant>;
using Variable = Identifier<tag::variable>;
using Function = Identifier<tag::function>;
struct FunctionCall;
using Expression = boost::make_recursive_variant<
Constant,
Variable,
boost::recursive_wrapper<FunctionCall>
>::type;
struct FunctionCall {
Function function;
std::vector<Expression> params;
};
struct Equation {
Expression lhs, rhs;
};
}
Of course this could be much simpler still since all identifiers are just char and you could do the switching dynamically (impression).
Now, the grammar will have to follow. 1. Keep it simple 2. Format carefully 3. Match the ast directly, 4. add debug macros:
template <typename It, typename Skipper = ascii::space_type>
struct Parser : qi::grammar<It, ast::Equation(), Skipper>
{
Parser() : Parser::base_type(equation_)
{
using namespace qi;
constant_ = qi::eps >> char_("a-eA-E");
function_ = qi::eps >> char_("f-rF-R");
variable_ = qi::eps >> char_("s-zS-Z");
function_call = function_ >> '(' >> -(expression_ % ',') >> ')';
expression_ = constant_ | variable_ | function_call;
equation_ = expression_ >> '=' >> expression_;
BOOST_SPIRIT_DEBUG_NODES((constant_)(function_)(variable_)(function_call)(expression_)(equation_))
}
qi::rule<It, ast::Constant()> constant_;
qi::rule<It, ast::Function()> function_;
qi::rule<It, ast::Variable()> variable_;
qi::rule<It, ast::FunctionCall(), Skipper> function_call;
qi::rule<It, ast::Expression(), Skipper> expression_;
qi::rule<It, ast::Equation(), Skipper> equation_;
};
Note how the comments have become completely unneeded. Also note how recursively using expression_ solved your biggest headache!
Full Program
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace ast {
namespace tag {
struct constant;
struct variable;
struct function;
}
template <typename Tag> struct Identifier { char name; };
using Constant = Identifier<tag::constant>;
using Variable = Identifier<tag::variable>;
using Function = Identifier<tag::function>;
struct FunctionCall;
using Expression = boost::make_recursive_variant<
Constant,
Variable,
boost::recursive_wrapper<FunctionCall>
>::type;
struct FunctionCall {
Function function;
std::vector<Expression> params;
};
struct Equation {
Expression lhs, rhs;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::Constant, (char, name))
BOOST_FUSION_ADAPT_STRUCT(ast::Variable, (char, name))
BOOST_FUSION_ADAPT_STRUCT(ast::Function, (char, name))
BOOST_FUSION_ADAPT_STRUCT(ast::FunctionCall, (ast::Function, function)(std::vector<ast::Expression>, params))
BOOST_FUSION_ADAPT_STRUCT(ast::Equation, (ast::Expression, lhs)(ast::Expression, rhs))
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename It, typename Skipper = ascii::space_type>
struct Parser : qi::grammar<It, ast::Equation(), Skipper>
{
Parser() : Parser::base_type(equation_)
{
using namespace qi;
constant_ = qi::eps >> char_("a-eA-E");
function_ = qi::eps >> char_("f-rF-R");
variable_ = qi::eps >> char_("s-zS-Z");
function_call = function_ >> '(' >> -(expression_ % ',') >> ')';
expression_ = constant_ | variable_ | function_call;
equation_ = expression_ >> '=' >> expression_;
BOOST_SPIRIT_DEBUG_NODES((constant_)(function_)(variable_)(function_call)(expression_)(equation_))
}
qi::rule<It, ast::Constant()> constant_;
qi::rule<It, ast::Function()> function_;
qi::rule<It, ast::Variable()> variable_;
qi::rule<It, ast::FunctionCall(), Skipper> function_call;
qi::rule<It, ast::Expression(), Skipper> expression_;
qi::rule<It, ast::Equation(), Skipper> equation_;
};
int main() {
std::cout << "Unification Algorithm\n\n";
std::string const phrase = "f(a, b) = f(g(z, x), g(x, h(x)), c)";
using It = std::string::const_iterator;
It itr = phrase.begin(), last = phrase.end();
std::cout << phrase << std::endl;
Parser<It> g;
ast::Equation parsed;
if (phrase_parse(itr, last, g, ascii::space, parsed)) {
std::cout << "Expression parsed.\n";
} else {
std::cout << "Could not parse equation.\n";
}
if (itr != last) {
std::cout << "Remaining unparsed input: '" << std::string(itr,last) << "'\n";
}
}
A vanilla C++ solution (as per popular request)
I compiled it with MSVC 2013.
Lack of unrestricted unions support led me to duplicate the 3 possible values of an argument.
There are workarounds for this limitation, but (like so many other things in C++) they are rather messy, so I kept them out to limit code obfuscation.
#include <string>
#include <vector>
#include <iostream>
using namespace std;
// possible token types
// (the order must match tToken::type_name, which is indexed by these values)
enum tTokenType {
    T_CONST,  // s-z
    T_VAR,    // a-e
    T_FUNC,   // f-r
    T_EQUAL,  // =
    T_COMMA,  // ,
    T_OBRACE, // (
    T_CBRACE, // )
    T_SPACE,  // ' ' or '\t'
    T_ERROR,  // any character not listed above
    T_EOI     // end of input
};

// tokens: one lexical element, i.e. a single input character or an
// end-of-input / error marker
struct tToken {
    tTokenType _type;  // lexical element type
    char       _value; // the actual const/var/func letter (0 for marker tokens)
    size_t     _index; // position in translation unit

    static const string constants, variables, functions, spacing;
    static const char * type_name[]; // printable names, indexed by tTokenType

    // Builds a marker token of a known type that carries no character.
    tToken(tTokenType t, size_t index) : _type(t), _value(0), _index(index) {}

    // Classifies one input character; anything unrecognised is T_ERROR.
    static tTokenType type(char c)
    {
        if (constants.find(c) != string::npos) return T_CONST;
        if (variables.find(c) != string::npos) return T_VAR;
        if (functions.find(c) != string::npos) return T_FUNC;
        if (spacing  .find(c) != string::npos) return T_SPACE;
        if (c == '=') return T_EQUAL;
        if (c == ',') return T_COMMA;
        if (c == '(') return T_OBRACE;
        if (c == ')') return T_CBRACE;
        return T_ERROR;
    }

    // Builds the token for input character c found at position index.
    tToken(char c, size_t index) : _type(type(c)), _value(c), _index(index) {}

    // Writes a "----^ expecting <name>" marker line to cerr, where <name> is the
    // printable name of the token type that was expected.
    void croak(tTokenType type)
    {
        // _index is unsigned: at position 0 a plain `_index - 1` would wrap to a
        // huge value and make the string constructor throw std::length_error.
        const size_t dashes = (_index > 0) ? _index - 1 : 0;
        cerr << string(dashes, '-') << "^ expecting " << type_name[(int)type] << "\n";
    }
};
const string tToken::variables("abcde");
const string tToken::functions("fghijklmnopqr");
const string tToken::constants("stuvwxyz");
const string tToken::spacing (" \t");
const char * tToken::type_name[] = { "constant", "variable", "function", "=", ",", "(", ")", "space", "error", "end of input" };
// parser
// Character-level scanner: hands out one tToken per significant input
// character and silently drops spacing between them.
class Parser {
    friend class Compiler;
    string _input; // remaining program input
    size_t _index; // current token index (for error tracking)

    // Drops leading spacing characters from the remaining input.
    void skip_spaces(void)
    {
        while (!_input.empty() && tToken::type(_input[0]) == T_SPACE) next();
    }

    // Discards the first remaining character and advances the position.
    void next(void)
    {
        _input.erase(0, 1);
        _index++;
    }
public:
    // Start at position 0 so that peek()/get() report a defined index even if
    // they are called before read().
    Parser() : _index(0) {}

    // Loads a new program and positions the scanner on its first token.
    void read (string program)
    {
        _input = program;
        _index = 0;
        skip_spaces();
    }

    // Returns the current token and moves on to the next significant character.
    tToken get(void)
    {
        tToken res = peek();
        next();
        skip_spaces();
        return res;
    }

    // Returns the current token without consuming it (T_EOI once input is exhausted).
    tToken peek(void)
    {
        if (_input.empty()) return tToken(T_EOI, _index);
        return tToken (_input[0], _index);
    }

    // Consumes one token; yields it if it has the requested type, otherwise a
    // T_ERROR marker token.
    tToken accept(tTokenType type)
    {
        tToken t = get();
        return (t._type == type) ? t : tToken (T_ERROR, _index-1);
    }

    // Consumes one token and reports on cerr when it is not of the requested
    // type; returns whether it matched.
    bool consume(tTokenType type)
    {
        tToken t = get();
        bool res = t._type == type;
        if (!res) t.croak(type);
        return res;
    }
};
// syntactic elements
// Common base of all syntactic elements: the letter that names the element
// and a validity flag that always starts out false.
struct tSyntacticElement {
    char name;
    bool valid;

    // With no argument the element is named '?'.
    tSyntacticElement(char c = '?')
        : name(c)
        , valid(false)
    {
    }
};
// Constant operand. Every member (and the base class) is private, so only the
// friend tArgument can create one or read its name.
class tConstant : private tSyntacticElement {
friend class tArgument;
tConstant() {}
tConstant(tToken t) : tSyntacticElement(t._value) { } // the token's character becomes the name
};
// Variable operand. Every member (and the base class) is private, so only the
// friend tArgument can create one or read its name.
class tVariable : private tSyntacticElement {
friend class tArgument;
tVariable() {}
tVariable(tToken t) : tSyntacticElement(t._value) { } // the token's character becomes the name
};
// Function call: a function letter plus its ordered argument list.
// Everything is private; access goes through the friend classes listed below.
class tFunCall : private tSyntacticElement {
friend class Compiler;
friend class tProgram;
friend class tArgument;
vector<tArgument>params; // arguments in call order (tArgument is defined further down)
tFunCall() {}
tFunCall(tToken t) : tSyntacticElement(t._value) { } // the token's character becomes the function letter
void add_argument(tArgument a);
string dump(void);
};
class tArgument {
friend class Compiler;
friend class tProgram;
friend class tFunCall;
tTokenType type;
// MSVC 2013 does not support unrestricted unions, so for the
// sake of simplicity I'll leave it as 3 separate attributes
tConstant c;
tVariable v;
tFunCall f;
tArgument() {}
tArgument(tToken val) : type(val._type)
{
if (val._type == T_CONST) c = val;
if (val._type == T_VAR ) v = val;
}
tArgument(tFunCall f) : type(T_FUNC ), f(f) {}
string dump(void)
{
if (type == T_VAR) return string("$") + v.name;
if (type == T_CONST) return string("#") + c.name;
if (type == T_FUNC) return f.dump();
return "!";
}
};
// Whole translation unit: `left = right`. All members are private; the friend
// Compiler fills them in.
class tProgram {
    friend class Compiler;
    tArgument left;
    tArgument right;
    bool valid; // Compiler narrows this with `&=`, so it has to start out true

    // Gives `valid` a defined starting value; it was previously left indeterminate.
    tProgram() : valid(true) {}

    // Text form of the equation, built from both sides' own dumps.
    string dump(void) { return left.dump() + " = " + right.dump(); }
};
// syntactic analyzer
// Appends one argument to the end of this call's parameter list.
void tFunCall::add_argument(tArgument a) { params.push_back(a); }
string tFunCall::dump(void)
{
string res(1, name);
res += '(';
// it's 2015 and still no implode() in C++...
for (size_t i = 0; i != params.size(); i++)
{
res += params[i].dump();
if (i != params.size() - 1) res += ',';
}
res += ')';
return res;
}
// Recursive-descent analyzer: one private function per grammar rule, plus the
// public analyze()/dump() pair.
class Compiler {
    Parser parser;
    tProgram program;

    // fun_call: fun_name ( arg_list )
    // The result's `valid` flag is only set when the closing brace was reached.
    tFunCall parse_function(void)
    {
        tToken f = parser.accept(T_FUNC);
        tFunCall res (f);
        // NOTE(review): the outcome of this accept() is not checked, so a missing
        // '(' is consumed silently.
        parser.accept(T_OBRACE);
        for (;;)
        {
            tArgument a = parse_argument();
            res.add_argument(a);
            tToken next = parser.get();
            if (next._type == T_CBRACE) break;
            if (next._type != T_COMMA) return res; // malformed list: leave res.valid false
        }
        res.valid = true;
        return res;
    }

    // argument: variable | constant | fun_call
    // Anything else yields an argument of type T_ERROR.
    tArgument parse_argument(void)
    {
        tToken id = parser.peek();
        if (id._type == T_FUNC) return parse_function();
        id = parser.get();
        if (id._type == T_CONST) return id;
        if (id._type == T_VAR) return id;
        return tArgument(tToken (T_ERROR, id._index));
    }
public:
    // program: argument = argument
    // Echoes the input on cerr, then parses it into `program`.
    void analyze(string input)
    {
        parser.read(input);
        cerr << input << "\n";
        // Start every run from a defined state: the flag is only ever narrowed
        // with `&=` below, so it must not be indeterminate or left over from a
        // previous analyze() call.
        program.valid = true;
        program.left = parse_argument();
        program.valid &= parser.consume(T_EQUAL);
        program.right = parse_argument();
        program.valid &= parser.consume(T_EOI);
    }

    // Text form of the last analyzed program.
    string dump(void)
    {
        return program.dump();
    }
};
// Analyzes the expression given as the first command-line argument and prints
// its dump on stdout. Returns 1 when no expression was supplied.
int main(int argc, char * argv[])
{
    // argv[1] is a null pointer when no argument is given; building a string
    // from it would be undefined behaviour, so bail out with a usage line.
    if (argc < 2)
    {
        cerr << "usage: " << (argc > 0 ? argv[0] : "ExpressionCompiler") << " \"<expression>\"\n";
        return 1;
    }
    Compiler compiler;
    // compiler.analyze("f(a, b) = f(g(z, x), g(x, h(x)), c)");
    compiler.analyze(argv[1]);
    cout << compiler.dump();
    return 0;
}
Grammar
Given the rather terse problem definition, I invented a grammar that should at least match the test input:
program : argument = argument
argument: variable
| constant
| fun_call
fun_call: fun_name ( arg_list )
arg_list: argument
| argument , arg_list
Parsing
Given the simplicity of the syntax, parsing is pretty straightforward.
Each character is basically something valid, a space or something invalid.
Spaces are silently consumed, so that the analyzer only gets useful tokens to process.
Analyze
Since I'm doing this barehanded, I simply define a function for each grammatical rule (program, fun_call, arg_list, argument).
The grammar is predictive (can't remember how it's called in posh books, LL1 maybe?) and there are no arithmetic expressions so the code is relatively lightweight.
Error reporting
Bah, just the barest minimum, and I did not really test it.
Proper error handling can easily double code size (even with yacc), so I drew the line early.
Invalid characters will be replaced by "!", and some expected symbols will be pointed at in a semblance of vintage C compilers output.
There are absolutely no re-synchronization attempts, so a typo inside a function call (especially a braces imbalance) will likely cast the rest of the translation unit to the bin.
Using the hard earned syntactic tree
The mighty compiler manages to spit out an equivalent of the input.
Just to show that something was done beside trimming white spaces, variables are preceded by a '$' and constants by a '#' (showing a deplorable lack of imagination).
Sample output
ExpressionCompiler "f(a) = z"
f(a) = z
f($a) = #z
ExpressionCompiler "f(a) = f(c,z)"
f(a) = f(c,z)
f($a) = f($c,#z)
ExpressionCompiler "f(a, b) = f(g(z, x), g(x, h(x)), c)"
f(a, b) = f(g(z, x), g(x, h(x)), c)
f($a,$b) = f(g(#z,#x),g(#x,h(#x)),$c)
ExpressionCompiler "f(a, b) + f(g(z, x), g(x, h(x)), c)"
f(a, b) + f(g(z, x), g(x, h(x)), c)
-------^ expecting =
f($a,$b) = f(g(#z,#x),g(#x,h(#x)),$c)
ExpressionCompiler "f(A, b) = f(g(z, x), g(x, h(x)), c)"
f(A, b) = f(g(z, x), g(x, h(x)), c)
f(!,$b) = f(g(#z,#x),g(#x,h(#x)),$c)
ExpressionCompiler "f(a, b) = K(g(z, x), g(x, h(x)), c)"
f(a, b) = K(g(z, x), g(x, h(x)), c)
----------^ expecting end of input
f($a,$b) = !

How do I capture the original input into the synthesized output from a spirit grammar?

I'm working on a boost::spirit::qi::grammar and would like to copy a portion of the original text into the synthesized output structure of the grammar (more specifically, the portion that matched one of the components of the rule). The grammar would ultimately be used as a sub-grammar for a more complicated grammar, so I don't really have access to the original input.
I'm guessing that this can be done through semantic actions or the grammar context, but I can't find an example that does this without access to the original parse().
Here's what I have so far:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct A
{
std::string header;
std::vector<int> ints;
std::string inttext;
};
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(std::vector<int>, ints)
//(std::string, inttext)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ qi::int_ % qi::char_(",_") ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.inttext << " -> [ ";
for( auto & i: output.ints )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Something similar to what you asked without using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string::const_iterator begin;
std::string::const_iterator end;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::string::const_iterator, begin)
(std::vector<int>, data)
(std::string::const_iterator, end)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
// Grammar producing an A: an alphabetic header, a single space, then the int
// list together with the input positions that delimit it.
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos ]; // <---- the iter_pos on either side fills ints_type's begin / end, i.e. where the int list starts and stops
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
// Parses a fixed sample and prints the header, the original text of the int
// list and the parsed values.
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// the original text ("1,2_3" for this input) is rebuilt from the stored [begin, end) iterator pair
std::cout << output.header << ": " << std::string(output.ints.begin,output.ints.end) << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[
(iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos)
[phx::at_c<0>(qi::_val)=qi::_2,
phx::at_c<1>(qi::_val)=phx::construct<std::string>(qi::_1,qi::_3)]
];
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
// Parses a fixed sample and prints the header, the captured text of the int
// list and the parsed values.
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// output.ints.inttext is expected to hold the matched text ("1,2_3" for this input)
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Another alternative using a custom directive dont_eat that returns the subject attribute but does not consume any input. This is possibly slower since the rule ints is parsed twice, but I believe that the syntax is nicer (and it's a good excuse to try creating your own directive)(It's a slightly modified version of "boost/spirit/home/qi/directive/lexeme.hpp").
dont_eat.hpp
#if !defined(DONT_EAT_HPP)
#define DONT_EAT_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(dont_eat);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
// Specializes use_directive to mpl::true_ for the custom::tag::dont_eat tag
// in the qi domain, which is what makes custom::dont_eat[...] usable in
// Qi expressions.
template <>
struct use_directive<qi::domain, custom::tag::dont_eat> // enables dont_eat
: mpl::true_ {};
}}
namespace custom
{
    /// Look-ahead directive: runs its subject on a *copy* of the input
    /// iterator, so the subject's attribute is produced while the caller's
    /// iterator is left where it was.
    template <typename Subject>
    struct dont_eat_directive
      : boost::spirit::qi::unary_parser<dont_eat_directive<Subject> >
    {
        typedef Subject subject_type;

        dont_eat_directive(Subject const& subject)
          : subject(subject) {}

        /// The directive exposes exactly the attribute type of its subject.
        template <typename Context, typename Iterator>
        struct attribute
        {
            typedef typename
                boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
            type;
        };

        /// Pre-skips and parses the subject starting at `first`, without
        /// modifying `first`.
        /// @return whatever the subject's parse() returns; `attr` is passed
        ///         straight through to the subject.
        template <typename Iterator, typename Context
          , typename Skipper, typename Attribute>
        bool parse(Iterator& first, Iterator const& last
          , Context& context, Skipper const& skipper
          , Attribute& attr) const
        {
            // All iterator movement happens on this copy, which is discarded.
            Iterator temp = first;
            boost::spirit::qi::skip_over(temp, last, skipper);
            return subject.parse(temp, last, context, skipper, attr);
        }

        /// Describes this parser as "dont_eat" wrapping the subject's own
        /// description.
        template <typename Context>
        boost::spirit::info what(Context& context) const
        {
            // `info` is a class in boost::spirit. From inside namespace custom
            // the unqualified name is not visible (ADL does not find class
            // names), so it must be qualified for this member to compile
            // once it is instantiated.
            return boost::spirit::info("dont_eat", subject.what(context));
        }

        Subject subject;
    };
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
    // Generator for custom::dont_eat[subject]: wraps the given subject parser
    // in a custom::dont_eat_directive. The first and third call arguments are
    // unused.
    template <typename Subject, typename Modifiers>
    struct make_directive<custom::tag::dont_eat, Subject, Modifiers>
    {
        typedef custom::dont_eat_directive<Subject> result_type;

        result_type operator()(unused_type, Subject const& wrapped, unused_type) const
        {
            result_type directive(wrapped);
            return directive;
        }
    };
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
// dont_eat_directive reports a semantic action exactly when its subject
// does (delegates to unary_has_semantic_action<Subject>).
template <typename Subject>
struct has_semantic_action<custom::dont_eat_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
// Container handling is likewise delegated to the subject. This matches
// dont_eat_directive::attribute, which is the subject's own attribute type.
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::dont_eat_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "dont_eat.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Result of parsing the integer list: the values plus the text they came from.
// This is the attribute type of the `ints_string` rule.
struct ints_type
{
std::vector<int> data; // the parsed integers (printed one by one in main)
std::string inttext; // source text of the list; "1,2_3" is the wanted value for the sample input
};
// Top-level parse result; attribute type of the grammar and of its `start` rule.
struct A
{
std::string header; // first element of `start`: the `header` rule's string
ints_type ints; // second element of `start`: the integer list and its text
};
// Adapt both structs as Boost.Fusion sequences. The member order listed here
// is the sequence order: (data, inttext) for ints_type and (header, ints)
// for A. The grammar's sequences produce their attributes in that same order.
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
/// Grammar producing an A from "<letters> <int-list>". The list is matched
/// twice: first through custom::dont_eat (attribute only, no input consumed),
/// then again under qi::raw to capture the matched text as a string.
template <typename Iterator>
struct parser : qi::grammar<Iterator, A()>
{
    parser()
      : parser::base_type(start)
    {
        using custom::dont_eat;
        using qi::as_string;
        using qi::lexeme;
        using qi::raw;

        // One or more alphabetic characters.
        header %= lexeme[+qi::alpha];

        // Integers separated by ',' or '_'.
        ints = lexeme[qi::int_ % qi::char_(",_")];

        // Element 0: the integers (look-ahead); element 1: the list's text.
        ints_string = dont_eat[ints] >> as_string[raw[ints]];

        start %= header >> ' ' >> ints_string;
    }

    qi::rule<Iterator, std::string()>      header;
    qi::rule<Iterator, std::vector<int>()> ints;
    qi::rule<Iterator, ints_type()>        ints_string;
    qi::rule<Iterator, A()>                start;
};
/// Runs the grammar over a fixed sample line. On a complete match it prints
/// the header, the captured list text and each integer; otherwise it prints
/// "did not parse".
int main()
{
    A output;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;

    const bool matched = qi::parse(iter, input.end(), p, output);

    // Treat a partial match (unconsumed trailing input) as a failure too.
    if (!matched || iter != input.end())
    {
        std::cout << "did not parse";
        return 0;
    }

    // For the sample input, output.ints.inttext is expected to be "1,2_3".
    std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
    for (const auto& value : output.ints.data)
    {
        std::cout << value << ' ';
    }
    std::cout << ']' << std::endl;
    return 0;
}
The annotate directive shown below returns a fusion::vector2<> with the subject's attribute as its first member and, as its second member, a string holding the input text that the subject matched. I think this is the easiest method to reuse, as long as you adapt your structs accordingly. I'm not sure that fusion::vector2<> is the best way to handle the attributes, but in the limited testing I've done it has worked fine. With this directive the ints_string rule would simply be:
ints_string=custom::annotate[ints];
//or ints_string=custom::annotate[qi::lexeme[qi::int_ % qi::char_(",_")]];
Example on LWS.
annotate.hpp
#if !defined(ANNOTATE_HPP)
#define ANNOTATE_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
// Declares the `annotate` terminal in namespace custom. The rest of this
// header keys on its tag type custom::tag::annotate, and grammars use the
// terminal as custom::annotate[subject].
BOOST_SPIRIT_TERMINAL(annotate);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
// Specializes use_directive to mpl::true_ for the custom::tag::annotate tag
// in the qi domain, which is what makes custom::annotate[...] usable in
// Qi expressions.
template <>
struct use_directive<qi::domain, custom::tag::annotate> // enables annotate
: mpl::true_ {};
}}
namespace custom
{
    /// Directive that parses its subject and reports, alongside the subject's
    /// attribute, the input text the subject matched.
    template <typename Subject>
    struct annotate_directive
      : boost::spirit::qi::unary_parser<annotate_directive<Subject> >
    {
        typedef Subject subject_type;

        annotate_directive(Subject const& subject)
          : subject(subject) {}

        /// Exposed attribute: a two-element Fusion vector of
        /// (subject attribute, std::string).
        template <typename Context, typename Iterator>
        struct attribute
        {
            typedef
                boost::fusion::vector2<
                    typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
                  , std::string
                >
            type;
        };

        /// Pre-skips, then parses the subject.
        /// On success, element 0 of `attr` receives the subject's attribute
        /// and element 1 receives the text in [position after pre-skip,
        /// position after the subject). On failure, `first` is restored to
        /// the position after the pre-skip and `attr` is left untouched.
        /// `attr` must be usable with boost::fusion::at_c<0>/<1>.
        template <typename Iterator, typename Context
          , typename Skipper, typename Attribute>
        bool parse(Iterator& first, Iterator const& last
          , Context& context, Skipper const& skipper
          , Attribute& attr) const
        {
            typedef typename
                boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
            subject_attribute;

            boost::spirit::qi::skip_over(first, last, skipper);
            Iterator save = first;

            // Value-initialize: a default-initialized scalar would be
            // indeterminate if the subject succeeds without assigning to it,
            // and it is copied out unconditionally below.
            subject_attribute attr_ = subject_attribute();

            if (subject.parse(first, last, context, skipper, attr_))
            {
                boost::spirit::traits::assign_to(attr_, boost::fusion::at_c<0>(attr));
                boost::spirit::traits::assign_to(std::string(save, first), boost::fusion::at_c<1>(attr));
                return true;
            }

            first = save;
            return false;
        }

        /// Describes this parser as "annotate" wrapping the subject's own
        /// description.
        template <typename Context>
        boost::spirit::info what(Context& context) const
        {
            // `info` is a class in boost::spirit. From inside namespace custom
            // the unqualified name is not visible (ADL does not find class
            // names), so it must be qualified for this member to compile
            // once it is instantiated.
            return boost::spirit::info("annotate", subject.what(context));
        }

        Subject subject;
    };
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
    // Generator for custom::annotate[subject]: wraps the given subject parser
    // in a custom::annotate_directive. The first and third call arguments are
    // unused.
    template <typename Subject, typename Modifiers>
    struct make_directive<custom::tag::annotate, Subject, Modifiers>
    {
        typedef custom::annotate_directive<Subject> result_type;

        result_type operator()(unused_type, Subject const& wrapped, unused_type) const
        {
            result_type directive(wrapped);
            return directive;
        }
    };
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
// annotate_directive reports a semantic action exactly when its subject
// does (delegates to unary_has_semantic_action<Subject>).
template <typename Subject>
struct has_semantic_action<custom::annotate_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
// Container handling is delegated to the subject.
// NOTE(review): annotate_directive::attribute is a fusion::vector2 of
// (subject attribute, std::string), not the subject's attribute itself, so
// confirm that inheriting the subject's handles_container answer is intended.
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::annotate_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "annotate.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Result of parsing the integer list: the values plus the text they came from.
// This is the attribute type of the `ints_string` rule.
struct ints_type
{
std::vector<int> data; // the parsed integers (printed one by one in main)
std::string inttext; // source text of the list; "1,2_3" is the wanted value for the sample input
};
// Top-level parse result; attribute type of the grammar and of its `start` rule.
struct A
{
std::string header; // first element of `start`: the `header` rule's string
ints_type ints; // second element of `start`: the integer list and its text
};
// Adapt both structs as Boost.Fusion sequences. The member order listed here
// is the sequence order: (data, inttext) for ints_type and (header, ints)
// for A. For ints_type this lines up with the (subject attribute, string)
// pair that custom::annotate exposes.
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
/// Grammar producing an A from "<letters> <int-list>". The list is wrapped in
/// custom::annotate, which supplies both the integers and the matched text
/// for the ints_type attribute.
template <typename Iterator>
struct parser : qi::grammar<Iterator, A()>
{
    parser()
      : parser::base_type(start)
    {
        using custom::annotate;
        using qi::lexeme;

        // One or more alphabetic characters.
        header %= lexeme[+qi::alpha];

        // Integers separated by ',' or '_'.
        ints = lexeme[qi::int_ % qi::char_(",_")];

        // (integers, matched text) -> ints_type
        ints_string = annotate[ints];

        start %= header >> ' ' >> ints_string;
    }

    qi::rule<Iterator, std::string()>      header;
    qi::rule<Iterator, std::vector<int>()> ints;
    qi::rule<Iterator, ints_type()>        ints_string;
    qi::rule<Iterator, A()>                start;
};
/// Runs the whole grammar wrapped in custom::annotate over a fixed sample
/// line, so the text matched by the grammar is captured in `annotation` next
/// to the parsed A. Prints both on a complete match, "did not parse" otherwise.
int main()
{
    A output;
    std::string annotation;
    std::string input("out 1,2_3");
    auto iter = input.begin();
    parser<decltype(iter)> p;

    const bool matched =
        qi::parse(iter, input.end(), custom::annotate[p], output, annotation);

    // Treat a partial match (unconsumed trailing input) as a failure too.
    if (!matched || iter != input.end())
    {
        std::cout << "did not parse";
        return 0;
    }

    // For the sample input, output.ints.inttext is expected to be "1,2_3".
    std::cout << "annotation: " << annotation << std::endl;
    std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
    for (const auto& value : output.ints.data)
    {
        std::cout << value << ' ';
    }
    std::cout << ']' << std::endl;
    return 0;
}