Optimizing the grammar - c++

Also have asked the question at boost spirit mailing list
http://boost.2283326.n4.nabble.com/Spirit-X3-Boost-1-59-Compilation-never-finishes-for-a-recursive-grammar-td4693813.html
I am working on creating an xpath2.0 parser as per the RFC. It's basically a subproject of another project that I am working on.
After some initial success, I did the mistake of writing a bunch of grammar rules and AST instead of compiling and testing it at every point. After that I basically had a novel of template error messages to read (my fault actually).
Below I present a reduced grammar for the xpath (not particularly as per RFC), which does not finish compilation OR I had to stop the process when my mac started slowing down after about 7 mins.
#include <iostream>
#include <string>
#include <vector>
#include <boost/optional.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ast {
struct or_expression;
struct function_call;
template <typename T>
struct operation_sequence_entry
{
std::string op;
T expr;
};
struct primary_expression: x3::variant<
std::string,
x3::forward_ast<or_expression>,
std::string,
int32_t,
uint32_t,
double,
x3::forward_ast<function_call>
>
{
using base_type::base_type;
using base_type::operator=;
};
struct filter_expression
{
primary_expression prim_expr;
std::vector<x3::forward_ast<or_expression>> predicates;
};
struct path_expression: x3::variant<
boost::optional<filter_expression>,
boost::optional<primary_expression>
>
{
using base_type::base_type;
using base_type::operator=;
};
using union_expression = std::vector<path_expression>;
struct unary_expression
{
union_expression expr;
};
struct eq_expression
{
using expr_seq_type = operation_sequence_entry<unary_expression>;
unary_expression lhs_expr;
std::vector<expr_seq_type> rhs_expr;
};
struct and_expression
{
using expr_seq_type = operation_sequence_entry<eq_expression>;
eq_expression lhs_expr;
std::vector<expr_seq_type> rhs_expr;
};
struct or_expression
{
using expr_seq_type = operation_sequence_entry<and_expression>;
and_expression lhs_expr;
std::vector<expr_seq_type> rhs_expr;
};
struct function_call
{
std::string func_name;
std::vector<or_expression> args;
};
}
BOOST_FUSION_ADAPT_TPL_STRUCT(
(T),
(ast::operation_sequence_entry)(T),
(std::string, op),
(T, expr)
);
BOOST_FUSION_ADAPT_STRUCT(
ast::unary_expression,
(ast::union_expression, expr)
);
BOOST_FUSION_ADAPT_STRUCT(
ast::eq_expression,
(ast::unary_expression, lhs_expr),
(std::vector<typename ast::eq_expression::expr_seq_type>, rhs_expr)
);
BOOST_FUSION_ADAPT_STRUCT(
ast::and_expression,
(ast::eq_expression, lhs_expr),
(std::vector<typename ast::and_expression::expr_seq_type>, rhs_expr)
);
BOOST_FUSION_ADAPT_STRUCT(
ast::or_expression,
(ast::and_expression, lhs_expr),
(std::vector<typename ast::or_expression::expr_seq_type>, rhs_expr)
);
BOOST_FUSION_ADAPT_STRUCT(
ast::function_call,
(std::string, func_name),
(std::vector<ast::or_expression>, args)
);
BOOST_FUSION_ADAPT_STRUCT(
ast::filter_expression,
(ast::primary_expression, prim_expr),
(std::vector<x3::forward_ast<ast::or_expression>>, predicates)
);
namespace grammar {
// Bring in the spirit parsers
using x3::lexeme;
using x3::alpha;
using x3::alnum;
using x3::ascii::char_;
using x3::ascii::string;
using x3::lit;
using x3::ascii::digit;
using x3::int_;
using x3::uint_;
using x3::double_;
template<typename T>
auto as = [](auto p) { return x3::rule<struct _, T>{} = as_parser(p); };
auto str_ = [](const char* lit) { return x3::string(lit); };
x3::rule<class path_expr, ast::path_expression> path_expr = "path-expr";
auto ncname = x3::rule<class ncname, std::string>{"ncname"}
= x3::lexeme[+(char_ - ':')]
;
auto qname = x3::rule<class qname, std::string>{"qname"}
= as<std::string>(ncname >> char_(':') >> ncname)
| as<std::string>(ncname)
;
auto union_expr = x3::rule<class union_expr, ast::union_expression>{"union-expr"}
= path_expr % '/'
;
auto unary_expr = x3::rule<class unary_expr, ast::unary_expression>{"unary-expr"}
= -x3::lit('-') >> union_expr
;
auto equality_expr = x3::rule<class eq_expr, ast::eq_expression>{"equality-expr"}
= unary_expr
>> *(as<ast::operation_sequence_entry<ast::unary_expression>>
( (str_("=") | str_("!=")) > unary_expr )
)
;
auto and_expr = x3::rule<class and_expr, ast::and_expression>{"and-expr"}
= equality_expr
>> *(as<ast::operation_sequence_entry<ast::eq_expression>>
( str_("and") > equality_expr )
)
;
auto or_expr = x3::rule<class or_expr, ast::or_expression>{"or-expr"}
= and_expr
>> *(as<ast::operation_sequence_entry<ast::and_expression>>
( str_("or") >> and_expr )
)
;
auto function_name = as<std::string>(qname);
auto function_arg = or_expr;
auto function_call = x3::rule<class func_call, ast::function_call>{"func-call"}
= function_name > '(' > (or_expr % ',') > ')'
;
auto prim_expr = x3::rule<class prim_expr, ast::primary_expression>{"prim-expr"}
= ('$' > qname)
| ('"' > *(char_ - '"') > '"')
| ('(' > or_expr > ')')
| (int_ | uint_ | double_)
| function_call
;
auto predicate = '[' > or_expr > ']';
auto filter_expr = x3::rule<class filter_expr, ast::filter_expression>{"filter-expr"}
= prim_expr >> *(predicate)
;
auto path_expr_def = -(filter_expr) >> -(lit("/") | lit("//")) >> -(prim_expr);
BOOST_SPIRIT_DEFINE (path_expr);
}
int main() {
using x3::space;
using grammar::or_expr;
ast::or_expression oexpr;
std::string input = "$ab/$cd or $ef";
bool res = phrase_parse(input.begin(),
input.end(),
or_expr,
space,
oexpr);
if (!res) {
std::cout << "Parsing failed miserably!\n";
return 1;
}
return 0;
}
Compiling it as
g++ -std=c++14 -ftemplate-depth=1024 -o rec_ex rec_ex.cc
Compiler : Clang 3.8
Boost Version : 1.59
Based on the template error instantiation for lower template-depth, I am pretty sure that somewhere deep recursion is going on.
Is there anyway to optimize the above grammar so as to not cause this issue ?
Thanks.

Related

Boost spirit x3 - how to parse nested structures?

I try to parse
list<char> fldName
I used the nested structures. But I have a trouble with parsing when one structure nested in other. Look at the sollowing minimal sample code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <string_view>
using namespace std::string_view_literals;
using namespace boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
namespace client::ast
{
struct ValidType
{
std::string Name;
std::string SubName1;
std::string SubName2;
};
struct StructField
{
ValidType Type;
std::string Name;
};
} // namespace client::ast
BOOST_FUSION_ADAPT_STRUCT(client::ast::ValidType,
Name, SubName1, SubName2
)
BOOST_FUSION_ADAPT_STRUCT(client::ast::StructField,
Type, Name
)
namespace client::parser
{
using ascii::char_;
template <typename T> static auto as = [](auto p) { return rule<struct tag, T> {"as"} = p; };
#define STRING(x) as<std::string>(x)
rule<class ValidType, ast::ValidType> const ValidType = "ValidType";
rule<class StructField, ast::StructField> const StructField = "StructField";
auto const ValidName = lexeme[(alpha | char_('_')) > *(alnum | char_('_'))];
auto const ValidType_SecondPart = char('<') > STRING(ValidName) > ('>' | ',' > STRING(ValidName) > '>');
auto const ValidType_def = STRING(ValidName) > -(ValidType_SecondPart);
auto const StructField_def = ValidType_def > STRING(ValidName);
BOOST_SPIRIT_DEFINE(ValidType);
BOOST_SPIRIT_DEFINE(StructField);
} // namespace client::parser
int main()
{
using boost::spirit::x3::ascii::space;
auto theData = R"(
list<char> fldName
)"sv;
using client::parser::StructField;
client::ast::StructField fld;
bool result = phrase_parse(theData.begin(), theData.end(), StructField, space, fld);
return result;
}
I receive following error:
Error C2338 Size of the passed attribute is less than expected
But I have no idea what is wrong. Its looks like boost::spirit::x3 have a bug with parsing nested structures.
Is there exists any way how to parse nested stuctures?
Answer - ValidType_def -> ValidType :
auto const StructField_def = ValidType_def > STRING(ValidName);
->
auto const StructField_def = ValidType > STRING(ValidName);

boost spirit x3 variant and std::pair

I tried to run some simple parser that will parse [ 1, 11, 3, 6-4]. Basically, integer list with range notation.
I want to put everything into AST without semantic action. So I use x3::variant. My code 'seems' very similar to the expression example. However, it can't compile under g++ 6.2. It indeed compile ok with clang++ 6.0 but yield wrong result.
The boost version is 1.63.
It seems that I have some 'move' or initialization issue.
#include <iostream>
#include <list>
#include <vector>
#include <utility>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
namespace ns
{
namespace ast
{
namespace x3 = boost::spirit::x3;
// forward definition
class uintObj;
struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
class uintObj : public x3::variant <
unsigned int,
uintPair_t
>
{
public:
using base_type::base_type;
using base_type::operator=;
};
struct varVec
{
uintVec_t valVector;
};
}
}
BOOST_FUSION_ADAPT_STRUCT(
ns::ast::varVec,
valVector
)
namespace ns
{
namespace parser
{
// namespace x3 = boost::spirit::x3;
// using namespace x3;
using namespace boost::spirit::x3;
// definition of the range pair:
rule<class uintPair, ast::uintPair_t> const uintPair = "uintPair";
auto const uintPair_def =
uint_
>> '-'
>> uint_
;
rule<class uintObj, ast::uintObj> const uintObj = "uintObj";
auto const uintObj_def =
uint_
| uintPair
;
// define rule definition : rule<ID, attrib>
// more terse definition :
// struct varVec_class;
// using varVec_rule_t = x3::rule<varVec_class, ast::varVec>;
// varVec_rule_t const varVec = "varVec";
// varVec is the rule, "varVec" is the string name of the rule.
rule<class varVec, ast::varVec> const varVec = "varVec";
auto const varVec_def =
'['
>> uintObj % ','
>> ']'
;
BOOST_SPIRIT_DEFINE(
varVec,
uintObj,
uintPair
);
}
}
int main()
{
std::string input ("[1, 11, 3, 6-4]\n");
std::string::const_iterator begin = input.begin();
std::string::const_iterator end = input.end();
ns::ast::varVec result; // ast tree
using ns::parser::varVec; // grammar
using boost::spirit::x3::ascii::space;
bool success = phrase_parse(begin, end, varVec, space, result);
if (success && begin == end)
std::cout << "good" << std::endl;
else
std::cout << "bad" << std::endl;
return 0;
}
Swap the alternative order for the uintObj_def
auto const uintObj_def =
uintPair
| uint_
;
The formulation you have now will always match on a uint_ because the uintPair begins with a valid uint_.
mjcaisse's answer calls out the main problem I think you had. There were a few missing pieces, so I decided to make a simplified version that shows parsing results:
Live On Wandbox
#include <iostream>
#include <iomanip>
//#include <boost/fusion/adapted.hpp>
//#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ns { namespace ast {
// forward definition
struct uintObj;
//struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
struct uintObj : x3::variant<unsigned int, uintPair_t> {
using base_type::base_type;
using base_type::operator=;
friend std::ostream& operator<<(std::ostream& os, uintObj const& This) {
struct {
std::ostream& os;
void operator()(unsigned int v) const { os << v; }
void operator()(uintPair_t v) const { os << v.first << "-" << v.second; }
} vis { os };
boost::apply_visitor(vis, This);
return os;
}
};
using varVec = uintVec_t;
} }
namespace ns { namespace parser {
using namespace boost::spirit::x3;
template <typename T> auto as = [](auto p) { return rule<struct _, T> {} = p; };
auto const uintPair = as<ast::uintPair_t> ( uint_ >> '-' >> uint_ );
auto const uintObj = as<ast::uintObj> ( uintPair | uint_ );
auto const varVec = as<ast::varVec> ( '[' >> uintObj % ',' >> ']' );
} }
int main() {
using namespace ns;
std::string const input("[1, 11, 3, 6-4]\n");
auto begin = input.begin(), end = input.end();
ast::varVec result; // ast tree
bool success = phrase_parse(begin, end, parser::varVec, x3::ascii::space, result);
if (success) {
std::cout << "good\n";
for (auto& r : result)
std::cout << r << "\n";
}
else
std::cout << "bad\n";
if (begin != end)
std::cout << "Remaining unparsed: " << std::quoted(std::string(begin, end)) << std::endl;
}
Prints
good
1
11
3
6-4

Boost.Spirit X3 compile time explodes with recursive rule

The following program takes 10s to compile. When I change the parenProcess rule below to '(' >> process >> ')' the compiler spends CPU but does not seem to finish. (I tried making a smaller reproducible program -- by removing rules between the process and parenProcess, but then the compile time no longer exploded).
How do I fix the compile (time) when embedding process instead?
(Minor other question: is there a nicer way to make rule 'x' and 'xActual'?)
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace wccs_parser {
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
using x3::long_;
using x3::ulong_;
using x3::lexeme;
//--- Ast structures
struct AstChannel {
std::string label;
bool complement;
};
struct AstAction {
AstChannel channel;
uint32_t weight;
};
struct AstRenaming {
std::string from;
std::string to;
};
struct AstNullProcess;
struct AstActionPrefixProcess;
struct AstChoiceProcess;
struct AstCompositionProcess;
struct AstRestrictionProcess;
struct AstRenamingProcess;
struct AstConstantProcess;
using AstAnyProcess = x3::variant<
x3::forward_ast<AstNullProcess>,
x3::forward_ast<AstActionPrefixProcess>,
x3::forward_ast<AstChoiceProcess>,
x3::forward_ast<AstCompositionProcess>,
x3::forward_ast<AstRestrictionProcess>,
x3::forward_ast<AstRenamingProcess>,
x3::forward_ast<AstConstantProcess>
>;
struct AstNullProcess {};
struct AstActionPrefixProcess {
AstAction action;
AstAnyProcess subProcess;
};
struct AstChoiceProcess {
std::vector<AstAnyProcess> subProcesses;
};
struct AstCompositionProcess {
std::vector<AstAnyProcess> subProcesses;
};
struct AstRestrictionProcess {
AstAnyProcess subProcess;
std::vector<std::string> labels;
};
struct AstRenamingProcess {
AstAnyProcess subProcess;
std::vector<AstRenaming> renamings;
};
struct AstConstantProcess {
std::string processName;
};
} // End namespace
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstChannel, label, complement)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstAction, channel, weight)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstRenaming, from, to)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstActionPrefixProcess, action, subProcess)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstChoiceProcess, subProcesses)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstCompositionProcess, subProcesses)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstRestrictionProcess, subProcess, labels)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstRenamingProcess, subProcess, renamings)
BOOST_FUSION_ADAPT_STRUCT(wccs_parser::AstConstantProcess, processName)
namespace wccs_parser {
//--- Rules
auto const constantName = x3::rule<struct constantRule, std::string> {"constantName"} =
x3::lexeme[ascii::upper >> *(ascii::alnum)];
auto const label = x3::rule<struct labelRule, std::string> {"label"} =
x3::lexeme[ascii::lower >> *(ascii::alnum)];
auto const channel = x3::rule<struct channelRule, AstChannel> {"channel"} =
label >> x3::matches['!'];
auto const action = x3::rule<struct actionRule, AstAction> {"action"} =
'<' >> channel >> ',' >> ulong_ >> '>';
auto renamingPair = x3::rule<struct renamingPairRule, AstRenaming> {"renamingPair"} =
label > "=>" > label;
x3::rule<struct processRule, AstAnyProcess> process{"process"};
auto const nullProcess = x3::rule<struct nullProcessRule, AstNullProcess> {"nullProcess"} = '0' >> x3::attr(AstNullProcess());
auto const constant = x3::rule<struct constantRule, AstConstantProcess> {"constant"} = constantName;
/// HERE:
auto const parenProcess = '(' > nullProcess > ')';
auto const primitive = x3::rule<struct primitiveRule, AstAnyProcess> {"primitive"} =
parenProcess
| nullProcess
| constant;
auto const restrictionActual = x3::rule<struct restrictionActual, AstRestrictionProcess> {"restrictionActual"} =
primitive >> '\\' >> '{' >> label % ',' >> '}';
auto const restriction = x3::rule<struct restrictionRule, AstAnyProcess> {"restriction"} =
primitive >> !x3::lit('\\')
| restrictionActual;
auto const renamingActual = x3::rule<struct renamingActualRule, AstRenamingProcess> {"renamingActual"} =
restriction >> '[' >> renamingPair % ',' >> ']';
auto const renaming = x3::rule<struct renamingRule, AstAnyProcess> {"renaming"} =
restriction >> !x3::lit('[')
| renamingActual;
x3::rule<struct actionPrefixingRule, AstAnyProcess> actionPrefix{"actionPrefix"};
auto const actionPrefixActual = x3::rule<struct actionPrefixActualRule, AstActionPrefixProcess> {"actionPrefixActual"} =
action > ('.' > actionPrefix);
auto const actionPrefix_def =
actionPrefixActual
| renaming;
BOOST_SPIRIT_DEFINE(actionPrefix)
auto const compositionActual = x3::rule<struct choiceActualrule, AstCompositionProcess> {"compositionActual"} =
actionPrefix % '|';
auto const composition = x3::rule<struct compositionRule, AstAnyProcess> {"composition"} =
actionPrefix >> !x3::lit('|')
| compositionActual;
auto const choiceActual = x3::rule<struct choiceActualrule, AstChoiceProcess> {"choiceActual"} =
composition % '+';
auto const choice = x3::rule<struct choiceRule, AstAnyProcess> {"choice"} =
composition >> !x3::lit('+')
| choiceActual;
auto const process_def = choice;
BOOST_SPIRIT_DEFINE(process)
auto const entry = x3::skip(ascii::space) [process];
} //End namespace
int main() {
std::string str("0 + (0)");
wccs_parser::AstAnyProcess root;
auto iter = str.begin();
auto end = str.end();
bool r = parse(iter, end, wccs_parser::entry, root);
if (r) {
std::cout << str << std::endl << std::endl << " Parses OK: " << std::endl;
}
else {
std::cout << "Parsing failed\n";
}
if (iter != end) std::cout << "Partial match" << std::endl;
return 0;
}
This is a known problem. CppEvans (?) on the mailing list claims to have a workaround on a branch, but that branch is far behind and the changes very intrusive, so I can't vet it/vouch for it.
So, the right recourse would be to post on the mailing list in a bid to get the main developer(s) involved, and raise awareness of this stopping issue.
Regardless, without changing the behaviour of your code, you can use a shorthand:
template <typename T> auto rule = [](const char* name = typeid(T).name()) {
struct _{};
return x3::rule<_, T> {name};
};
template <typename T> auto as = [](auto p) { return rule<T>() = p; };
This will make it much more convenient to write the repetitive Ast coercions:
auto constantName = as<std::string>(x3::lexeme[ascii::upper >> *(ascii::alnum)]);
auto label = as<std::string>(x3::lexeme[ascii::lower >> *(ascii::alnum)]);
auto channel = as<AstChannel>(label >> x3::matches['!']);
auto action = as<AstAction>('<' >> channel >> ',' >> x3::ulong_ >> '>');
auto renamingPair = as<AstRenaming>(label > "=>" > label);
auto nullProcess = as<AstNullProcess>(x3::omit['0']);
auto constant = as<AstConstantProcess>(constantName);
auto parenProcess = '(' > nullProcess > ')';
auto primitive = rule<AstAnyProcess> ("primitive")
= parenProcess
| nullProcess
| constant;
auto restrictionActual = as<AstRestrictionProcess>(primitive >> '\\' >> '{' >> label % ',' >> '}');
auto restriction = rule<AstAnyProcess> ("restriction")
= primitive >> !x3::lit('\\')
| restrictionActual
;
auto renamingActual = as<AstRenamingProcess>(restriction >> '[' >> renamingPair % ',' >> ']');
auto renaming = rule<AstAnyProcess> ("renaming")
= restriction >> !x3::lit('[')
| renamingActual
;
auto actionPrefixActual = as<AstActionPrefixProcess>(action > ('.' > actionPrefix));
auto actionPrefix_def = actionPrefixActual | renaming;
auto compositionActual = as<AstCompositionProcess>(actionPrefix % '|');
auto composition = rule<AstAnyProcess> ("composition")
= actionPrefix >> !x3::lit('|')
| compositionActual
;
auto choiceActual = as<AstChoiceProcess>(composition % '+');
auto choice = rule<AstAnyProcess> ("choice")
= composition >> !x3::lit('+')
| choiceActual
;
auto process_def = choice;
BOOST_SPIRIT_DEFINE(actionPrefix, process)
auto const entry = x3::skip(ascii::space) [process];
Program still runs with same output.

Why is boost::recursive_wrapper not working in this case

I have the following three rules:
unary_expression =
( '(' > expression > ')' )
| int_;
operator_expression =
unary_expression >> *(operators > expression);
expression =
( '(' > expression > ')' )
| operator_expression;
Obviously this is recursive, so I use boost::recursive_wrapper and created the following AST:
struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;
struct unary_expression
{
unary_expression_node m_unary_expression;
};
enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
using expression_node =
boost::variant<boost::recursive_wrapper<expression>, operator_expression>;
struct expression
{
expression_node m_expression;
};
This compiles (see full example below), but when the code attempts to construct an expression object the constructor gets into an infinite loop of calling these three constructors:
#11 0x0000000000466066 in ast::expression::expression ...
#12 0x00000000004682e0 in boost::recursive_wrapper<ast::expression>::recursive_wrapper ...
#13 0x000000000046718d in boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression>::variant
...
Thus, Creating an expression creates a boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression> (aka, an expression_node) which creates a boost::recursive_wrapper<ast::expression> which creates an expression which creates... and so on.
How can I solve this?
Here is a full example that compiles, but segfaults when the stack runs full:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace ast {
struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;
struct unary_expression
{
unary_expression_node m_unary_expression;
};
enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
using expression_node = boost::variant<boost::recursive_wrapper<expression>, operator_expression>;
struct expression
{
expression_node m_expression;
};
std::ostream& operator<<(std::ostream& os, expression const& expression)
{
return os << expression.m_expression;
}
std::ostream& operator<<(std::ostream& os, unary_expression const& unary_expression)
{
return os << unary_expression.m_unary_expression;
}
std::ostream& operator<<(std::ostream& os, operator_expression const& operator_expression)
{
os << operator_expression.first;
for (auto& l : operator_expression.second)
{
os << ' ' << l.first << ' ' << l.second;
}
return os;
}
} // namespace ast
BOOST_FUSION_ADAPT_STRUCT(
ast::expression,
(ast::expression_node, m_expression)
)
BOOST_FUSION_ADAPT_STRUCT(
ast::unary_expression,
(ast::unary_expression_node, m_unary_expression)
)
BOOST_FUSION_ADAPT_STRUCT(
ast::operator_expression,
(ast::unary_expression, first),
(ast::operator_expression::second_type, second)
)
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression(), qi::space_type>
{
private:
qi::symbols<char, ast::operators> operators;
qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
qi::rule<Iterator, ast::operator_expression(), qi::space_type> operator_expression;
qi::rule<Iterator, ast::expression(), qi::space_type> expression;
public:
expression_grammar() : expression_grammar::base_type(expression, "expression_grammar")
{
using qi::double_;
using qi::char_;
using qi::int_;
operators.add
("==", ast::op_eq)
("!=", ast::op_ne)
;
unary_expression =
( '(' > expression > ')' )
| int_;
operator_expression =
unary_expression >> *(operators > expression);
expression =
( '(' > expression > ')' )
| operator_expression;
}
};
} // namespace client
int main()
{
std::string const input{"1 == 1 != 0"};
using iterator_type = std::string::const_iterator;
using expression_grammar = client::expression_grammar<iterator_type>;
namespace qi = boost::spirit::qi;
expression_grammar program;
iterator_type iter{input.begin()};
iterator_type const end{input.end()};
ast::expression out;
bool r = qi::phrase_parse(iter, end, program, qi::space, out);
if (!r || iter != end)
{
std::cerr << "Parsing failed." << std::endl;
return 1;
}
std::cout << "Parsed: " << out << std::endl;
}
EDIT:
I tried simplifying things to just two rules (and two 'ast's):
struct expression;
using unary_expression = boost::variant<boost::recursive_wrapper<expression>, int>;
enum operators { op_eq, op_ne };
struct expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
BOOST_FUSION_ADAPT_STRUCT(
ast::expression,
(ast::unary_expression, first),
(ast::expression::second_type, second)
)
[...]
unary_expression =
( '(' > expression > ')' )
| int_;
expression =
unary_expression >> *(operators > expression);
but also this result in an infinite loop.
#18 0x00000000004646f2 in ast::expression::expression
#19 0x00000000004669ac in boost::recursive_wrapper<ast::expression>::recursive_wrapper
#20 0x0000000000465821 in boost::variant<boost::recursive_wrapper<ast::expression>, int>::variant
...
Variants default-construct to their first element type.
This indeed directly leads to an infinite loop. (Demo)
The way to solve it is to make the default variant element not re-entrant or to make it lazily constructed. In this case, you can simply re-arrange to make int the first element.
Better yet, there doesn't seem to be a need to reflect the operator precedence hieararchy (as it is expressed in the rules) in the resultant tree, so why not simplify to:
struct unary_expression;
struct binary_expression;
enum operators { op_eq, op_ne };
using expression = boost::variant<
int,
boost::recursive_wrapper<unary_expression>,
boost::recursive_wrapper<binary_expression>
>;
struct unary_expression {
expression expr;
};
struct binary_expression {
expression first;
std::vector<std::pair<operators, expression>> other;
};
This no longer crashes and seems a bit simpler in adaptation and usage.
Simplified Full Demo
This full demo uses that AST, but adds a true unary expression. A few style things have been fixed:
don't expose the skipper unless you intend for the caller to change it
make the parser const
show unparsed trailing data (or instead assert >> qi::eoi)
Note: I might have changed the precedence rules (specifically, associativity of binary operators). I'm not sure which version you require.
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace ast {
struct unary_expression;
struct binary_expression;
enum operators { op_eq, op_ne };
using expression = boost::variant<
int,
boost::recursive_wrapper<unary_expression>,
boost::recursive_wrapper<binary_expression>
>;
struct unary_expression {
bool negated = false;
expression expr;
};
struct binary_expression {
expression first;
std::vector<std::pair<operators, expression>> other;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::unary_expression, negated, expr)
BOOST_FUSION_ADAPT_STRUCT(ast::binary_expression, first, other)
namespace ast {
static inline std::ostream& operator<<(std::ostream& os, operators op) { return os << (op==op_eq?"==":"!="); }
static inline std::ostream& operator<<(std::ostream& os, binary_expression const& e) {
os << e.first;
for (auto& oe : e.other)
os << " " << oe.first << " " << oe.second;
return os;
}
static inline std::ostream& operator<<(std::ostream& os, unary_expression const& e) {
return os << (e.negated?"!":"") << "(" << e.expr << ")";
}
}
namespace client
{
namespace qi = boost::spirit::qi;
template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression()> {
private:
qi::symbols<char, ast::operators> operators;
qi::rule<Iterator, ast::expression()> start;
qi::rule<Iterator, ast::expression(), qi::space_type> simple_expression;
qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
qi::rule<Iterator, ast::binary_expression(), qi::space_type> binary_expression;
qi::rule<Iterator, ast::expression(), qi::space_type> expression;
public:
expression_grammar() : expression_grammar::base_type(start, "expression") {
using namespace qi;
operators.add
("==", ast::op_eq)
("!=", ast::op_ne)
;
simple_expression =
( '(' > expression > ')' )
| int_;
unary_expression =
matches['!'] >> simple_expression;
binary_expression =
unary_expression >> *(operators > expression);
expression = binary_expression;
start = skip(space) [ expression ];
BOOST_SPIRIT_DEBUG_NODES((expression)(binary_expression)(unary_expression)(simple_expression))
}
};
} // namespace client
int main() {
using It = std::string::const_iterator;
client::expression_grammar<It> const program;
std::string const input{"1 == !(1 != 0)"};
It iter = input.begin(), end = input.end();
ast::expression out;
if (parse(iter, end, program, out)) {
std::cout << "Parsed: " << out << std::endl;
} else {
std::cerr << "Parsing failed." << std::endl;
return 1;
}
if (iter != end) {
std::cout << "Remaining unparsed input: '" << std::string(iter, end) << "'\n";
}
}
Prints
Parsed: (1) == !((1) != (0))

boost::spirit append a vector

I have a problem with inserting data into a vector using phoenix::insert.
The code should parse input such as "(move x y z - loc r - robot item)" into a struct Predicate with name "move" and 3 variables of type loc, 1 variable of type robot and 1 variable with default type object. All those symbols are just strings not really relevant to the problem (I believe). The problem is using phoenix::insert in the definition of the rule for predicate.
Here is the code I have:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/home/phoenix/container.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace client {
namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
struct Variable {
std::string name;
std::string type;
};
struct Predicate {
std::string name;
std::vector<Variable> vars;
};
struct TermList {
std::vector<Variable> vars;
TermList() = default;
TermList(std::vector<std::string> names, std::string type)
{
for (auto& n : names)
{
Variable t;
t.name = n;
t.type = type;
vars.push_back(t);
}
}
TermList& operator=(const TermList& rhs) = default;
TermList(const TermList& from) = default;
TermList(TermList&& from) = default;
};
}
BOOST_FUSION_ADAPT_STRUCT(
client::Variable,
(std::string, name)
(std::string, type)
)
BOOST_FUSION_ADAPT_STRUCT(
client::Predicate,
(std::string, name)
(std::vector<client::Variable>, vars)
)
BOOST_FUSION_ADAPT_STRUCT(
client::TermList,
(std::vector<client::Variable>, vars)
)
namespace client {
template <typename Iterator, typename Skipper = ascii::space_type>
struct strips_domain_grammar
: qi::grammar<Iterator, Predicate(),
qi::locals<std::vector<Variable>>, Skipper>
{
strips_domain_grammar()
: strips_domain_grammar::base_type(predicate, "predicate")
{
using qi::eps;
using qi::lit;
using qi::lexeme;
using qi::raw;
using qi::on_error;
using qi::fail;
using phoenix::at_c;
using phoenix::push_back;
using phoenix::insert;
using phoenix::begin;
using phoenix::end;
using phoenix::construct;
using phoenix::val;
using ascii::char_;
using ascii::string;
using ascii::alpha;
using ascii::alnum;
using namespace qi::labels;
// identifier such as move or ?from
identifier %= raw[lexeme[((alpha | char_('_') | char_('?'))
>> *(alnum | char_('_') | char_('-')))]];
// x | x y | x - type | x y z - type
term_list =
+(identifier [push_back(_a, _1)])
>>
(
('-' >
identifier [qi::_val = phoenix::construct<TermList>(qi::_a, qi::_1)])
|
eps [qi::_val = phoenix::construct<TermList>(qi::_a, "object")]
)
;
// (move x y z - loc r - robot item) // item is detault type - object
predicate =
char_('(')
> identifier [at_c<0>(_val) = _1]
> +(term_list [insert(at_c<1>(_val), end(at_c<1>(_val)), // <- ERROR
begin(at_c<0>(_1)), end(at_c<0>(_1)))])
> ')'
;
predicate.name("predicate");
term_list.name("term list");
identifier.name("id");
// on_error is called only when an expectation fails (> instead of >>)
on_error<fail>
(
predicate
, std::cout
<< val("Error! Expecting ")
<< _4 // what failed?
<< val(" here: \"")
<< construct<std::string>(_3, _2) // iterators to error-pos, end
<< val("\"")
<< std::endl
);
}
qi::rule<Iterator, std::string(), Skipper> identifier;
qi::rule<Iterator, TermList(),
qi::locals<std::vector<std::string>>, Skipper> term_list;
qi::rule<Iterator, Predicate(),
qi::locals<std::vector<Variable>>, Skipper> predicate;
};
} // namespace client
int main(int argc, const char** argv)
{
typedef std::string::const_iterator iterator_type;
typedef client::strips_domain_grammar<iterator_type> domain_grammar;
domain_grammar g;
std::string str;
while (std::getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
using boost::spirit::ascii::space;
client::Predicate predicate;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
bool r = phrase_parse(iter, end, g, space, predicate);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "got: " << predicate.name;
std::cout << "\n-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
}
}
but the code leads to the following error (clang3.3 with libc++ and c++11; mac os x 10.8):
boost/spirit/home/phoenix/stl/container/container.hpp:416:16: error: void function 'operator()' should not return a value [-Wreturn-type]
return c.insert(arg1, arg2, arg3);
As mentioned above, I believe the error is the result of using phoenix::insert in an action in the predicate rule.
I "fixed" the problem by editing the boost header and removing the return statement, but given my limited understanding of this library I would like to avoid that...
Can someone please explain the problem or suggest a different solution?