I’m trying to write a grammar for a language which allows the following expressions:
Function calls of the form f args (note: no parentheses!)
Addition (and more complex expressions but that’s not the point here) of the form a + b
For example:
f 42 => f(42)
42 + b => (42 + b)
f 42 + b => f(42 + b)
The grammar is unambiguous (every expression can be parsed in exactly one way) but I don’t know how to write this grammar as a PEG since both productions potentially start with the same token, id. This is my wrong PEG. How can I rewrite it to make it valid?
expression ::= call / addition
call ::= id addition*
addition ::= unary
( ('+' unary)
/ ('-' unary) )*
unary ::= primary
/ '(' ( ('+' unary)
/ ('-' unary)
/ expression)
')'
primary ::= number / id
number ::= [1-9]+
id ::= [a-z]+
Now, when this grammar tries to parse the input “a + b” it parses “a” as a function call with zero arguments and chokes on “+ b”.
I’ve uploaded a C++ / Boost.Spirit.Qi implementation of the grammar in case anybody wants to play with it.
(Note that unary disambiguates unary operations and additions: In order to call a function with a negative number as an argument, you need to specify parentheses, e.g. f (-1).)
As proposed in chat you could start out with something like:
expression = addition | simple;
addition = simple >>
( ('+' > expression)
| ('-' > expression)
);
simple = '(' > expression > ')' | call | unary | number;
call = id >> *expression;
unary = qi::char_("-+") > expression;
// terminals
id = qi::lexeme[+qi::char_("a-z")];
number = qi::double_;
Since then I implemented this in C++ with an AST presentation, so you can get a feel for how this grammar actually build the expression tree by pretty printing it.
All source code is on github: https://gist.github.com/2152518
There are two versions (scroll down to 'Alternative' to read more
Grammar:
template <typename Iterator>
struct mini_grammar : qi::grammar<Iterator, expression_t(), qi::space_type>
{
qi::rule<Iterator, std::string(), qi::space_type> id;
qi::rule<Iterator, expression_t(), qi::space_type> addition, expression, simple;
qi::rule<Iterator, number_t(), qi::space_type> number;
qi::rule<Iterator, call_t(), qi::space_type> call;
qi::rule<Iterator, unary_t(), qi::space_type> unary;
mini_grammar() : mini_grammar::base_type(expression)
{
expression = addition | simple;
addition = simple [ qi::_val = qi::_1 ] >>
+(
(qi::char_("+-") > simple) [ phx::bind(&append_term, qi::_val, qi::_1, qi::_2) ]
);
simple = '(' > expression > ')' | call | unary | number;
call = id >> *expression;
unary = qi::char_("-+") > expression;
// terminals
id = qi::lexeme[+qi::char_("a-z")];
number = qi::double_;
}
};
The corresponding AST structures are defined quick-and-dirty using the very powerful Boost Variant:
struct addition_t;
struct call_t;
struct unary_t;
typedef double number_t;
typedef boost::variant<
number_t,
boost::recursive_wrapper<call_t>,
boost::recursive_wrapper<unary_t>,
boost::recursive_wrapper<addition_t>
> expression_t;
struct addition_t
{
expression_t lhs;
char binop;
expression_t rhs;
};
struct call_t
{
std::string id;
std::vector<expression_t> args;
};
struct unary_t
{
char unop;
expression_t operand;
};
BOOST_FUSION_ADAPT_STRUCT(addition_t, (expression_t, lhs)(char,binop)(expression_t, rhs));
BOOST_FUSION_ADAPT_STRUCT(call_t, (std::string, id)(std::vector<expression_t>, args));
BOOST_FUSION_ADAPT_STRUCT(unary_t, (char, unop)(expression_t, operand));
In the full code, I've also overloaded operator<< for these structures.
Full Demo
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <iterator>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/optional.hpp>
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
struct addition_t;
struct call_t;
struct unary_t;
typedef double number_t;
typedef boost::variant<
number_t,
boost::recursive_wrapper<call_t>,
boost::recursive_wrapper<unary_t>,
boost::recursive_wrapper<addition_t>
> expression_t;
struct addition_t
{
expression_t lhs;
char binop;
expression_t rhs;
friend std::ostream& operator<<(std::ostream& os, const addition_t& a)
{ return os << "(" << a.lhs << ' ' << a.binop << ' ' << a.rhs << ")"; }
};
struct call_t
{
std::string id;
std::vector<expression_t> args;
friend std::ostream& operator<<(std::ostream& os, const call_t& a)
{ os << a.id << "("; for (auto& e : a.args) os << e << ", "; return os << ")"; }
};
struct unary_t
{
char unop;
expression_t operand;
friend std::ostream& operator<<(std::ostream& os, const unary_t& a)
{ return os << "(" << a.unop << ' ' << a.operand << ")"; }
};
BOOST_FUSION_ADAPT_STRUCT(addition_t, (expression_t, lhs)(char,binop)(expression_t, rhs));
BOOST_FUSION_ADAPT_STRUCT(call_t, (std::string, id)(std::vector<expression_t>, args));
BOOST_FUSION_ADAPT_STRUCT(unary_t, (char, unop)(expression_t, operand));
void append_term(expression_t& lhs, char op, expression_t operand)
{
lhs = addition_t { lhs, op, operand };
}
template <typename Iterator>
struct mini_grammar : qi::grammar<Iterator, expression_t(), qi::space_type>
{
qi::rule<Iterator, std::string(), qi::space_type> id;
qi::rule<Iterator, expression_t(), qi::space_type> addition, expression, simple;
qi::rule<Iterator, number_t(), qi::space_type> number;
qi::rule<Iterator, call_t(), qi::space_type> call;
qi::rule<Iterator, unary_t(), qi::space_type> unary;
mini_grammar() : mini_grammar::base_type(expression)
{
expression = addition | simple;
addition = simple [ qi::_val = qi::_1 ] >>
+(
(qi::char_("+-") > simple) [ phx::bind(&append_term, qi::_val, qi::_1, qi::_2) ]
);
simple = '(' > expression > ')' | call | unary | number;
call = id >> *expression;
unary = qi::char_("-+") > expression;
// terminals
id = qi::lexeme[+qi::char_("a-z")];
number = qi::double_;
BOOST_SPIRIT_DEBUG_NODE(expression);
BOOST_SPIRIT_DEBUG_NODE(call);
BOOST_SPIRIT_DEBUG_NODE(addition);
BOOST_SPIRIT_DEBUG_NODE(simple);
BOOST_SPIRIT_DEBUG_NODE(unary);
BOOST_SPIRIT_DEBUG_NODE(id);
BOOST_SPIRIT_DEBUG_NODE(number);
}
};
std::string read_input(std::istream& stream) {
return std::string(
std::istreambuf_iterator<char>(stream),
std::istreambuf_iterator<char>());
}
int main() {
std::cin.unsetf(std::ios::skipws);
std::string const code = read_input(std::cin);
auto begin = code.begin();
auto end = code.end();
try {
mini_grammar<decltype(end)> grammar;
qi::space_type space;
std::vector<expression_t> script;
bool ok = qi::phrase_parse(begin, end, *(grammar > ';'), space, script);
if (begin!=end)
std::cerr << "Unparsed: '" << std::string(begin,end) << "'\n";
std::cout << std::boolalpha << "Success: " << ok << "\n";
if (ok)
{
for (auto& expr : script)
std::cout << "AST: " << expr << '\n';
}
}
catch (qi::expectation_failure<decltype(end)> const& ex) {
std::cout << "Failure; parsing stopped after \""
<< std::string(ex.first, ex.last) << "\"\n";
}
}
Alternative:
I have an alternative version that build addition_t iteratively instead of recursively, so to say:
struct term_t
{
char binop;
expression_t rhs;
};
struct addition_t
{
expression_t lhs;
std::vector<term_t> terms;
};
This removes the need to use Phoenix to build the expression:
addition = simple >> +term;
term = qi::char_("+-") > simple;
Related
I am trying to model a parser for a subset of the C language, for a school project. However, I seem stuck in the process of generating recursive parsing rules for Boost.Spirit, as my rules either overflow the stack or simply do not pick up anything.
For example, I want to model the following syntax:
a ::= ... | A[a] | a1 op a2 | ...
There are some other subsets of syntax for this expression rule, but those are working without problems. For example, if I were to parse A[3*4], it should be read as a recursive parsing where A[...] (A[a] in the syntax) is the array accessor and 3*4 (a1 op a2 in the syntax) is the index.
I've tried defining the following rule objects in the grammar struct:
qi::rule<Iterator, Type(), Skipper> expr_arr;
qi::rule<Iterator, Type(), Skipper> expr_binary_arith;
qi::rule<Iterator, Type(), Skipper> expr_a;
And giving them the following grammar:
expr_arr %= qi::lexeme[identifier >> qi::omit['[']] >> expr_a >> qi::lexeme[qi::omit[']']];
expr_binary_arith %= expr_a >> op_binary_arith >> expr_a;
expr_a %= (expr_binary_arith | expr_arr);
where "op_binary_arith" is a qi::symbol<> object with the allowed operator symbols.
This compiles fine, but upon execution enters a supposedly endless loop, and the stack overflows. I've tried looking at the answer by Sehe in the following question: How to set max recursion in boost spirit.
However, I have been unsuccessful in setting a max recursion depth. Firstly, I failed to make it compile without errors for almost any of my attempts, but on the last attempt it built successfully, albeit with very unexpected results.
Can someone guide me in the right direction, as to how I should go about implementing this grammar correctly?
PEG grammars do not handle left-recursion well. In general you have to split out helper rules to write without left-recursion.
In your particular case, the goal production
a ::= ... | A[a] | a1 op a2 | ...
Seems a little off. This would allows foo[bar] or foo + bar but not foo + bar[qux].
Usually, the choice between array element reference or plain identifier is at a lower level of precedence (often "simple expression").
Here's a tiny elaboration:
literal = number_literal | string_literal; // TODO exapnd?
expr_arr = identifier >> '[' >> (expr_a % ',') >> ']';
simple_expression = literal | expr_arr | identifier;
expr_binary_arith = simple_expression >> op_binary_arith >> expr_a;
expr_a = expr_binary_arith | simple_expression;
Now you can parse e.g.:
for (std::string const& input : {
"A[3*4]",
"A[F[3]]",
"A[8 + F[0x31]]",
"3 * \"foo\"",
})
{
std::cout << std::quoted(input) << " -> ";
It f=begin(input), l=end(input);
AST::Expr e;
if (parse(f,l,g,e)) {
std::cout << "Parsed: " << e << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining: " << std::quoted(std::string(f,l)) << "\b";
}
}
Which prints Live On Coliru
"A[3*4]" -> Parsed: A[3*4]
"A[F[3]]" -> Parsed: A[F[3]]
"A[8 + F[0x31]]" -> Parsed: A[8+F[49]]
"3 * \"foo\"" -> Parsed: 3*"foo"
NOTE I deliberately left efficiency and operator precedence out of the picture for now.
These are talked about in detail in other answers:
Boost::Spirit Expression Parser
Implementing operator precedence with boost spirit
Boost::Spirit : Optimizing an expression parser
And many more
Full Demo Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <experimental/iterator>
namespace qi = boost::spirit::qi;
namespace AST {
using Var = std::string;
struct String : std::string {
using std::string::string;
};
using Literal = boost::variant<String, intmax_t, double>;
enum class ArithOp {
addition, subtraction, division, multplication
};
struct IndexExpr;
struct BinOpExpr;
using Expr = boost::variant<
Literal,
Var,
boost::recursive_wrapper<IndexExpr>,
boost::recursive_wrapper<BinOpExpr>
>;
struct IndexExpr {
Expr expr;
std::vector<Expr> indices;
};
struct BinOpExpr {
Expr lhs, rhs;
ArithOp op;
};
std::ostream& operator<<(std::ostream& os, Literal const& lit) {
struct {
std::ostream& os;
void operator()(String const& s) const { os << std::quoted(s); }
void operator()(double d) const { os << d; }
void operator()(intmax_t i) const { os << i; }
} vis {os};
boost::apply_visitor(vis, lit);
return os;
}
std::ostream& operator<<(std::ostream& os, ArithOp const& op) {
switch(op) {
case ArithOp::addition: return os << '+';
case ArithOp::subtraction: return os << '-';
case ArithOp::division: return os << '/';
case ArithOp::multplication: return os << '*';
}
return os << '?';
}
std::ostream& operator<<(std::ostream& os, BinOpExpr const& e) {
return os << e.lhs << e.op << e.rhs;
}
std::ostream& operator<<(std::ostream& os, IndexExpr const& e) {
std::copy(
begin(e.indices),
end(e.indices),
std::experimental::make_ostream_joiner(os << e.expr << '[', ","));
return os << ']';
}
}
BOOST_FUSION_ADAPT_STRUCT(AST::IndexExpr, expr, indices)
BOOST_FUSION_ADAPT_STRUCT(AST::BinOpExpr, lhs, op, rhs)
template <typename Iterator, typename Skipper = qi::space_type>
struct G : qi::grammar<Iterator, AST::Expr()> {
G() : G::base_type(start) {
using namespace qi;
identifier = alpha >> *alnum;
number_literal =
qi::real_parser<double, qi::strict_real_policies<double> >{}
| "0x" >> qi::uint_parser<intmax_t, 16> {}
| qi::int_parser<intmax_t, 10> {}
;
string_literal = '"' >> *('\\' >> char_escape | ~char_('"')) >> '"';
literal = number_literal | string_literal; // TODO exapnd?
expr_arr = identifier >> '[' >> (expr_a % ',') >> ']';
simple_expression = literal | expr_arr | identifier;
expr_binary_arith = simple_expression >> op_binary_arith >> expr_a;
expr_a = expr_binary_arith | simple_expression;
start = skip(space) [expr_a];
BOOST_SPIRIT_DEBUG_NODES(
(start)
(expr_a)(expr_binary_arith)(simple_expression)(expr_a)
(literal)(number_literal)(string_literal)
(identifier))
}
private:
struct escape_sym : qi::symbols<char, char> {
escape_sym() {
this->add
("b", '\b')
("f", '\f')
("r", '\r')
("n", '\n')
("t", '\t')
("\\", '\\')
;
}
} char_escape;
struct op_binary_arith_sym : qi::symbols<char, AST::ArithOp> {
op_binary_arith_sym() {
this->add
("+", AST::ArithOp::addition)
("-", AST::ArithOp::subtraction)
("/", AST::ArithOp::division)
("*", AST::ArithOp::multplication)
;
}
} op_binary_arith;
qi::rule<Iterator, AST::Expr()> start;
qi::rule<Iterator, AST::IndexExpr(), Skipper> expr_arr;
qi::rule<Iterator, AST::BinOpExpr(), Skipper> expr_binary_arith;
qi::rule<Iterator, AST::Expr(), Skipper> simple_expression, expr_a;
// implicit lexemes
qi::rule<Iterator, AST::Literal()> literal, string_literal, number_literal;
qi::rule<Iterator, AST::Var()> identifier;
};
int main() {
using It = std::string::const_iterator;
G<It> const g;
for (std::string const& input : {
"A[3*4]",
"A[F[3]]",
"A[8 + F[0x31]]",
"3 * \"foo\"",
})
{
std::cout << std::quoted(input) << " -> ";
It f=begin(input), l=end(input);
AST::Expr e;
if (parse(f,l,g,e)) {
std::cout << "Parsed: " << e << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining: " << std::quoted(std::string(f,l)) << "\b";
}
}
}
Think about a preprocessor which will read the raw text (no significant white space or tokens).
There are 3 rules.
resolve_para_entry should solve the Argument inside a call. The top-level text is returned as string.
resolve_para should resolve the whole Parameter list and put all the top-level Parameter in a string list.
resolve is the entry
On the way I track the iterator and get the text portion
Samples:
sometext(para) → expect para in the string list
sometext(para1,para2) → expect para1 and para2 in string list
sometext(call(a)) → expect call(a) in the string list
sometext(call(a,b)) ← here it fails; it seams that the "!lit(',')" wont take the Parser to step outside ..
Rules:
resolve_para_entry = +(
(iter_pos >> lit('(') >> (resolve_para_entry | eps) >> lit(')') >> iter_pos) [_val= phoenix::bind(&appendString, _val, _1,_3)]
| (!lit(',') >> !lit(')') >> !lit('(') >> (wide::char_ | wide::space)) [_val = phoenix::bind(&appendChar, _val, _1)]
);
resolve_para = (lit('(') >> lit(')'))[_val = std::vector<std::wstring>()] // empty para -> old style
| (lit('(') >> resolve_para_entry >> *(lit(',') >> resolve_para_entry) > lit(')'))[_val = phoenix::bind(&appendStringList, _val, _1, _2)]
| eps;
;
resolve = (iter_pos >> name_valid >> iter_pos >> resolve_para >> iter_pos);
In the end doesn't seem very elegant. Maybe there is a better way to parse such stuff without skipper
Indeed this should be a lot simpler.
First off, I fail to see why the absense of a skipper is at all relevant.
Second, exposing the raw input is best done using qi::raw[] instead of dancing with iter_pos and clumsy semantic actions¹.
Among the other observations I see:
negating a charset is done with ~, so e.g. ~char_(",()")
(p|eps) would be better spelled -p
(lit('(') >> lit(')')) could be just "()" (after all, there's no skipper, right)
p >> *(',' >> p) is equivalent to p % ','
With the above, resolve_para simplifies to this:
resolve_para = '(' >> -(resolve_para_entry % ',') >> ')';
resolve_para_entry seems weird, to me. It appears that any nested parentheses are simply swallowed. Why not actually parse a recursive grammar so you detect syntax errors?
Here's my take on it:
Define An AST
I prefer to make this the first step because it helps me think about the parser productions:
namespace Ast {
using ArgList = std::list<std::string>;
struct Resolve {
std::string name;
ArgList arglist;
};
using Resolves = std::vector<Resolve>;
}
Creating The Grammar Rules
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, std::string()> arg, identifier;
And their definitions:
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
Notes:
No more semantic actions
No more eps
No more iter_pos
I've opted to make arglist not-optional. If you really wanted that, change it back:
resolve = identifier >> -arglist;
But in our sample it will generate a lot of noisy output.
Of course your entry point (start) will be different. I just did the simplest thing that could possibly work, using another handy parser directive from the Spirit Repository (like iter_pos that you were already using): seek[]
The hold is there for this reason: boost::spirit::qi duplicate parsing on the output - You might not need it in your actual parser.
Live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace Ast {
using ArgList = std::list<std::string>;
struct Resolve {
std::string name;
ArgList arglist;
};
using Resolves = std::vector<Resolve>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Resolve, name, arglist)
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
template <typename It>
struct Parser : qi::grammar<It, Ast::Resolves()>
{
Parser() : Parser::base_type(start) {
using namespace qi;
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
}
private:
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, std::string()> arg, identifier;
};
#include <iostream>
int main() {
using It = std::string::const_iterator;
std::string const samples = R"--(
Samples:
sometext(para) → expect para in the string list
sometext(para1,para2) → expect para1 and para2 in string list
sometext(call(a)) → expect call(a) in the string list
sometext(call(a,b)) ← here it fails; it seams that the "!lit(',')" wont make the parser step outside
)--";
It f = samples.begin(), l = samples.end();
Ast::Resolves data;
if (parse(f, l, Parser<It>{}, data)) {
std::cout << "Parsed " << data.size() << " resolves\n";
} else {
std::cout << "Parsing failed\n";
}
for (auto& resolve: data) {
std::cout << " - " << resolve.name << "\n (\n";
for (auto& arg : resolve.arglist) {
std::cout << " " << arg << "\n";
}
std::cout << " )\n";
}
}
Prints
Parsed 6 resolves
- sometext
(
para
)
- sometext
(
para1
para2
)
- sometext
(
call(a)
)
- call
(
a
)
- call
(
a
b
)
- lit
(
'
'
)
More Ideas
That last output shows you a problem with your current grammar: lit(',') should obviously not be seen as a call with two parameters.
I recently did an answer on extracting (nested) function calls with parameters which does things more neatly:
Boost spirit parse rule is not applied
or this one boost spirit reporting semantic error
BONUS
Bonus version that uses string_view and also shows exact line/column information of all extracted words.
Note that it still doesn't require any phoenix or semantic actions. Instead it simply defines the necesary trait to assign to boost::string_view from an iterator range.
Live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/utility/string_view.hpp>
namespace Ast {
using Source = boost::string_view;
using ArgList = std::list<Source>;
struct Resolve {
Source name;
ArgList arglist;
};
using Resolves = std::vector<Resolve>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Resolve, name, arglist)
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<boost::string_view, It, void> {
static void call(It f, It l, boost::string_view& attr) {
attr = boost::string_view { f.base(), size_t(std::distance(f.base(),l.base())) };
}
};
} } }
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
template <typename It>
struct Parser : qi::grammar<It, Ast::Resolves()>
{
Parser() : Parser::base_type(start) {
using namespace qi;
identifier = raw [ char_("a-zA-Z_") >> *char_("a-zA-Z0-9_") ];
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
}
private:
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, Ast::Source()> arg, identifier;
};
#include <iostream>
struct Annotator {
using Ref = boost::string_view;
struct Manip {
Ref fragment, context;
friend std::ostream& operator<<(std::ostream& os, Manip const& m) {
return os << "[" << m.fragment << " at line:" << m.line() << " col:" << m.column() << "]";
}
size_t line() const {
return 1 + std::count(context.begin(), fragment.begin(), '\n');
}
size_t column() const {
return 1 + (fragment.begin() - start_of_line().begin());
}
Ref start_of_line() const {
return context.substr(context.substr(0, fragment.begin()-context.begin()).find_last_of('\n') + 1);
}
};
Ref context;
Manip operator()(Ref what) const { return {what, context}; }
};
int main() {
using It = std::string::const_iterator;
std::string const samples = R"--(Samples:
sometext(para) → expect para in the string list
sometext(para1,para2) → expect para1 and para2 in string list
sometext(call(a)) → expect call(a) in the string list
sometext(call(a,b)) ← here it fails; it seams that the "!lit(',')" wont make the parser step outside
)--";
It f = samples.begin(), l = samples.end();
Ast::Resolves data;
if (parse(f, l, Parser<It>{}, data)) {
std::cout << "Parsed " << data.size() << " resolves\n";
} else {
std::cout << "Parsing failed\n";
}
Annotator annotate{samples};
for (auto& resolve: data) {
std::cout << " - " << annotate(resolve.name) << "\n (\n";
for (auto& arg : resolve.arglist) {
std::cout << " " << annotate(arg) << "\n";
}
std::cout << " )\n";
}
}
Prints
Parsed 6 resolves
- [sometext at line:3 col:1]
(
[para at line:3 col:10]
)
- [sometext at line:4 col:1]
(
[para1 at line:4 col:10]
[para2 at line:4 col:16]
)
- [sometext at line:5 col:1]
(
[call(a) at line:5 col:10]
)
- [call at line:5 col:34]
(
[a at line:5 col:39]
)
- [call at line:6 col:10]
(
[a at line:6 col:15]
[b at line:6 col:17]
)
- [lit at line:6 col:62]
(
[' at line:6 col:66]
[' at line:6 col:68]
)
¹ Boost Spirit: "Semantic actions are evil"?
Below is a very compact version of a grammar I'm trying to write using boost::spirit::qi.
Environment: VS2013, x86, Boost1.64
When #including the header file, the compiler complains about the line
rBlock = "{" >> +(rInvocation) >> "}";
with a very long log (I've only copied the beginning and the end):
more than one partial specialization matches the template argument list
...
...
see reference to function template instantiation
'boost::spirit::qi::rule
&boost::spirit::qi::rule::operator =>(const Expr &)' being compiled
Where is my mistake?
The header file:
//mygrammar.h
#pragma once
#include <boost/spirit/include/qi.hpp>
namespace myNS
{
typedef std::string Identifier;
typedef ::boost::spirit::qi::rule <const char*, Identifier()> myIdentifierRule;
typedef ::boost::variant<char, int> Expression;
typedef ::boost::spirit::qi::rule <const char*, Expression()> myExpressionRule;
struct IdntifierEqArgument
{
Identifier ident;
Expression arg;
};
typedef ::boost::variant < IdntifierEqArgument, Expression > Argument;
typedef ::boost::spirit::qi::rule <const char*, Argument()> myArgumentRule;
typedef ::std::vector<Argument> ArgumentList;
typedef ::boost::spirit::qi::rule <const char*, myNS::ArgumentList()> myArgumentListRule;
struct Invocation
{
Identifier identifier;
::boost::optional<ArgumentList> args;
};
typedef ::boost::spirit::qi::rule <const char*, Invocation()> myInvocationRule;
typedef ::std::vector<Invocation> Block;
typedef ::boost::spirit::qi::rule <const char*, myNS::Block()> myBlockRule;
}
BOOST_FUSION_ADAPT_STRUCT(
myNS::IdntifierEqArgument,
(auto, ident)
(auto, arg)
);
BOOST_FUSION_ADAPT_STRUCT(
myNS::Invocation,
(auto, identifier)
(auto, args)
);
namespace myNS
{
struct myRules
{
myIdentifierRule rIdentifier;
myExpressionRule rExpression;
myArgumentRule rArgument;
myArgumentListRule rArgumentList;
myInvocationRule rInvocation;
myBlockRule rBlock;
myRules()
{
using namespace ::boost::spirit;
using namespace ::boost::spirit::qi;
rIdentifier = as_string[((qi::alpha | '_') >> *(qi::alnum | '_'))];
rExpression = char_ | int_;
rArgument = (rIdentifier >> "=" >> rExpression) | rExpression;
rArgumentList = rArgument >> *("," >> rArgument);
rInvocation = rIdentifier >> "(" >> -rArgumentList >> ")";
rBlock = "{" >> +(rInvocation) >> "}";
}
};
}
I'm not exactly sure where the issue is triggered, but it clearly is a symptom of too many ambiguities in the attribute forwarding rules.
Conceptually this could be triggered by your attribute types having similar/compatible layouts. In language theory, you're looking at a mismatch between C++'s nominative type system versus the approximation of structural typing in the attribute propagation system. But enough theorism :)
I don't think attr_cast<> will save you here as it probably uses the same mechanics and heuristics under the hood.
It drew my attention that making the ArgumentList optional is ... not very useful (as an empty list already accurately reflects absense of arguments).
So I tried simplifying the rules:
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
And the declared attribute type can be simply ArgumentList instead of boost::optional::ArgumentList.
This turns out to remove the ambiguity when propagating into the vector<Invocation>, so ... you're saved.
If this feels "accidental" to you, you should! What would I do if this hadn't removed the ambiguity "by chance"? I'd have created a semantic action to propagate the Invocation by simpler mechanics. There's a good chance that fusion::push_back(_val, _1) or similar would have worked.
See also Boost Spirit: "Semantic actions are evil"?
Review And Demo
In the cleaned up review here I present a few fixes/improvements and a test run that dumps the parsed AST.
Separate AST from parser (you don't want use qi in the AST types. You specifically do not want using namespace directives in the face of generic template libraries)
Do not use auto in the adapt macros. That's not a feature. Instead, since you can ostensibly use C++11, use the C++11 (decltype) based macros
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
AST is leading (also, prefer c++11 for clarity):
namespace myAST {
using Identifier = std::string;
using Expression = boost::variant<char, int>;
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
struct Invocation {
Identifier identifier;
ArgumentList args;
};
using Block = std::vector<Invocation>;
}
It's nice to have the definitions separate
Regarding the parser,
I'd prefer the qi::grammar convention. Also,
You didn't declare any of the rules with a skipper. I "guessed" from context that whitespace is insignificant outside of the rules for Expression and Identifier.
Expression ate every char_, so also would eat ')' or even '3'. I noticed this only when testing and after debugging with:
//#define BOOST_SPIRIT_DEBUG
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
I highly recommend using these facilities
All in all the parser comes down to
namespace myNS {
namespace qi = boost::spirit::qi;
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
rExpression = qi::alpha | qi::int_;
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
Adding a test driver
int main() {
std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";
auto f = input.data(), l = f + input.size();
myAST::Block block;
bool ok = parse(f, l, myNS::myRules<>{}, block);
if (ok) {
std::cout << "Parse success\n";
for (auto& invocation : block) {
std::cout << invocation.identifier << "(";
for (auto& arg : invocation.args) std::cout << arg << ",";
std::cout << ")\n";
}
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Complete Demo
See it Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
namespace myAST {
using Identifier = std::string;
using Expression = boost::variant<char, int>;
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
struct Invocation {
Identifier identifier;
ArgumentList args;
};
using Block = std::vector<Invocation>;
// for debug printing
static inline std::ostream& operator<<(std::ostream& os, myAST::IdntifierEqArgument const& named) {
return os << named.ident << "=" << named.arg;
}
}
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
namespace myNS {
namespace qi = boost::spirit::qi;
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
rExpression = qi::alpha | qi::int_;
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
int main() {
std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";
auto f = input.data(), l = f + input.size();
myAST::Block block;
bool ok = parse(f, l, myNS::myRules<>{}, block);
if (ok) {
std::cout << "Parse success\n";
for (auto& invocation : block) {
std::cout << invocation.identifier << "(";
for (auto& arg : invocation.args) std::cout << arg << ",";
std::cout << ")\n";
}
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints output
Parse success
foo()
bar(a,b,42,)
qux(someThing_awful01=9,)
Remaining unparsed input: '
'
I want to find out whether I can detect function call using regex. The basic case is easy: somefunction(1, 2);
But what if I had code:
somefunction(someotherfunction(), someotherotherfunction());
or
somefunction(function () { return 1; }, function() {return 2;});
or
caller_function(somefunction(function () { return 1; }, function() {return 2;}))
In this case I need to match same number of opening braces and closing braces so that I can find end of call to somefunction
Is it possible?
Thanks in advance.
Your question is misleading. It's not as simple as you think.
First, the grammar isn't regular. Regular expressions are not the right tool.
Second, you ask "detecting function call" but the samples show anonymous function definitions, a totally different ball game.
Here's a start using Boost Spirit:
start = skip(space) [ fcall ];
fcall = ident >> '(' >> -args >> ')';
args = arg % ',';
arg = fdef | fcall;
fdef = lexeme["function"] >> '(' >> -formals >> ')' >> body;
formals = ident % ',';
identch = alpha | char_("_");
ident = identch >> *(identch|digit);
body = '{' >> *~char_('}') >> '}';
Which would map onto an AST like:
struct function_definition {
std::vector<std::string> formal_arguments;
std::string body;
};
struct function_call;
using argument = boost::variant<
function_definition,
boost::recursive_wrapper<function_call>
>;
struct function_call {
std::string name;
std::vector<argument> args;
};
DEMO
Live On Coliru
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
struct function_definition {
std::vector<std::string> formal_arguments;
std::string body;
};
struct function_call;
using argument = boost::variant<
function_definition,
boost::recursive_wrapper<function_call>
>;
struct function_call {
std::string name;
std::vector<argument> args;
};
BOOST_FUSION_ADAPT_STRUCT(function_call, name, args)
BOOST_FUSION_ADAPT_STRUCT(function_definition, formal_arguments, body)
namespace qi = boost::spirit::qi;
template <typename It>
struct Parser : qi::grammar<It, function_call()> {
Parser() : Parser::base_type(start) {
using namespace qi;
start = skip(space) [ fcall ];
fcall = ident >> '(' >> -args >> ')';
args = arg % ',';
arg = fdef | fcall;
fdef = lexeme["function"] >> '(' >> -formals >> ')' >> body;
formals = ident % ',';
identch = alpha | char_("_");
ident = identch >> *(identch|digit);
body = '{' >> *~char_('}') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start)(fcall)(args)(arg)(fdef)(formals)(ident)(body))
}
private:
using Skipper = qi::space_type;
qi::rule<It, function_call()> start;
qi::rule<It, function_call(), Skipper> fcall;
qi::rule<It, argument(), Skipper> arg;
qi::rule<It, std::vector<argument>(), Skipper> args;
qi::rule<It, function_definition(), Skipper> fdef;
qi::rule<It, std::vector<std::string>(), Skipper> formals;
qi::rule<It, char()> identch;
qi::rule<It, std::string()> ident, body;
};
// for debug:
#include <experimental/iterator>
static inline std::ostream& operator<<(std::ostream& os, function_definition const& v) {
os << "function(";
std::copy(v.formal_arguments.begin(), v.formal_arguments.end(), std::experimental::make_ostream_joiner(os, ", "));
return os << ") {" << v.body << "}";
}
static inline std::ostream& operator<<(std::ostream& os, function_call const& v) {
os << v.name << "(";
std::copy(v.args.begin(), v.args.end(), std::experimental::make_ostream_joiner(os, ", "));
return os << ")";
}
int main() {
std::string const input = "caller_function(somefunction(function () { return 1; }, function() {return 2;}))";
using It = std::string::const_iterator;
Parser<It> const p;
It f = input.begin(), l = input.end();
function_call parsed;
bool ok = parse(f, l, p, parsed);
if (ok) {
std::cout << "Parsed ok: " << parsed << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints
Parsed ok: caller_function(somefunction(function() { return 1; }, function() {return 2;}))
I was attempting to replicate this example in order to implement C++ like operator precedence rules (I started with a subset, but I eventually plan to add the others).
Try as I might, I could not get the grammar to parse a single binary operation. It would parse literals (44, 3.42, "stackoverflow") just fine, but would fail anything like 3 + 4.
I did look at this question, and this one in an attempt to get my solution to work, but got the same result.
(In an attempt to keep things short, I'll post only the relevant bits here, the full code is here)
Relevant data structures for the AST:
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};
typedef boost::variant<double, int, std::string> Litteral;
struct Identifier { std::string name; };
typedef boost::variant<
Litteral,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
struct BinaryOperation
{
Expression rhs, lhs;
BinaryOperator op;
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
The grammar:
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
equal %= (lowerGreater >> equalOp >> lowerGreater);
equal.name("equal");
lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
lowerGreater %= (shift >> lowerGreaterOp >> shift);
lowerGreater.name("lower or greater");
shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
shift %= (addSub >> shiftOp >> addSub);
shift.name("shift");
addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
addSub %= (multDivMod >> addSubOp >> multDivMod);
addSub.name("add or sub");
multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
multDivMod %= (value >> multDivModOp >> value);
multDivMod.name("mult, div, or mod");
value %= identifier | litteral | ('(' > expression > ')');
value.name("value");
start %= qi::eps >> *(value >> qi::lit(';'));
start.name("start");
expression %= identifier | litteral | equal;
expression.name("expression");
identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
identifier.name("identifier");
litteral %= qi::double_ | qi::int_ | quotedString;
litteral.name("litteral");
quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
quotedString.name("quoted string");
namespace phx = boost::phoenix;
using namespace qi::labels;
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
The main problem (indeed) appears to be addressed in that second answer you linked to.
Let me address some points:
the main problem was was compound:
your start rule is
start %= qi::eps >> *(value >> qi::lit(';'));
this means it expects values:
value %= identifier | literal | ('(' > expression > ')');
however, since this parses only identifiers and literals or parenthesized subexpressions, the 3+4 binary operation will never be parsed.
your expression rule, again allows identifier or literal first (redundant/confusing):
expression %= identifier | literal | equal;
I think you'd want something more like
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
// and then
start = qi::eps >> -expression % ';';
your BinaryOperation productions allow only for the case where the operator is present; this breaks the way the rules are nested for operator precedence: a multDivOp would never be accepted as match, unless it happens to be followed by an addSubOp:
addSub %= (multDivMod >> addSubOp >> multDivMod);
multDivMod %= (value >> multDivModOp >> value);
This can best be fixed as shown in the linked answer:
addSub = multDivMod >> -(addSubOp >> multDivMod);
multDivMod = value >> -(multDivModOp >> value);
where you can use semantic actions to build the AST nodes "dynamically":
addSub = multDivMod >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
This beats the 'tedious" declarative approach hands-down (which leads to a lot of backtracking, see e.g. Boost spirit poor performance with Alternative parser)
the literal rule will parse an integer as a double, because it isn't strict:
literal %= qi::double_ | qi::int_ | quotedString;
you can fix this like:
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
FunctionCall should adapt functionName as an Identifier (not std::string)
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
You Expression operator<< could (should) be a boost::static_visitor so that you
eliminate magic type switch numbers
get compiler checking of completeness of the switch
can leverage overload resolution to switch on variant member types
Using c++11, the code could still be inside the one function:
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
os << "Expression ";
struct v : boost::static_visitor<> {
v(std::ostream& os) : os(os) {}
std::ostream& os;
void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(FunctionCall const& e) const {
os << "(function call: " << e.functionName << "(";
if (e.args.size() > 0) os << e.args.front();
for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
os << ")";
}
};
boost::apply_visitor(v(os), expr);
return os;
}
you can use the BOOST_SPIRIT_DEBUG_NODES macro to name your rules
BOOST_SPIRIT_DEBUG_NODES(
(start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
you should include from the spirit/include/ directory, which then relays to spirit/home/ or phoenix/include/ instead of including them directly.
Here is a fully working sample, that also improved the grammar rules for readability Live On Coliru:
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
enum class UnaryOperator
{
NOT,
PLUS,
MINUS,
};
std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
{
switch (op)
{
case UnaryOperator::NOT: return os << "!";
case UnaryOperator::PLUS: return os << "+";
case UnaryOperator::MINUS: return os << "-";
}
assert(false);
}
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE,
MODULO,
LEFT_SHIFT, RIGHT_SHIFT,
EQUAL, NOT_EQUAL,
LOWER, LOWER_EQUAL,
GREATER, GREATER_EQUAL,
};
std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
{
switch (op)
{
case BinaryOperator::ADD: return os << "+";
case BinaryOperator::SUBTRACT: return os << "-";
case BinaryOperator::MULTIPLY: return os << "*";
case BinaryOperator::DIVIDE: return os << "/";
case BinaryOperator::MODULO: return os << "%";
case BinaryOperator::LEFT_SHIFT: return os << "<<";
case BinaryOperator::RIGHT_SHIFT: return os << ">>";
case BinaryOperator::EQUAL: return os << "==";
case BinaryOperator::NOT_EQUAL: return os << "!=";
case BinaryOperator::LOWER: return os << "<";
case BinaryOperator::LOWER_EQUAL: return os << "<=";
case BinaryOperator::GREATER: return os << ">";
case BinaryOperator::GREATER_EQUAL: return os << ">=";
}
assert(false);
}
typedef boost::variant<
double,
int,
std::string
> Literal;
struct Identifier
{
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))
struct UnaryOperation;
struct BinaryOperation;
struct FunctionCall;
typedef boost::variant<
Literal,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
struct UnaryOperation
{
Expression rhs;
UnaryOperator op;
};
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))
struct BinaryOperation
{
Expression rhs;
BinaryOperator op;
Expression lhs;
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))
struct FunctionCall
{
Identifier functionName;
std::vector<Expression> args;
};
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
struct Program
{
std::vector<Expression> statements;
};
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
os << "Expression ";
struct v : boost::static_visitor<> {
v(std::ostream& os) : os(os) {}
std::ostream& os;
void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
void operator()(FunctionCall const& e) const {
os << "(function call: " << e.functionName << "(";
if (e.args.size() > 0) os << e.args.front();
for (auto it = e.args.begin() + 1; it != e.args.end(); it++) { os << ", " << *it; }
os << ")";
}
};
boost::apply_visitor(v(os), expr);
return os;
}
std::ostream& operator<<(std::ostream& os, const Program& prog)
{
os << "Program" << std::endl << "{" << std::endl;
for (const Expression& expr : prog.statements)
{
std::cout << "\t" << expr << std::endl;
}
os << "}" << std::endl;
return os;
}
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
using namespace qi::labels;
equalOp.add
("==", BinaryOperator::EQUAL)
("!=", BinaryOperator::NOT_EQUAL);
lowerGreaterOp.add
("<", BinaryOperator::LOWER)
("<=", BinaryOperator::LOWER_EQUAL)
(">", BinaryOperator::GREATER)
(">=", BinaryOperator::GREATER_EQUAL);
shiftOp.add
("<<", BinaryOperator::LEFT_SHIFT)
(">>", BinaryOperator::RIGHT_SHIFT);
addSubOp.add
("+", BinaryOperator::ADD)
("-", BinaryOperator::SUBTRACT);
multDivModOp.add
("*", BinaryOperator::MULTIPLY)
("/", BinaryOperator::DIVIDE)
("%", BinaryOperator::MODULO);
equal = lowerGreater [ _val=_1 ] >> -(equalOp >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
lowerGreater = shift [ _val=_1 ] >> -(lowerGreaterOp >> shift) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
shift = addSub [ _val=_1 ] >> -(shiftOp >> addSub) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
addSub = multDivMod [ _val=_1 ] >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value [ _val=_1 ] >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
start = qi::eps >> -expression % ';';
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
identifier = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, Expression()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Literal()> literal;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
typedef std::string::iterator Iterator;
typedef boost::spirit::ascii::space_type Skipper;
int main()
{
BoltGrammar<Iterator, Skipper> grammar;
std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");
Program prog;
Iterator iter = str.begin(), last = str.end();
bool r = phrase_parse(iter, last, grammar, ascii::space, prog);
if (r && iter == last)
{
std::cout << "Parsing succeeded: " << prog << std::endl;
}
else
{
std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
}
return 0;
}
Prints:
Parsing succeeded: Program
{
Expression (literal: 3)
Expression (literal: 4.2)
Expression (literal: lounge <c++>)
Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
}