n-ary boolean grammar conversion from infix to prefix with Boost::Spirit? - c++

I need to convert infix notations like the one below to n-ary prefix notation with Boost::Spirit, but I am failing at building on the answers from https://stackoverflow.com/a/8707598/1816477 et al.
This is what I am trying to parse:
not (xyz='a' or xyz='b' or xyz='c') and abc='s' xor (pqr ='v' and xyz='d')
and this LISP-styled format is what I am trying to provide as output (do not mind the indentation):
(xor (and (= pqr 'v') (= xyz 'd'))
(and (= abc 's')
(not (or (= xyz 'a')
(= xyz 'b')
(= xyz 'c')))))
So, the terms I try to parse consist of prefixed (not <expression>) and infix expressions (<expression> and <expression> and ... etc.), i.e.: assignments, negations and n-ary ands, ors, xors etc., implying operator precedence (or < xor < and < assignment < negation).
What I am failing at is getting the grammar right. Outputting to a suitable boost::variant representing the parsed boolean expression I think I am able to accomplish. I am thinking of an output structure like this one:
// Forward declarations so the variant below can name the recursive node types.
struct prefixExpr;
struct infixExpr;
// A parsed expression: a leaf string, a prefix-operator node, or an infix-operator node.
// The node types are incomplete at this point, hence boost::recursive_wrapper.
typedef boost::variant<
std::string, // identifiers, values etc.
boost::recursive_wrapper<prefixExpr>, // e.g. negation
boost::recursive_wrapper<infixExpr> // assignment, and, or, xor etc.
> expression;
// Prefix-operator node: one operator applied to a single operand.
struct prefixExpr {
std::string op; // currently only "not"
expression expr;
};
// Expose the members op and expr as a Boost.Fusion sequence.
BOOST_FUSION_ADAPT_STRUCT(prefixExpr, op, expr)
// Infix-operator node: one operator together with a list of operands.
struct infixExpr {
std::string op; // "and", "or", "xor", "="
std::vector<expression> exprs;
};
// Expose the members op and exprs as a Boost.Fusion sequence.
BOOST_FUSION_ADAPT_STRUCT(infixExpr, op, exprs)
What do I need to do to be able to parse expressions like the one mentioned above and convert them to a prefix notation?
I am using the boost 1.67.0 (the latest at the time of writing) and Visual Studio 15.7.3 (also the latest at the time of writing).

The code is not perfect but should be simple to understand:
#include <boost/variant.hpp>
#include <boost/spirit/home/x3.hpp>
#include <vector>
#include <string>
#include <iostream>
// Leaf node for an identifier; a type distinct from `value` so the variant can tell the two apart.
struct id : std::string {};
// Leaf node for the text of a quoted literal.
struct value : std::string {};
// Forward declaration: nary_expr stores `expr`s and `expr` can hold an nary_expr.
struct nary_expr;
// A parsed expression: an identifier, a literal, or an operator node.
using expr = boost::variant<
id, value,
boost::recursive_wrapper<nary_expr>
>;
// Operator node: one operator name applied to a list of operand expressions.
struct nary_expr
{
std::string op;
std::vector<expr> exprs;
};
namespace x3 = boost::spirit::x3;
auto compose_nary_expr = [](auto& ctx)
{
//auto&& [left, tail] = x3::_attr(ctx);
auto&& left = boost::fusion::at_c<0>(x3::_attr(ctx));
auto&& tail = boost::fusion::at_c<1>(x3::_attr(ctx));
if (tail.size() == 0) {
x3::_val(ctx) = left;
return;
}
// left associativity
auto op = boost::fusion::at_c<0>(tail[0]);
std::vector<expr> exprs = { left, boost::fusion::at_c<1>(tail[0]) };
for (std::size_t i = 1; i < tail.size(); ++i) {
// same priority but different operator
auto&& next_op = boost::fusion::at_c<0>(tail[i]);
if (op != next_op) {
exprs = std::vector<expr>{ nary_expr{ op, std::move(exprs) } };
op = next_op;
}
exprs.push_back(boost::fusion::at_c<1>(tail[i]));
}
x3::_val(ctx) = nary_expr{ op, std::move(exprs) };
};
// One rule per precedence level; every level synthesises an `expr`.
x3::rule<class prec4_expr_rule, expr> const prec4_expr("prec4_expr");
x3::rule<class prec3_expr_rule, expr> const prec3_expr("prec3_expr");
x3::rule<class prec2_expr_rule, expr> const prec2_expr("prec2_expr");
x3::rule<class prec1_expr_rule, expr> const prec1_expr("prec1_expr");
x3::rule<class prec0_expr_rule, expr> const prec0_expr("prec0_expr");
// Level 4: prec3 operands joined by "or". `>` is the expectation operator, so once
// the keyword has matched, the following operand is mandatory.
auto const prec4_expr_def = prec4_expr = (
prec3_expr
>> *( (x3::string("or") > prec3_expr)
)
)[compose_nary_expr];
// Level 3: prec2 operands joined by "xor".
auto const prec3_expr_def = prec3_expr = (
prec2_expr
>> *( (x3::string("xor") > prec2_expr)
)
)[compose_nary_expr];
// Level 2: prec1 operands joined by "and".
auto const prec2_expr_def = prec2_expr = (
prec1_expr
>> *( (x3::string("and") > prec1_expr)
)
)[compose_nary_expr];
auto compose_binary_expr = [](auto& ctx)
{
auto&& rhs = boost::fusion::at_c<0>(x3::_attr(ctx));
auto&& tail = boost::fusion::at_c<1>(x3::_attr(ctx));
if (tail.size() > 0) {
auto&& op = boost::fusion::at_c<0>(tail[0]);
auto&& lhs = boost::fusion::at_c<1>(tail[0]);
x3::_val(ctx) = nary_expr{ op, { rhs, lhs } };
}
else {
x3::_val(ctx) = rhs;
}
};
// Level 1: prec0 operands joined by "=".
// should use optional, but something wrong with spirit
auto const prec1_expr_def = prec1_expr = (
prec0_expr >> *(x3::string("=") > prec0_expr)
)[compose_binary_expr];
// Rule for a "not"-prefixed expression; synthesises an `expr`.
x3::rule<class not_expr_rule, expr> const not_expr("not_expr");
auto compose_unary_expr = [](auto& ctx)
{
//auto&& [op, expr] = x3::_attr(ctx);
auto&& op = boost::fusion::at_c<0>(x3::_attr(ctx));
auto&& expr = boost::fusion::at_c<1>(x3::_attr(ctx));
x3::_val(ctx) = nary_expr{ op, { expr } };
};
// "not" followed by a mandatory prec0 operand.
auto const not_expr_def = not_expr = (x3::string("not") > prec0_expr)[compose_unary_expr];
// Identifier: a letter followed by any number of alphanumerics, matched without skipping.
auto const id_term = x3::rule<class id_r, id>{} = x3::lexeme[x3::alpha >> *x3::alnum];
// Literal: one or more non-quote characters enclosed in single quotes, matched without skipping.
auto const value_term = x3::rule<class value_r, value>{} = x3::lexeme["'" > +~x3::char_('\'') >> "'"];
// Level 0 (operands): literal, parenthesised expression, negation or identifier,
// tried in this order.
auto const prec0_expr_def =
value_term
| ( '(' > prec4_expr >> ')' )
| not_expr
| id_term
;
// Bind each *_def above to the rule it defines.
BOOST_SPIRIT_DEFINE(
prec0_expr
, prec1_expr
, prec2_expr
, prec3_expr
, prec4_expr
, not_expr
);
// Indentation depth, measured in space characters.
struct indent
{
    std::size_t cur;
};

// Returns an indentation that is `rhs` columns deeper than `lhs`.
indent operator+(indent lhs, std::size_t rhs)
{
    return { lhs.cur + rhs };
}

// Writes `v.cur` space characters to `os` and returns `os`.
std::ostream& operator<<(std::ostream& os, indent const& v)
{
    // The counter uses the same type as `cur`; a narrower `unsigned` counter
    // could wrap around before reaching a large `cur`.
    for (std::size_t i = 0; i < v.cur; ++i) os << ' ';
    return os;
}
struct is_simple
{
template <typename T>
bool operator()(T const&) const
{
return std::is_same<T, id>::value || std::is_same<T, value>::value;
}
};
struct printer
{
indent indent_;
void operator()(id const& v)
{
std::cout << v;
}
void operator()(value const& v)
{
std::cout << '\'' << v << '\'';
}
void operator()(nary_expr const& v)
{
std::cout << '(' << v.op << ' ';
printer p{ indent_ + 2 + v.op.size() };
boost::apply_visitor(p, v.exprs[0]);
for (std::size_t i = 1; i < v.exprs.size(); ++i) {
if (boost::apply_visitor(is_simple{}, v.exprs[i])) {
std::cout << ' ';
}
else {
std::cout << '\n' << p.indent_;
}
boost::apply_visitor(p, v.exprs[i]);
}
std::cout << ')';
}
};
int main()
{
std::string s = "not (xyz='a' or xyz='b' or xyz='c') and abc='s' xor (pqr ='v' and xyz='d')";
expr expr;
auto iter = s.cbegin();
if (phrase_parse(iter, s.cend(), prec4_expr_def, x3::space, expr) && iter == s.cend()) {
boost::apply_visitor(printer{}, expr);
}
return 0;
}
It prints:
(xor (and (not (or (= xyz 'a')
(= xyz 'b')
(= xyz 'c')))
(= abc 's'))
(and (= pqr 'v')
(= xyz 'd')))

Related

Parsing a string to create a geometry

What is the algorithm for developing a string parser to create a geometry? The geometry is generated in 2 steps: at the first step, we create primitives; at the second, we combine primitives into objects.
The syntax is presented in the string below.
string str="[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//..."
// Question sketch of the base class for all primitives.
// NOTE(review): the class is declared as PRIMITIVE but the operators below name
// `Primitive` — presumably the same type; confirm.
class PRIMITIVE{
int number;
public:
// NOTE(review): the bodies are empty although a reference is returned — placeholders only.
Primitive& operator+ (Primitive& primitive) {}; //overloading arithmetic operations
Primitive& operator* (Primitive& primitive) {};
Primitive& operator- (Primitive& primitive) {};
// NOTE(review): written as a data member with `= 0`; presumably meant to be a
// pure virtual member function — confirm.
virtual bool check_in_point_inside_primitive = 0;
};
// Question sketch of a sphere primitive that stores its radius.
class SPHERE:public PRIMITIVE{
double m_radius;
public:
SPHERE(double radius): m_radius(radius) {}; //In which part of the parser to create objects?
// NOTE(review): empty body although the return type is bool — placeholder only.
bool check_in_point_inside_sphere(Point& point){};
};
// Question sketch of a box primitive that stores its two parameters A and B.
class BOX:public PRIMITIVE{
double m_A;
double m_B;
public:
BOX(double A, double B): m_A(A), m_B(B) {};
// NOTE(review): empty body although the return type is bool — placeholder only.
bool check_in_point_inside_box(Point& point){};
};
// Question sketch of an object composed from primitives.
class OBJECT{
int number;
PRIMITIVE& primitive;
public:
// NOTE(review): the reference member `primitive` is not initialised by this constructor.
OBJECT(){};
// Intended to evaluate the object's expression for `point`; the body is still an open question.
bool check_in_point_inside_object(Primitive& PRIMITIVE1, Primitive& PRIMITIVE2, Point& point){
//>How to construct a function from an expression 'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
}
};
How to analyze the string PRIMITIVE1=SPHERE(RADIUS=5.5) and pass a parameter to the constructor of SPHERE()? How to identify this object with the name PRIMITIVE 1 to call to it in OBJECT? Is it possible to create a pair<PRIMITIVE1,SPHERE(5.5)> and store all primitives in map?
How to parse the string of the OBJECT1 and to construct a function from an expression PRIMITIVE2*(-PRIMITIVE1) inside an OBJECT1? This expression will be required multiple times when determining the position of each point relative to the object.
How to use boost::spirit for this task? Tokenize a string using boost::spirit::lex, and then develop rules using boost::spirit::qi?
As a finger exercise, and despite the serious problems I see with the chosen virtual type hierarchy, let's try to make a value-oriented container of Primitives that can be indexed by their id (ById):
Live On Coliru
#include <boost/intrusive/set.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <iostream>
namespace bi = boost::intrusive;
struct Point {
};
using IndexHook = bi::set_member_hook<bi::link_mode<bi::auto_unlink>>;
// Polymorphic base of all primitives; carries the id used as the index key.
class Primitive {
int _id;
public:
// Strict ordering by id. Each operand may be an int or a Primitive
// (see the two access overloads).
struct ById {
bool operator()(auto const&... oper) const { return std::less<>{}(access(oper)...); }
private:
static int access(int id) { return id; }
static int access(Primitive const& p) { return p._id; }
};
// Member hook of type IndexHook (a boost::intrusive set_member_hook).
IndexHook _index;
Primitive(int id) : _id(id) {}
virtual ~Primitive() = default;
int id() const { return _id; }
// The compound-assignment operators are placeholders: they return *this unchanged.
Primitive& operator+= (Primitive const& primitive) { return *this; } //overloading arithmetic operations
Primitive& operator*= (Primitive const& primitive) { return *this; }
Primitive& operator-= (Primitive const& primitive) { return *this; }
// Point-containment test, to be provided by each concrete primitive.
virtual bool check_in_point_inside(Point const&) const = 0;
};
// Intrusive set of Primitives, ordered by Primitive::ById and linked through the
// Primitive::_index member hook; constant_time_size is switched off.
using Index =
bi::set<Primitive, bi::constant_time_size<false>,
bi::compare<Primitive::ById>,
bi::member_hook<Primitive, IndexHook, &Primitive::_index>>;
// Sphere primitive: an id plus a radius.
class Sphere : public Primitive {
  public:
    Sphere(int id, double radius) : Primitive(id), _radius(radius) {} // In which part of the parser to create objects?

    // Placeholder implementation: always reports "outside".
    bool check_in_point_inside(Point const&) const override { return false; }

  private:
    double _radius;
};
// Box primitive: an id plus the two parameters A and B.
class Box : public Primitive {
  public:
    Box(int id, double A, double B)
        : Primitive(id)
        , _A(A)
        , _B(B)
    {
    }

    // Placeholder implementation: always reports "outside".
    bool check_in_point_inside(Point const&) const override { return false; }

  private:
    double _A;
    double _B;
};
// An object: an id plus a reference to a primitive.
class Object{
int _id;
Primitive& _primitive;
public:
Object(int id, Primitive& p) : _id(id), _primitive(p) {}
// Placeholder: always returns false; none of the arguments is used yet.
bool check_in_point_inside_object(Primitive const& p1, Primitive const& p2,
Point const& point) const
{
//>How to construct a function from an expression
//'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
return false;
}
};
// Value-oriented container for objects derived from Primitive.
using Primitives = boost::poly_collection::base_collection<Primitive>;
// Fills a collection (including one duplicate id), indexes it by id, then lists
// the primitives first through the index and then through the collection.
int main() {
Primitives test;
test.insert(Sphere{2, 4.0});
test.insert(Sphere{4, 4.0});
test.insert(Box{2, 5, 6});
test.insert(Sphere{1, 4.0});
test.insert(Box{3, 5, 6});
Index idx;
// An element whose id is already present in the index is reported and left out.
for (auto& p : test)
if (not idx.insert(p).second)
std::cout << "Duplicate id " << p.id() << " not indexed\n";
for (auto& p : idx)
std::cout << typeid(p).name() << " " << p.id() << "\n";
std::cout << "---\n";
for (auto& p : test)
std::cout << typeid(p).name() << " " << p.id() << "\n";
}
Prints
Duplicate id 2 not indexed
6Sphere 1
3Box 2
3Box 3
6Sphere 4
---
3Box 2
3Box 3
6Sphere 2
6Sphere 4
6Sphere 1
So far so good. This is an important building block to prevent all manner of pain when dealing with virtual types in Spirit grammars¹
PS: I've since dropped the idea of intrusive_set. It doesn't work because the base_collection moves items around on reallocation, and that unlinks the items from their intrusive set.
Instead, see below for an approach that doesn't try to resolve ids during the parse.
Parsing primitives
We get the ID from the PRIMITIVE1. We could store it somewhere before naturally parsing the primitives themselves, then set the id on it on commit.
Let's start with defining a State object for the parser:
// Mutable state shared by the semantic actions of the parser.
struct State {
    // Id drafted for the entity that is currently being parsed. Value-initialised
    // so that a default-constructed State never holds an indeterminate id.
    Ast::Id next_id{};
    Ast::Primitives primitives;
    Ast::Objects objects;

    // A variant attribute is committed as whichever alternative it holds.
    template <typename... T> void commit(boost::variant<T...>& val) {
        boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
    }

    // Consumes the drafted id (resetting it to 0) and stores the parsed entity:
    // primitives receive the id and go into `primitives`, anything else is
    // wrapped in an Ast::Object and appended to `objects`.
    template <typename T> void commit(T& primitiveOrExpr) {
        auto id = std::exchange(next_id, 0);
        if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
            primitiveOrExpr.id = id;
            primitives.insert(std::move(primitiveOrExpr));
        } else {
            objects.push_back(Ast::Object{id, std::move(primitiveOrExpr)});
        }
    }
};
As you can see, we just have a place to store the primitives and the objects, plus temporary storage for next_id while we're still parsing the next entity.
The commit function helps sorting the products of the parser rules. As it happens, they can be variant, which is why we have the apply_visitor dispatch for commit on a variant.
Again, as the footnote¹ explains, Spirit's natural attribute synthesis favors static polymorphism.
The semantic actions we need are now:
static inline auto& state(auto& ctx) { return get<State>(ctx); }
auto draft = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };
Now let's jump ahead to the primitives:
auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');
auto primitive =
("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';
That's still cheating a little, as I've used the param helper to reduce typing:
auto number = as<Ast::Number>(double_, "number");
auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
auto param(auto name) { return param(name, number); }
As you can see I've already assumed most parameters will have numerical nature.
What Are Objects Really?
Looking at it for a while, I concluded that really an Object is defined as an id number (OBJECT1, OBJECT2...) which is tied to an expression. The expression can reference primitives and have some unary and binary operators.
Let's sketch an AST for that:
// AST sketch for object expressions.
// NOTE(review): `Id` is not declared in this excerpt — presumably the Ast::Id alias; confirm.
using Number = double;
// Reference to a primitive by its id.
struct RefPrimitive { Id id; };
// Forward declarations of the recursive node types.
struct Binary;
struct Unary;
// An expression: a number, a primitive reference, or a unary/binary operator node.
using Expr = boost::variant< //
Number, //
RefPrimitive, //
boost::recursive_wrapper<Unary>, //
boost::recursive_wrapper<Binary> //
>;
// Operator character applied to one operand.
struct Unary { char op; Expr oper; };
// Operator character between a left and a right operand.
struct Binary { Expr lhs; char op; Expr rhs; };
// An object: an id tied to an expression.
struct Object { Id id; Expr expr; };
Now To Parse Into That Expression AST
It's really 1:1 rules for each Ast node type. E.g.:
auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_]);
Now many of the expression rules can recurse, so we need declared rules with definitions via BOOST_SPIRIT_DEFINE:
// object expression grammar
rule<struct simple_tag, Ast::Expr> simple{"simple"};
rule<struct unary_tag, Ast::Unary> unary{"unary"};
rule<struct expr_tag, Ast::Expr> expr{"expr"};
rule<struct term_tag, Ast::Expr> term{"term"};
rule<struct factor_tag, Ast::Expr> factor{"factor"};
As you can tell, some of these are not 1:1 with the Ast nodes, mainly because of the recursion and the difference in operator precedence (term vs factor vs. simple). It's easier to see with the rule definition:
// unary : a sign character followed by a simple expression
auto unary_def = char_("-+") >> simple;
// simple: a primitive reference, a unary expression, or a parenthesised expression
auto simple_def = ref_prim | unary | '(' >> expr >> ")";
auto factor_def = simple;
// The repeated operand is the next-higher precedence level (factor resp. term)
// rather than the rule itself, so make_binary folds the chain from the left:
// a-b-c becomes ((a-b)-c) instead of (a-(b-c)).
auto term_def = factor[assign] >> *(char_("*/") >> factor)[make_binary];
auto expr_def = term[assign] >> *(char_("-+") >> term)[make_binary];
Because none of the rules actually expose a Binary, automatic attribute propagation is not convenient there². Instead, we use assign and make_binary semantic actions:
// Copies the parsed attribute into the rule's value.
auto assign = [](auto& ctx) {
    _val(ctx) = _attr(ctx);
};

// Combines the rule's current value (as left operand) with a parsed
// (operator, right operand) pair into an Ast::Binary, which becomes the new value.
auto make_binary = [](auto& ctx) {
    auto& parsed = _attr(ctx);
    auto  oper   = boost::fusion::at_c<0>(parsed);
    auto& right  = boost::fusion::at_c<1>(parsed);
    _val(ctx) = Ast::Binary { _val(ctx), oper, right };
};
Finally, let's tie the definitions to the declared rules (using their tag types):
BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)
All we need is a similar line to primitive:
auto object =
("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';
And we can finish up by defining each line as a primitive|object:
auto line = primitive | object;
auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
At the top level we expect the [GEOMETRY] header, specify that we want to be case insensitive and ... that ws_comment is to be skipped³:
auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> eol]);
This allows us to ignore the // comments as well.
Live Demo Time
Live On Compiler Explorer
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <list>
#include <map>
namespace x3 = boost::spirit::x3;
// AST for the geometry description: primitives, object expressions and the
// containers that hold them, plus stream output for diagnostics.
namespace Ast {
using Id = uint32_t;
struct Point { }; // ?? where does this belong?
// Polymorphic base of all primitives; carries the id.
struct Primitive {
Id id;
virtual ~Primitive() = default;
};
struct Sphere : Primitive { double radius; };
struct Box : Primitive { double a, b; };
using Number = double;
// Reference to a primitive by its id.
struct RefPrimitive { Id id; };
// Forward declarations of the recursive node types.
struct Binary;
struct Unary;
// An expression: a number, a primitive reference, or a unary/binary operator node.
using Expr = boost::variant< //
Number, //
RefPrimitive, //
boost::recursive_wrapper<Unary>, //
boost::recursive_wrapper<Binary> //
>;
struct Unary { char op; Expr oper; };
struct Binary { Expr lhs; char op; Expr rhs; };
// An object: an id tied to an expression.
struct Object { Id id; Expr expr; };
using Primitives = boost::poly_collection::base_collection<Primitive>;
using Objects = std::list<Object>;
// Id -> primitive lookup; holds references, not copies.
using Index = std::map<Id, std::reference_wrapper<Primitive const>>;
// Prints the demangled dynamic type name followed by the id.
std::ostream& operator<<(std::ostream& os, Primitive const& p) {
return os << boost::core::demangle(typeid(p).name()) << " "
<< "(id: " << p.id << ")";
}
std::ostream& operator<<(std::ostream& os, Object const& o) {
return os << "object(id:" << o.id << ", expr:" << o.expr << ")";
}
std::ostream& operator<<(std::ostream& os, RefPrimitive ref) {
return os << "reference(prim:" << ref.id << ")";
}
// Binary nodes print fully parenthesised, in infix order.
std::ostream& operator<<(std::ostream& os, Binary const& b) {
return os << '(' << b.lhs << b.op << b.rhs << ')';
}
// Unary nodes print parenthesised, operator first.
std::ostream& operator<<(std::ostream& os, Unary const& u) {
return os << '(' << u.op << u.oper << ')';
}
} // namespace Ast
// Expose the listed members of each AST struct as a Boost.Fusion sequence.
// Only the members named here are adapted (e.g. Sphere exposes radius but not
// the inherited id; Object exposes id but not expr).
BOOST_FUSION_ADAPT_STRUCT(Ast::Primitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Sphere, radius)
BOOST_FUSION_ADAPT_STRUCT(Ast::Box, a, b)
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::RefPrimitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Unary, op, oper)
namespace Parser {
using namespace x3;
struct State {
Ast::Id next_id;
Ast::Primitives primitives;
Ast::Objects objects;
template <typename... T> void commit(boost::variant<T...>& val) {
boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
}
template <typename T> void commit(T& val) {
auto id = std::exchange(next_id, 0);
if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
val.id = id;
primitives.insert(std::move(val));
} else {
objects.push_back(Ast::Object{id, std::move(val)});
}
}
};
static inline auto& state(auto& ctx) { return get<State>(ctx); }
auto draft = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };
template <typename T>
auto as = [](auto p, char const* name = "as") {
return rule<struct _, T>{name} = p;
};
auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> (eol | eoi)]);
auto number = as<Ast::Number>(double_, "number");
auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
auto param(auto name) { return param(name, number); }
auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');
auto primitive =
("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';
auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_], "ref_prim");
// object expression grammar
rule<struct simple_tag, Ast::Expr> simple{"simple"};
rule<struct unary_tag, Ast::Unary> unary{"unary"};
rule<struct expr_tag, Ast::Expr> expr{"expr"};
rule<struct term_tag, Ast::Expr> term{"term"};
rule<struct factor_tag, Ast::Expr> factor{"factor"};
auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };
auto make_binary = [](auto& ctx) {
using boost::fusion::at_c;
auto& attr = _attr(ctx);
auto op = at_c<0>(attr);
auto& rhs = at_c<1>(attr);
_val(ctx) = Ast::Binary { _val(ctx), op, rhs };
};
auto unary_def = char_("-+") >> simple;
auto simple_def = ref_prim | unary | '(' >> expr >> ")";
auto factor_def = simple;
auto term_def = factor[assign] >> *(char_("*/") >> term)[make_binary];
auto expr_def = term[assign] >> *(char_("-+") >> expr)[make_binary];
BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)
auto object =
("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';
auto line = primitive | object;
auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
} // namespace Parser
// Parses two sample inputs (lower-case and upper-case spelling of the same
// geometry) and prints the primitives by id and the objects in definition order.
int main() {
for (std::string const input :
{
R"(
[geometry]
primitive1=sphere(radius=5.5);
primitive2=box(a=-5.2, b=7.3);
//...
object1=primitive2*(-primitive1);
//...)",
R"(
[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//...)",
}) //
{
// Fresh state per input; the grammar's semantic actions fill it during the parse.
Parser::State state;
bool ok = parse(begin(input), end(input),
x3::with<Parser::State>(state)[Parser::file]);
std::cout << "Parse success? " << std::boolalpha << ok << "\n";
// Build the id lookup after parsing; an id that is already present is reported.
Ast::Index index;
for (auto& p : state.primitives)
if (auto[it,ok] = index.emplace(p.id, p); not ok) {
std::cout << "Duplicate id " << p
<< " (conflicts with existing " << it->second.get()
<< ")\n";
}
std::cout << "Primitives by ID:\n";
for (auto& [id, prim] : index)
std::cout << " - " << prim << "\n";
std::cout << "Objects in definition order:\n";
for (auto& obj: state.objects)
std::cout << " - " << obj << "\n";
}
}
Prints
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
¹ How can I use polymorphic attributes with boost::spirit::qi parsers?
² and insisting on that leads to classical in-efficiency with rules that cause a lot of backtracking
³ outside of lexemes

How to write a 'c like if' parser with boost spirit

I want to write a rule that parses something like:
if (1==1) {do something}
My problem is how to 'disable' a semantic action based on the result of another rule's output.
For demonstration, my example uses an int_ parser and simply takes that value as its result. I want to bypass that action if the ifrule returns false.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
// Question code: parses `if (<int> <op> <int>) { <int> }` and prints the integer
// found inside the braces.
int main() {
// Maps the operator spelling to the comparison it denotes.
qi::symbols<char, std::function<bool(int, int)>> sym;
sym.add
("==", std::equal_to<>())
("!=", std::not_equal_to<>());
using namespace qi::labels;
int result;
auto bin_eval = [](auto const& lhs, auto const& op, auto const& rhs) {
return op(lhs, rhs);
};
// ifrule synthesises the boolean outcome of the comparison.
qi::rule<std::string::const_iterator, bool(), qi::space_type> ifrule;
ifrule = (qi::lit("if") >> '(' >> qi::int_ >> sym >> qi::int_ >> ')')
[qi::_val = px::bind(bin_eval, _1, _2, _3)];
// The action on int_ assigns the rule's value; nothing here reads ifrule's result.
qi::rule<std::string::const_iterator, int(), qi::space_type> rule;
rule = ifrule >> '{' >> qi::int_[_val = _1] >> '}';
for (std::string const s : {"if (1==2) {1}", "if (1==1) {1}"}) {
std::cout << std::quoted(s) << " -> ";
if (qi::phrase_parse(s.begin(), s.end(), rule, qi::space, result)) {
std::cout << "result: " << std::boolalpha << result << "\n";
} else {
std::cout << "parse failed\n";
}
}
}
Are you looking for shortcut evaluation?
If so, there is literally nothing you have to do to get that effect, nor is there anything you COULD do, short of modifying the input.
As long as you are combining the parsing and the evaluation, you will necessarily be visiting the entire expression tree (because you have to parse it).
Only if you produce an AST can you do optimizations:
static optimizations that simplify the tree (removing double negations, removing tautologies, resolving contradictions like x xor not x -> not (x and not x) -> not (false) -> true)
This is a rich topic and not as easy as it seems (see e.g. Boost Spirit: parse boolean expression and reduce to canonical normal form)
runtime optimization, this is where shortcut evaluation comes in. E.g. a && (b | (c ^ d) | (!a & (b ^ c))) would be
[![enter image description here][1]][1]. If `a` is false, the entire evaluation can be skipped because the result will be `false`.
Is your grammar an expression grammar or a statement parser? What should be the value of result if the condition is false? – sehe just now
Assuming the answer is "expression" not "statement" then this is arguably just an expression parser with some binary operators and one ternary (with implicit else branch).
In that case you could cheat and just return an optional value.
I'll show you a more complete AST-based example because likely you're trying to parse something less trivial than what you're showing anyways.
An AST
As always, think of what your Abstract Syntax Tree looks like first:
// First sketch of the AST for conditional expressions.
namespace Ast {
    struct BinOp;
    struct Conditional; // spelling now matches the definition and the variant below

    // NOTE: BinOp and Conditional are still incomplete types at this point, so
    // this direct form of the variant is illustrative only.
    using Expr = boost::variant<
        int,
        bool,
        BinOp,
        Conditional
    >;

    // Binary operation: the operator function plus its two operands.
    struct BinOp {
        BinOpF op;
        Expr lhs, rhs;
    };

    // `if (condition) { true_part }`
    struct Conditional {
        Expr condition, true_part;
    };
}
I've taken the function definition for operators from your code:
using BinOpF = std::function<bool(int, int)>;
The variant wouldn't actually compile with the recursive use of incomplete types, so let's use the wrapper:
using Expr = boost::variant<
int,
bool,
boost::recursive_wrapper<BinOp>,
boost::recursive_wrapper<Conditional>
>;
Parsing
The parser construction is usually a 1:1 mapping of rules:
expr
= '(' >> expr >> ')'
| conditional_
| int_
| bool_
| binop_
;
Let's put it in a grammar (that also hides the skipper).
Note I also shuffled the binop parser around splitting expr_ into simple_ and using semantic actions to create the BinOp Ast nodes for efficiency. This stems from experience that grammars get horrifically inefficient with backtracking otherwise.
// Qi grammar producing an Ast::Expr. The space skipper is applied internally,
// so the grammar itself is declared without a skipper.
template <typename It>
struct Expr : qi::grammar<It, Ast::Expr()> {
Expr() : Expr::base_type(start) {
using namespace qi;
start = qi::skip(space) [ expr_ ];
// simple_: parenthesised expression, conditional, integer or boolean.
simple_
= '(' >> expr_ >> ')'
| conditional_
| int_
| bool_
;
auto make_bin = [](auto lhs, auto op, auto rhs) {
return Ast::BinOp { op, lhs, rhs };
};
// expr_: a simple_ followed by any number of (operator, expr_) pairs; each pair
// replaces the value built so far with a BinOp that has it as left operand.
expr_ %= simple_
>> *(binop_ >> expr_) [ _val = px::bind(make_bin, _val, _1, _2) ];
;
// conditional_: if ( expr_ ) { expr_ }
conditional_
= lexeme["if"]
>> '(' >> expr_ >> ')'
>> '{' >> expr_ >> '}'
;
// Operator spellings mapped to the functions that evaluate them.
binop_.add
("==", std::equal_to<>())
("!=", std::not_equal_to<>());
BOOST_SPIRIT_DEBUG_NODES((start)(expr_)(conditional_))
}
private:
qi::rule<It, Ast::Expr()> start;
qi::rule<It, Ast::Expr(), qi::space_type> simple_, expr_;
qi::rule<It, Ast::Conditional(), qi::space_type> conditional_;
qi::symbols<char, Ast::BinOpF> binop_;
};
That's it. Note that it will parse a multitude of expressions that your parser could not, previously. E.g.:
Live On Coliru
// Runs the grammar over a list of sample inputs, printing the parsed AST for
// each one and any input that was left unparsed.
int main() {
Expr<std::string::const_iterator> const parser;
for (std::string const s : {
"false",
"1==2",
"-3!=3",
"if (true) {42}",
"if (false) {43}",
"if (1==2) {44}",
"if (false == (1 == 2)) { ((((45)))) }",
"if (false == (1 == 2)) { if (true) {46} }",
"if (true == (1 == 2)) {47} else { 48 };",
}) {
std::cout << std::quoted(s) << " -> ";
auto f = begin(s);
auto l = end(s);
Ast::Expr expr;
if (qi::parse(f, l, parser, expr)) {
std::cout << "result: " << expr << "\n";
} else {
std::cout << "parse failed\n";
}
// qi::parse advances f; anything between f and l was not consumed.
if (f!=l) {
std::cout << "Remaining unparsed input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
"false" -> result: 0
"1==2" -> result: (1 (opfun) 2)
"-3!=3" -> result: (-3 (opfun) 3)
"if (true) {42}" -> result: if(1) {42}
"if (false) {43}" -> result: if(0) {43}
"if (1==2) {44}" -> result: if((1 (opfun) 2)) {44}
"if (false == (1 == 2)) { ((((45)))) }" -> result: if((0 (opfun) (1 (opfun) 2))) {45}
"if (false == (1 == 2)) { if (true) {46} }" -> result: if((0 (opfun) (1 (opfun) 2))) {if(1) {46}}
"if (true == (1 == 2)) {47} else { 48 };" -> result: if((1 (opfun) (1 (opfun) 2))) {47}
Remaining unparsed input: " else { 48 };"
Evaluation
All that's left is actual evaluation. The result will be an optional value which could be optional<int> for your examples, but I'll make it variant<Nil, int, bool> so we can represent conditions as well:
namespace Evaluation {
struct Nil {};
using Value = boost::variant<Nil, int, bool>;
Now the engine is a function object that will be visiting our AST nodes:
struct Engine {
template <typename... T> auto operator()(T const&... v) const {
return eval(v...);
}
private:
Delegating any call to operator() to private eval methods makes things easier to read, and implementation is pretty straightforward:
// Dispatches on the alternative held by the expression variant.
Value eval(Ast::Expr const& e) const { return boost::apply_visitor(*this, e); }
// Literals evaluate to themselves.
Value eval(int e) const { return e; }
Value eval(bool e) const { return e; }
// Applies the stored operator to both operands after passing each through as_int.
// NOTE(review): as_int is not shown here — presumably it evaluates the operand
// and extracts an int; confirm.
Value eval(Ast::BinOp const& e) const { return e.op(as_int(e.lhs), as_int(e.rhs)); }
Here we see the first bit of logic creep in. Since BinOpF is bool(int,int) we have to coerce the arguments to int.
// Evaluates the true branch only when the condition evaluates to boolean true;
// in every other case the branch is left unevaluated and the result is Nil.
Value eval(Ast::Conditional const& e) const {
    Value const expected = true;
    if (!(eval(e.condition) == expected)) {
        return Nil{};
    }
    return eval(e.true_part);
}
This is where we finally answer the meat of your question: the true_part is never evaluated if the condition evaluates to false¹
Test #1
Testing the current state of affairs:
Evaluation::Engine const eval;
// ...
if (qi::parse(f, l, parser, expr)) {
std::cout << "expr: " << expr << "\n";
try {
auto result = eval(expr);
std::cout << "result: " << result << "\n";
} catch(boost::bad_get const&) {
std::cout << "result: Type mismatch\n";
}
} else {
std::cout << "parse failed\n";
}
Prints Live On Coliru
result: 0
result: 0
result: 1
result: 42
result: Nil
result: Nil
"false" -> expr: 0
"1==2" -> expr: (1 (opfun) 2)
"-3!=3" -> expr: (-3 (opfun) 3)
"if (true) {42}" -> expr: if(1) {42}
"if (false) {43}" -> expr: if(0) {43}
"if (1==2) {44}" -> expr: if((1 (opfun) 2)) {44}
The following expressions fail because the operands aren't integer:
"if (false == (1 == 2)) { ((((45)))) }" -> expr: if((0 (opfun) (1 (opfun) 2))) {45}
result: Type mismatch
"if (false == (1 == 2)) { if (true) {46} }" -> expr: if((0 (opfun) (1 (opfun) 2))) {if(1) {46}}
result: Type mismatch
"if (true == (1 == 2)) {47} else { 48 };" -> expr: if((1 (opfun) (1 (opfun) 2))) {47}
result: Type mismatch
Remaining unparsed input: " else { 48 };"
Three Improvements
Let's allow mixed-type evaluation: true != false should also work, right? As should true == (1!=9).
Changing the function type from bool(int,int) to Value(Value,Value) allows for the full gamut.
namespace Evaluation {
struct Nil { bool operator==(Nil) const { return true; } };
using Value = boost::variant<Nil, int, bool>;
using BinOpF = std::function<Value(Value, Value)>;
For Mixed-Type evaluation we need some help because std::equal_to and friends don't know how to do (binary) variant visitation. So, let's make a wrapper for that:
// Adapts a binary function object Op (e.g. std::equal_to<>) to operate on two
// Values by visiting both variants at once.
template <typename Op> struct MixedOp {
Value operator()(Value const& lhs, Value const& rhs) const {
return boost::apply_visitor(Dispatch{}, lhs, rhs);
}
private:
struct Dispatch {
// Participates only when Op{}(T{}, U{}) is a valid expression; the defaulted
// pointer parameter exists solely to carry that check.
template <typename T, typename U>
Value operator()(T const& lhs, U const& rhs, decltype(Op{}(T{}, U{}))* = nullptr) const
{ return Op{}(lhs, rhs); }
// Fallback for operand type combinations that Op does not accept.
// NOTE(review): __PRETTY_FUNCTION__ is not a standard macro — presumably
// GCC/Clang only; confirm for other compilers.
template <typename... T>
Value operator()(T const&...) const
{ throw std::logic_error("Type mismatch " + std::string(__PRETTY_FUNCTION__)); }
};
};
Yeah. That's ugly, but note how it will naturally work for most of the operator functions (like std::plus<>, std::multiplies<> etc.).
// wrap std functionals
using equal_to = detail::MixedOp<std::equal_to<> >;
using not_equal_to = detail::MixedOp<std::not_equal_to<> >;
using plus = detail::MixedOp<std::plus<> >;
using minus = detail::MixedOp<std::minus<> >;
using multiplies = detail::MixedOp<std::multiplies<> >;
using divides = detail::MixedOp<std::divides<> >;
Let's also make it so truthy-ness of values is better (0 should probably just mean false like in C, as would Nil).
Let's get the truthyness:
namespace detail {
// Visitor that normalizes a Value to its truth value; Nil stays Nil.
struct truthy {
    // Dispatch on whichever alternative the variant currently holds.
    Value operator()(Value const& v) const {
        return boost::apply_visitor(*this, v);
    }
    // Integers count as true when they are non-zero.
    Value operator()(int v) const { return v != 0; }
    // Booleans are already in normal form.
    Value operator()(bool v) const { return v; }
    // "No value" has no truth value of its own.
    Value operator()(Nil) const { return Nil{}; }
};
And then define a free function to make it easy to call from our Engine:
// True only when `v` normalizes to boolean true; false, 0 and Nil are falsy.
static inline bool truthy(Value const& v) {
    Value const normalized = detail::truthy{}(v);
    return Value{true} == normalized;
}
We can now drop the complications from the Evaluation::Engine:
// Binary operation: evaluate both operands and hand them to the stored operator.
Value eval(Ast::BinOp const& e) const {
    return e.op(eval(e.lhs), eval(e.rhs));
}
// Conditional without else: yields the true branch, or Nil when the condition is falsy.
Value eval(Ast::Conditional const& e) const {
    if (!truthy(eval(e.condition))) {
        return Nil{};
    }
    return eval(e.true_part);
}
No more as_inst or True comparison.
Let's make else supported
This is very simple extending from what we had above. But it will nicely demonstrate that only one branch is actually evaluated, e.g. when one branch contains a division by zero.
// Conditional with optional else: only the selected branch is evaluated.
// Yields Nil when the condition is falsy and no else branch exists.
Value eval(Ast::Conditional const& e) const {
    bool const condition_holds = truthy(eval(e.condition));
    if (condition_holds) {
        return eval(e.true_part);
    }
    if (!e.false_part) {
        return Nil{};
    }
    return eval(*e.false_part);
}
Full Demo
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/current_function.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace Evaluation {
// Empty "no value" alternative; any two Nil objects compare equal.
struct Nil { bool operator==(Nil) const { return true; } };
// Result of evaluating an expression: nothing, an integer, or a boolean.
using Value = boost::variant<Nil, int, bool>;
// Type-erased binary operator that takes two Values and returns a Value.
using BinOpF = std::function<Value(Value, Value)>;
namespace detail {
// Visitor that normalizes a Value to its truth value; Nil stays Nil.
struct truthy {
    // Dispatch on whichever alternative the variant currently holds.
    Value operator()(Value const& v) const {
        return boost::apply_visitor(*this, v);
    }
    // Integers count as true when they are non-zero.
    Value operator()(int v) const { return v != 0; }
    // Booleans are already in normal form.
    Value operator()(bool v) const { return v; }
    // "No value" has no truth value of its own.
    Value operator()(Nil) const { return Nil{}; }
};
// Adapts a binary function object `Op` (e.g. std::equal_to<>) so that it can
// be applied to two `Value` variants, whatever alternatives they hold.
template <typename Op> struct MixedOp {
    // Visits both operands at once. Throws std::logic_error when `Op` cannot
    // be called with the pair of types currently held.
    Value operator()(Value const& lhs, Value const& rhs) const {
        return boost::apply_visitor(Dispatch{}, lhs, rhs);
    }
  private:
    struct Dispatch {
        // Participates in overload resolution only when `Op{}(T{}, U{})` is a
        // valid expression (SFINAE through the defaulted pointer parameter).
        template <typename T, typename U>
        Value operator()(T const& lhs, U const& rhs, decltype(Op{}(T{}, U{}))* = nullptr) const
        { return Op{}(lhs, rhs); }
        // Fallback for every operand combination `Op` does not support.
        // BOOST_CURRENT_FUNCTION is used instead of the GCC/Clang-only
        // __PRETTY_FUNCTION__ so the code also builds with other compilers.
        template <typename... T>
        Value operator()(T const&...) const
        { throw std::logic_error("Type mismatch " + std::string(BOOST_CURRENT_FUNCTION)); }
    };
};
}
// True only when `v` normalizes to boolean true; false, 0 and Nil are falsy.
static inline bool truthy(Value const& v) {
    Value const normalized = detail::truthy{}(v);
    return Value{true} == normalized;
}
// wrap std functionals
// Each alias wraps the transparent std function object in MixedOp so it can be
// applied to two Values; the parser stores instances of these as BinOpF.
using equal_to = detail::MixedOp<std::equal_to<> >;
using not_equal_to = detail::MixedOp<std::not_equal_to<> >;
using plus = detail::MixedOp<std::plus<> >;
using minus = detail::MixedOp<std::minus<> >;
using multiplies = detail::MixedOp<std::multiplies<> >;
using divides = detail::MixedOp<std::divides<> >;
}
// Syntax tree produced by the parser and consumed by Evaluation::Engine.
namespace Ast {
// Forward declarations: Expr refers to these before they are complete.
struct BinOp;
struct Conditional;
// Any expression: an int or bool literal, a binary operation, or a conditional.
// The recursive alternatives are wrapped because they contain Expr themselves.
using Expr = boost::variant<
int,
bool,
boost::recursive_wrapper<BinOp>,
boost::recursive_wrapper<Conditional>
>;
// Binary operation: `op` is the callable to apply to the evaluated operands.
struct BinOp {
Evaluation::BinOpF op;
Expr lhs, rhs;
};
// `if (condition) { true_part }` with an optional `else { false_part }`.
struct Conditional {
Expr condition, true_part;
boost::optional<Expr> false_part;
};
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Conditional, condition, true_part, false_part)
namespace Parsing {
// Qi grammar that parses an expression into an Ast::Expr.
// Whitespace skipping is set up internally, so callers use qi::parse without a skipper.
template <typename It>
struct Expr : qi::grammar<It, Ast::Expr()> {
Expr() : Expr::base_type(start) {
using namespace qi;
// Entry rule: declared without a skipper; installs `space` for the rules below it.
start = qi::skip(space) [ expr_ ];
// Operand: parenthesized expression, conditional, or an int/bool literal.
simple_
= '(' >> expr_ >> ')'
| conditional_
| int_
| bool_
;
// Builds a BinOp node from the left operand parsed so far, the operator
// function looked up in binop_, and the right operand.
auto make_bin = [](auto lhs, auto op, auto rhs) {
return Ast::BinOp { op, lhs, rhs };
};
// `%=` keeps automatic attribute propagation from simple_ even though a
// semantic action is attached to the repeated part.
// The right operand is expr_ itself, so operator chains nest to the right and
// all operators share a single precedence level (the demo output shows
// "3*3 / 0" parsed as 3 * (3 / 0)).
expr_ %= simple_
>> *(binop_ >> expr_) [ _val = px::bind(make_bin, _val, _1, _2) ];
// The lone `;` below is an empty statement.
;
// `if (cond) { expr }` optionally followed by `else { expr }`.
conditional_
= lexeme["if"]
>> '(' >> expr_ >> ')'
>> '{' >> expr_ >> '}'
>> -(lit("else") >> '{' >> expr_ >> '}')
;
// Operator spellings mapped to the callables stored in Ast::BinOp::op.
binop_.add
("==", Evaluation::equal_to())
("!=", Evaluation::not_equal_to())
("+", Evaluation::plus())
("-", Evaluation::minus())
("*", Evaluation::multiplies())
("/", Evaluation::divides())
;
BOOST_SPIRIT_DEBUG_NODES((start)(expr_)(conditional_))
}
private:
qi::rule<It, Ast::Expr()> start;
qi::rule<It, Ast::Expr(), qi::space_type> simple_, expr_;
qi::rule<It, Ast::Conditional(), qi::space_type> conditional_;
qi::symbols<char, Evaluation::BinOpF> binop_;
};
}
namespace Evaluation {
// Evaluates an Ast::Expr tree to a Value.
// The object doubles as the variant visitor: operator() forwards to the
// matching private eval overload.
struct Engine {
    template <typename... T> auto operator()(T const&... v) const {
        return eval(v...);
    }
  private:
    // Variant node: dispatch to the overload for the held alternative.
    Value eval(Ast::Expr const& e) const {
        return boost::apply_visitor(*this, e);
    }
    // Literals evaluate to themselves.
    Value eval(int e) const { return e; }
    Value eval(bool e) const { return e; }
    // Binary operation: apply the stored operator to both evaluated operands.
    Value eval(Ast::BinOp const& e) const {
        return e.op(eval(e.lhs), eval(e.rhs));
    }
    // Conditional: only the selected branch is evaluated; Nil when the
    // condition is falsy and there is no else branch.
    Value eval(Ast::Conditional const& e) const {
        bool const condition_holds = truthy(eval(e.condition));
        if (condition_holds) {
            return eval(e.true_part);
        }
        if (!e.false_part) {
            return Nil{};
        }
        return eval(*e.false_part);
    }
};
}
namespace Ast { // for debug output only
// Prints a binary operation as "(lhs (opfun) rhs)"; the operator itself is an
// opaque callable and cannot be named.
static inline std::ostream& operator<<(std::ostream& os, BinOp const& b) {
    os << "(" << b.lhs << " (opfun) " << b.rhs << ")";
    return os;
}
// Prints a conditional as "if(cond) {true}" plus "else{false}" when present.
static inline std::ostream& operator<<(std::ostream& os, Conditional const& c) {
    os << "if(" << c.condition << ") {" << c.true_part << "}";
    if (!c.false_part) {
        return os;
    }
    return os << "else{" << *c.false_part << "}";
}
}
namespace Evaluation { // for debug output only
// Prints the "no value" result as the literal text Nil.
static inline std::ostream& operator<<(std::ostream& os, Nil) {
    os << "Nil";
    return os;
}
}
int main() {
Parsing::Expr<std::string::const_iterator> const parser;
Evaluation::Engine const eval;
for (std::string const s : {
"false",
"1==2",
"-3!=3",
"if (true) {42}",
"if (false) {43}",
"if (1==2) {44}",
"if (false == (1 == 2)) { ((((45)))) }",
"if (false == (1 == 2)) { if (true) {46} }",
"if (true == (1 == 2)) {47} else { 48 }",
// cherry on top:
"if (false) { 3*3 / 0 } else { 7*7 }", // note division by zero
}) {
std::cout << std::quoted(s) << " -> ";
auto f = begin(s);
auto l = end(s);
Ast::Expr expr;
if (qi::parse(f, l, parser, expr)) {
std::cout << "expr: " << expr << "\n";
try {
std::cout << "result: " << eval(expr) << "\n";
} catch(std::exception const& e) {
std::cout << "result: " << e.what() << "\n";
}
} else {
std::cout << "parse failed\n";
}
if (f!=l) {
std::cout << "Remaining unparsed input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
"false" -> expr: 0
result: 0
"1==2" -> expr: (1 (opfun) 2)
result: 0
"-3!=3" -> expr: (-3 (opfun) 3)
result: 1
"if (true) {42}" -> expr: if(1) {42}
result: 42
"if (false) {43}" -> expr: if(0) {43}
result: Nil
"if (1==2) {44}" -> expr: if((1 (opfun) 2)) {44}
result: Nil
"if (false == (1 == 2)) { ((((45)))) }" -> expr: if((0 (opfun) (1 (opfun) 2))) {45}
result: 45
"if (false == (1 == 2)) { if (true) {46} }" -> expr: if((0 (opfun) (1 (opfun) 2))) {if(1) {46}}
result: 46
"if (true == (1 == 2)) {47} else { 48 }" -> expr: if((1 (opfun) (1 (opfun) 2))) {47}else{48}
result: 48
"if (false) { 3*3 / 0 } else { 7*7 }" -> expr: if(0) {(3 (opfun) (3 (opfun) 0))}else{(7 (opfun) 7)}
result: 49
¹ Something other than true, actually

Boost Spirit x3 conditional (ternary) operator parser (follow up question)

This question is a follow up question for the one in
Boost Spirit x3 conditional (ternary) operator parser
The original question context did not show (my bad!) the AST attributes, and the answer therefore could not take all the moving parts into account. This question now shows what the AST attributes look like and how the AST is used to evaluate the expression with a symbol table.
The follow-up question is therefore: how should the correctly spelled ternary conditional change the AST types, and how do the conditional and the expression interact? (As I understand it, the conditional is no longer part of the x3::variant, because it is to be removed from the primary parser's alternatives.)
Here is how the ast attributes and declared symbol definitions look like
namespace x3 = boost::spirit::x3;
// AST types as posted in the question. The block is intentionally unfinished:
// the spelling of conditional_op and expression is what the question asks about.
namespace ast {
// Placeholder alternative for "nothing parsed".
struct nil {};
// Forward declarations for the recursive variant below.
struct unary_op;
struct binary_op;
struct conditional_op;
struct expression;
// Any operand: a number, a variable name, or a nested operation/expression.
struct operand : x3::variant<
nil
, double
, std::string
, x3::forward_ast<unary_op>
, x3::forward_ast<binary_op>
//, x3::forward_ast<conditional_op> // conditional_op not here?
, x3::forward_ast<expression>
> {
using base_type::base_type;
using base_type::operator=;
};
// One-argument function or prefix operator applied to `rhs`.
struct unary_op {
double (*op)(double);
operand rhs;
};
// Two-argument function applied to `lhs` and `rhs`.
struct binary_op {
double (*op)(double, double);
operand lhs;
operand rhs;
};
/*
struct conditional_op {
operand lhs;
operand rhs_true;
operand rhs_false;
};
*/
// NOTE(review): `expression` is still incomplete at this point and
// `optional<expression, expression>` is not a valid type; this is the open
// part of the question rather than working code.
struct conditional_op {
expression lhs;
// how the exact type is spelled?
optional<expression, expression> maybe_rhs;
};
// One step of an operator chain: the operator and its right-hand operand.
struct operation {
double (*op)(double, double);
operand rhs;
};
// what is the type of expression ?
struct expression {
conditional_op conditional;
};
/*
struct expression {
operand lhs;
std::list<operation> rhs;
};
*/
} // namespace ast
// Symbol table of named mathematical constants.
struct constant_ : x3::symbols<double> {
    constant_() {
        namespace constants = boost::math::constants;
        add("e", constants::e<double>());
        add("pi", constants::pi<double>());
    }
} constant;
// Symbol table of one-argument functions that can be called by name.
struct ufunc_ : x3::symbols<double (*)(double)> {
    ufunc_() {
        using unary_fn = double (*)(double);
        // The cast selects the double overload of std::abs.
        add("abs", static_cast<unary_fn>(&std::abs));
    }
} ufunc;
// Symbol table of two-argument functions that can be called by name.
struct bfunc_ : x3::symbols<double (*)(double, double)> {
    bfunc_() {
        using binary_fn = double (*)(double, double);
        // The cast selects the double overload of std::fmax.
        add("max", static_cast<binary_fn>(&std::fmax));
    }
} bfunc;
// Symbol table of prefix operators: "+", "-" and "!".
struct unary_op_ : x3::symbols<double (*)(double)> {
    unary_op_() {
        using unary_fn = double (*)(double);
        add("+", static_cast<unary_fn>(&math::plus));
        add("-", static_cast<unary_fn>(&math::minus));
        add("!", static_cast<unary_fn>(&math::unary_not));
    }
} unary_op;
// Symbol table of additive infix operators: "+" and "-".
struct additive_op_ : x3::symbols<double (*)(double, double)> {
    additive_op_() {
        using binary_fn = double (*)(double, double);
        add("+", static_cast<binary_fn>(&math::plus));
        add("-", static_cast<binary_fn>(&math::minus));
    }
} additive_op;
// Symbol table of multiplicative infix operators: "*", "/" and "%".
struct multiplicative_op_ : x3::symbols<double (*)(double, double)> {
    multiplicative_op_() {
        using binary_fn = double (*)(double, double);
        add("*", static_cast<binary_fn>(&math::multiplies));
        add("/", static_cast<binary_fn>(&math::divides));
        // "%" is the floating-point remainder.
        add("%", static_cast<binary_fn>(&std::fmod));
    }
} multiplicative_op;
// Symbol table of logical infix operators: "&&" and "||".
struct logical_op_ : x3::symbols<double (*)(double, double)> {
    logical_op_() {
        using binary_fn = double (*)(double, double);
        add("&&", static_cast<binary_fn>(&math::logical_and));
        add("||", static_cast<binary_fn>(&math::logical_or));
    }
} logical_op;
// Symbol table of relational infix operators: "<", "<=", ">" and ">=".
struct relational_op_ : x3::symbols<double (*)(double, double)> {
    relational_op_() {
        using binary_fn = double (*)(double, double);
        add("<", static_cast<binary_fn>(&math::less));
        add("<=", static_cast<binary_fn>(&math::less_equals));
        add(">", static_cast<binary_fn>(&math::greater));
        add(">=", static_cast<binary_fn>(&math::greater_equals));
    }
} relational_op;
// Symbol table of equality infix operators: "==" and "!=".
struct equality_op_ : x3::symbols<double (*)(double, double)> {
    equality_op_() {
        using binary_fn = double (*)(double, double);
        add("==", static_cast<binary_fn>(&math::equals));
        add("!=", static_cast<binary_fn>(&math::not_equals));
    }
} equality_op;
// Symbol table holding the exponentiation operator "**".
struct power_ : x3::symbols<double (*)(double, double)> {
    power_() {
        using binary_fn = double (*)(double, double);
        // The cast selects the double overload of std::pow.
        add("**", static_cast<binary_fn>(&std::pow));
    }
} power;
The more complete grammar and the definition of ast attributes is below (with modifications based on the answer in Boost Spirit x3 conditional (ternary) operator parser)
// Tag types: each x3::rule is identified by its own (incomplete) class.
struct expression_class;
struct logical_class;
struct equality_class;
struct relational_class;
struct additive_class;
struct multiplicative_class;
struct factor_class;
struct primary_class;
struct unary_class;
struct binary_class;
struct conditional_class;
struct variable_class;
// Rule declarations
// Each rule pairs a tag with the attribute type it produces; the string is the rule's name.
auto const expression = x3::rule<expression_class , ast::expression >{"expression"};
auto const logical = x3::rule<logical_class , ast::expression >{"logical"};
auto const equality = x3::rule<equality_class , ast::expression >{"equality"};
auto const relational = x3::rule<relational_class , ast::expression >{"relational"};
auto const additive = x3::rule<additive_class , ast::expression >{"additive"};
auto const multiplicative = x3::rule<multiplicative_class, ast::expression >{"multiplicative"};
auto const factor = x3::rule<factor_class , ast::expression >{"factor"};
auto const primary = x3::rule<primary_class , ast::operand >{"primary"};
auto const unary = x3::rule<unary_class , ast::unary_op >{"unary"};
auto const binary = x3::rule<binary_class , ast::binary_op >{"binary"};
auto const conditional = x3::rule<conditional_class , ast::conditional_op>{"conditional"};
auto const variable = x3::rule<variable_class , std::string >{"variable"};
// Rule definitions
// The binary levels below chain from loosest to tightest binding:
// logical, equality, relational, additive, multiplicative, factor (power).
/* This is a bit of magic to me. Does this definition now say that expression
itself is now initializer list constructible from the conditional (which is spelled below)?
*/
auto const expression_def =
conditional
;
/* now ast::conditional_op type should be constructible from an initialization list consisting
of an expression and optional<tuple<expression,expression>> ? How these types should be
spelled in the struct? There is a circular reference between expression and conditional :D ?
*/
// A logical expression optionally followed by "? expression : expression".
auto const conditional_def =
logical >> -('?' > expression > ':'> expression)
;
auto const logical_def =
equality >> *(logical_op > equality)
;
auto const equality_def =
relational >> *(equality_op > relational)
;
auto const relational_def =
additive >> *(relational_op > additive)
;
auto const additive_def =
multiplicative >> *(additive_op > multiplicative)
;
auto const multiplicative_def =
factor >> *(multiplicative_op > factor)
;
// The right operand of "**" is factor itself, so powers nest to the right.
auto const factor_def =
primary >> *( power > factor )
;
// Named one-argument function call, e.g. abs(x).
auto const unary_def =
ufunc > '(' > expression > ')'
;
// Named two-argument function call, e.g. max(x, y).
auto const binary_def =
bfunc > '(' > expression > ',' > expression > ')'
;
// Innermost operand: number, parenthesized expression, prefix operator,
// function call or variable.
auto const primary_def =
x3::double_
| ('(' > expression > ')')
| (unary_op > primary)
| binary
| unary
// | conditional // by removing the conditional from primary implies the type of x3::variant changes
| variable
;
// Binds every *_def above to its rule.
// NOTE(review): no variable_def is visible in this excerpt although
// `variable` is listed here.
BOOST_SPIRIT_DEFINE(
expression,
logical,
equality,
relational,
additive,
multiplicative,
factor,
primary,
unary,
binary,
conditional,
variable
)
Here is how the AST is traversed using boost static visitor to evaluate the expression with a variable symbol table
namespace ast {
// Visitor that evaluates AST nodes to a double. Variable names are resolved
// through the symbol table passed to the constructor.
struct Evaluator {
    using result_type = double;
    explicit Evaluator(std::map<std::string, double> sym);
    double operator()(nil) const;
    double operator()(double n) const;
    double operator()(std::string const &c) const;
    double operator()(operation const &x, double lhs) const;
    double operator()(unary_op const &x) const;
    double operator()(binary_op const &x) const;
    double operator()(conditional_op const &x) const;
    double operator()(expression const &x) const;
  private:
    std::map<std::string, double> st; // variable name -> value
};

// Takes ownership of the symbol table.
Evaluator::Evaluator(std::map<std::string, double> sym)
    : st(std::move(sym)) {}

// A nil node is not expected to be evaluated.
double Evaluator::operator()(nil) const {
    BOOST_ASSERT(0);
    return 0;
}

// Numeric literal.
double Evaluator::operator()(double n) const {
    return n;
}

// Variable lookup; throws std::invalid_argument for unknown names.
double Evaluator::operator()(std::string const &c) const {
    auto found = st.find(c);
    if (found != st.end()) {
        return found->second;
    }
    throw std::invalid_argument("Unknown variable " + c);
}

// One step of an operator chain: combines the running value with the step's operand.
double Evaluator::operator()(operation const &x, double lhs) const {
    double operand_value = boost::apply_visitor(*this, x.rhs);
    return x.op(lhs, operand_value);
}

// Unary function or prefix operator.
double Evaluator::operator()(unary_op const &x) const {
    double argument = boost::apply_visitor(*this, x.rhs);
    return x.op(argument);
}

// Two-argument function; the left operand is evaluated first.
double Evaluator::operator()(binary_op const &x) const {
    double first = boost::apply_visitor(*this, x.lhs);
    double second = boost::apply_visitor(*this, x.rhs);
    return x.op(first, second);
}

// Ternary conditional: only the selected branch is evaluated.
double Evaluator::operator()(conditional_op const &x) const {
    bool const condition = static_cast<bool>(boost::apply_visitor(*this, x.lhs));
    if (condition) {
        return boost::apply_visitor(*this, x.rhs_true);
    }
    return boost::apply_visitor(*this, x.rhs_false);
}

// Operator chain: starts from the first operand and applies the steps in order.
double Evaluator::operator()(expression const &x) const {
    double acc = boost::apply_visitor(*this, x.lhs);
    for (operation const &step : x.rhs) {
        acc = (*this)(step, acc);
    }
    return acc;
}
} // namespace ast
So, the top-level attribute exposed is expression, which is, frankly, not representing an expression at all.
Rather, it is representing an artificial unit of expression input syntax, which could perhaps be dubbed "operation_chain".
This is also going to make it hard to use your AST for semantically correct transformations (like e.g. expression evaluation) because crucial information like precedence of operations is not encoded in it.
In fact, if we're not careful it's very possible that this information - if present in the input - would be lost. I think it's possible in practice to go from your AST and reconstruct the operation tree with dependent operations in order of their precedence. But I usually err on the safe side of explicitly modeling the expression tree to reflect the operation dependencies.
That said, the conditional_op is not a chaining binary operation, so it doesn't fit the mold. I'd suggest making the "top level" rules expose an ast::operand instead (so it can fit the conditional_op or expression both just fine).
However, due to the "lazy" way we detect the conditional, this requires some semantic actions to actually build the proper attributes:
// The first action stores the parsed logical expression as the rule's value.
// The second runs only when a "? a : b" tail follows and replaces that value
// with a conditional_op built from it.
auto const conditional_def =
logical [([](auto& ctx) { _val(ctx) = _attr(ctx); })]
>> -('?' > expression > ':' > expression) [make_conditional_op]
;
The first semantic action is straight-forward, the second one became large enough to define it out-of-line:
// Semantic action for the "? a : b" tail: wraps the value parsed so far (the
// condition) and the two branches into a conditional_op, which then replaces
// the rule's value.
auto make_conditional_op = [](auto& ctx) {
    auto&& branches = x3::_attr(ctx); // (true branch, false branch)
    x3::_val(ctx) = ast::conditional_op {
        x3::_val(ctx),
        boost::fusion::at_c<0>(branches),
        boost::fusion::at_c<1>(branches) };
};
Still straight-forward but clumsy. Notice that the reason is that we expose different types depending on the presence of the optional branch.
Here's it all put together working:
Live On Coliru
//#define BOOST_SPIRIT_X3_DEBUG
//#define DEBUG_SYMBOLS
#include <iostream>
#include <functional>
#include <iomanip>
#include <list>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/math/constants/constants.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
// AST produced by the grammar in namespace P.
namespace ast {
// Placeholder alternative for "nothing parsed".
struct nil {};
// Forward declarations for the recursive variant below.
struct unary_op;
struct binary_op;
struct conditional_op;
struct expression;
// Callables stored in the tree for one- and two-argument operations.
using UnFunc = std::function<double(double)>;
using BinFunc = std::function<double(double, double)>;
// Any operand: a number, a variable name, or a nested operation/expression.
struct operand : x3::variant<
nil
, double
, std::string
, x3::forward_ast<unary_op>
, x3::forward_ast<binary_op>
, x3::forward_ast<conditional_op>
, x3::forward_ast<expression> >
{
using base_type::base_type;
using base_type::operator=;
};
// One-argument function or prefix operator applied to `rhs`.
struct unary_op {
UnFunc op;
operand rhs;
};
// Two-argument function applied to `lhs` and `rhs`.
struct binary_op {
BinFunc op;
operand lhs;
operand rhs;
};
// Ternary conditional: `lhs ? rhs_true : rhs_false`.
struct conditional_op {
operand lhs;
operand rhs_true;
operand rhs_false;
};
// One step of an operator chain: the operator and its right-hand operand.
struct operation {
BinFunc op;
operand rhs;
};
// Operator chain: a first operand followed by zero or more operations.
struct expression {
operand lhs;
std::list<operation> rhs;
};
} // namespace ast
BOOST_FUSION_ADAPT_STRUCT(ast::expression, lhs, rhs)
BOOST_FUSION_ADAPT_STRUCT(ast::operation, op, rhs)
BOOST_FUSION_ADAPT_STRUCT(ast::conditional_op, lhs, rhs_true, rhs_false)
BOOST_FUSION_ADAPT_STRUCT(ast::binary_op, op, lhs, rhs)
BOOST_FUSION_ADAPT_STRUCT(ast::unary_op, op, rhs)
namespace P {
// Shared error handler: the rule tag classes derive from it so every rule
// reports expectation failures the same way.
struct ehbase {
    // Prints the text between `f` and `l`, a caret below the failure position
    // and the name of the expected parser, then makes the rule fail.
    template <typename It, typename Ctx>
    x3::error_handler_result on_error(It f, It l, x3::expectation_failure<It> const& e, Ctx const& /*ctx*/) const {
        auto const caret_width = 1 + std::distance(f, e.where());
        std::cout << std::string(f, l) << "\n";
        std::cout << std::setw(caret_width) << "^";
        std::cout << "-- expected: " << e.which() << "\n";
        return x3::error_handler_result::fail;
    }
};
// Rule tags; deriving from ehbase gives every rule the same on_error handler.
struct expression_class : ehbase {};
struct logical_class : ehbase {};
struct equality_class : ehbase {};
struct relational_class : ehbase {};
struct additive_class : ehbase {};
struct multiplicative_class : ehbase {};
struct factor_class : ehbase {};
struct primary_class : ehbase {};
struct unary_class : ehbase {};
struct binary_class : ehbase {};
struct conditional_class : ehbase {};
struct variable_class : ehbase {};
// Rule declarations
// The top-level rules (expression, conditional, primary) expose ast::operand,
// which can hold either a conditional_op or an operator chain.
auto const expression = x3::rule<expression_class , ast::operand >{"expression"};
auto const conditional = x3::rule<conditional_class , ast::operand >{"conditional"};
auto const primary = x3::rule<primary_class , ast::operand >{"primary"};
// The binary-operator levels each produce an operator chain.
auto const logical = x3::rule<logical_class , ast::expression >{"logical"};
auto const equality = x3::rule<equality_class , ast::expression >{"equality"};
auto const relational = x3::rule<relational_class , ast::expression >{"relational"};
auto const additive = x3::rule<additive_class , ast::expression >{"additive"};
auto const multiplicative = x3::rule<multiplicative_class, ast::expression >{"multiplicative"};
auto const factor = x3::rule<factor_class , ast::expression >{"factor"};
auto const unary = x3::rule<unary_class , ast::unary_op >{"unary"};
auto const binary = x3::rule<binary_class , ast::binary_op >{"binary"};
auto const variable = x3::rule<variable_class , std::string >{"variable"};
// Symbol table of named mathematical constants.
struct constant_ : x3::symbols<double> {
    constant_() {
        namespace constants = boost::math::constants;
        add("e", constants::e<double>());
        add("pi", constants::pi<double>());
    }
} constant;
// Symbol table of one-argument functions that can be called by name.
struct ufunc_ : x3::symbols<ast::UnFunc> {
    ufunc_() {
        // `std::abs<double>` can only name a function template (the
        // std::complex / std::valarray overloads), never the plain
        // `double std::abs(double)` overload. A lambda selects the intended
        // overload and matches the style of the other tables.
        this->add
            ("abs", [](double v) { return std::abs(v); })
            ;
    }
} ufunc;
// Symbol table of two-argument functions that can be called by name.
struct bfunc_ : x3::symbols<ast::BinFunc> {
    bfunc_() {
        add("max", [](double a,double b){ return std::fmax(a,b); });
        add("min", [](double a,double b){ return std::fmin(a,b); });
        add("pow", [](double a,double b){ return std::pow(a,b); });
    }
} bfunc;
// Symbol table of prefix operators: "+", "-" and "!".
struct unary_op_ : x3::symbols<ast::UnFunc> {
    unary_op_() {
        add("+", [](double v) { return +v; });
        add("-", std::negate<>{});
        add("!", [](double v) { return !v; });
    }
} unary_op;
// Symbol table of additive infix operators: "+" and "-".
struct additive_op_ : x3::symbols<ast::BinFunc> {
    additive_op_() {
        add("+", std::plus<>{});
        add("-", std::minus<>{});
    }
} additive_op;
// Symbol table of multiplicative infix operators: "*", "/" and "%".
struct multiplicative_op_ : x3::symbols<ast::BinFunc> {
    multiplicative_op_() {
        add("*", std::multiplies<>{});
        add("/", std::divides<>{});
        // "%" is the floating-point remainder.
        add("%", [](double a, double b) { return std::fmod(a, b); });
    }
} multiplicative_op;
// Symbol table of logical infix operators: "&&" and "||".
struct logical_op_ : x3::symbols<ast::BinFunc> {
    logical_op_() {
        add("&&", std::logical_and<>{});
        add("||", std::logical_or<>{});
    }
} logical_op;
// Symbol table of relational infix operators: "<", "<=", ">" and ">=".
struct relational_op_ : x3::symbols<ast::BinFunc> {
    relational_op_() {
        add("<", std::less<>{});
        add("<=", std::less_equal<>{});
        add(">", std::greater<>{});
        add(">=", std::greater_equal<>{});
    }
} relational_op;
// Symbol table of equality infix operators: "==" and "!=".
struct equality_op_ : x3::symbols<ast::BinFunc> {
    equality_op_() {
        add("==", std::equal_to<>{});
        add("!=", std::not_equal_to<>{});
    }
} equality_op;
// Symbol table holding the exponentiation operator "**".
struct power_ : x3::symbols<ast::BinFunc> {
    power_() {
        add("**", [](double v, double exp) { return std::pow(v, exp); });
    }
} power;
// A variable name: one letter followed by letters or digits, matched without
// skipping whitespace inside the name.
auto const variable_def = x3::lexeme[x3::alpha >> *x3::alnum];
// Rule definitions
// An expression is simply a conditional, which in turn may be a plain
// logical expression.
auto const expression_def =
conditional
;
// Semantic action for the "? a : b" tail: wraps the value parsed so far (the
// condition) and the two branches into a conditional_op, which then replaces
// the rule's value.
auto make_conditional_op = [](auto& ctx) {
    auto&& branches = x3::_attr(ctx); // (true branch, false branch)
    x3::_val(ctx) = ast::conditional_op {
        x3::_val(ctx),
        boost::fusion::at_c<0>(branches),
        boost::fusion::at_c<1>(branches) };
};
// The first action stores the parsed logical expression as the rule's value.
// The second runs only when a "? a : b" tail follows and replaces that value
// with a conditional_op built from it.
auto const conditional_def =
logical [([](auto& ctx) { _val(ctx) = _attr(ctx); })]
>> -('?' > expression > ':' > expression) [make_conditional_op]
;
// Binary-operator levels, from loosest to tightest binding. After an operator
// has matched, `>` makes the following operand mandatory (a failure is
// reported through on_error).
auto const logical_def =
equality >> *(logical_op > equality)
;
auto const equality_def =
relational >> *(equality_op > relational)
;
auto const relational_def =
additive >> *(relational_op > additive)
;
auto const additive_def =
multiplicative >> *(additive_op > multiplicative)
;
auto const multiplicative_def =
factor >> *(multiplicative_op > factor)
;
// The right operand of "**" is factor itself, so powers nest to the right.
auto const factor_def =
primary >> *( power > factor )
;
// Prefix operator applied to a primary, or a named one-argument function call.
auto const unary_def
= (unary_op > primary)
| (ufunc > '(' > expression > ')')
;
// Named two-argument function call, e.g. max(x, y).
auto const binary_def =
bfunc > '(' > expression > ',' > expression > ')'
;
// Innermost operand.
// NOTE(review): `constant` is tried before `variable` and the symbol table has
// no word-boundary check, so a variable whose name starts with "e" or "pi"
// would presumably be cut short; confirm before relying on such names.
auto const primary_def =
x3::double_
| ('(' > expression > ')')
//| (unary_op > primary)
| binary
| unary
| constant
| variable
;
BOOST_SPIRIT_DEFINE(expression)
BOOST_SPIRIT_DEFINE(logical)
BOOST_SPIRIT_DEFINE(equality)
BOOST_SPIRIT_DEFINE(relational)
BOOST_SPIRIT_DEFINE(additive)
BOOST_SPIRIT_DEFINE(multiplicative)
BOOST_SPIRIT_DEFINE(factor)
BOOST_SPIRIT_DEFINE(primary)
BOOST_SPIRIT_DEFINE(unary)
BOOST_SPIRIT_DEFINE(binary)
BOOST_SPIRIT_DEFINE(conditional)
BOOST_SPIRIT_DEFINE(variable)
}
// Demo driver: parses each sample input, reports success or failure, and
// shows any input the parser left unconsumed.
int main() {
    for (std::string const input : {
            "x+(3**pow(2,8))",
            "1 + (2 + abs(x))",
            "min(x,1+y)",
            "(x > y ? 1 : 0) * (y - z)",
            "min(3**4,7))",
            "3***4",
            "(3,4)",
        })
    {
        std::cout << " ===== " << std::quoted(input) << " =====\n";
        auto cur = input.begin();
        auto last = input.end();
        ast::operand out;
        bool const ok = x3::phrase_parse(cur, last, P::expression, x3::space, out);
        std::cout << (ok ? "Success\n" : "Failed\n");
        // Trailing input is reported whether or not the parse succeeded.
        if (cur != last) {
            std::cout << "Unparsed: " << std::quoted(std::string(cur, last)) << "\n";
        }
    }
}
Printing
===== "x+(3**pow(2,8))" =====
Success
===== "1 + (2 + abs(x))" =====
Success
===== "min(x,1+y)" =====
Success
===== "(x > y ? 1 : 0) * (y - z)" =====
Success
===== "min(3**4,7))" =====
Success
Unparsed: ")"
===== "3***4" =====
3***4
^-- expected: factor
Failed
Unparsed: "3***4"
===== "(3,4)" =====
(3,4)
^-- expected: ')'
Failed
Unparsed: "(3,4)"
I feel it should be possible to
- be more elegant (Boost Spirit: "Semantic actions are evil"?)
- model the expression more semantically
but sadly I lacked the time to work on it, so this is it for the moment :)

Boost Spirit X3: Collapsing one-element lists

Say I have a (simplified) recursive grammar like this:
OrExpr := AndExpr % "or"
AndExpr := Term % "and"
Term := ParenExpr | String
ParenExpr := '(' >> OrExpr >> ')'
String := lexeme['"' >> *(char_ - '"') >> '"']
So this works, but the problem is that it will wrap everything in multiple layers of expression. For example, the string "hello" and ("world" or "planet" or "globe") would parse as OrExpr(AndExpr("hello", OrExpr(AndExpr("world"), AndExpr("planet"), AndExpr("globe")))) (playing fast and loose with the syntax, but hopefully you understand). What I'd like is for the one-element nodes to be collapsed into their parent, so it would end up as AndExpr("hello", OrExpr("world", "planet", "globe")).
This can be solved with actions and using a state machine that only constructs the outer object if there's more than one child inside it. But I'm wondering if there's a way to fix this problem without using parser actions?
EDIT: Almost minimal example
Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace burningmime::setmatch::ast
{
// an expression node (either an AND or an OR)
struct Expr;
// child of an expression -- either another expression, or a terminal
// (forward_ast lets the variant refer to Expr before it is complete)
struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
{
using base_type::base_type;
using base_type::operator=;
};
// tags for expression type
enum OPER
{
OPER_AND = 1,
OPER_OR = 2
};
// see above
// `op` says how `children` are combined; children are kept in input order.
struct Expr
{
OPER op;
std::vector<Node> children;
};
// Debug visitor: writes a tree to std::cout as And(...) / Or(...) with
// comma-separated children; terminals are printed verbatim.
struct AstPrinter
{
    void operator()(const Expr& node) const
    {
        std::cout << (node.op == OPER_AND ? "And(" : "Or(");
        const char* separator = "";
        for(const auto& child : node.children)
        {
            std::cout << separator;
            separator = ", "; // every child after the first is preceded by a comma
            boost::apply_visitor(*this, child);
        }
        std::cout << ")";
    }

    void operator()(const std::string& node) const
    {
        std::cout << node;
    }
};
}
// these need to be at top-level scope
// basically this adds compile-time type information, so the parser knows where to put various attributes
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)
// Declares an x3 rule variable NAME with attribute TYPE; the rule's tag class
// is also called NAME, and the rule's debug name is the stringized NAME.
#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME, TYPE> NAME = #NAME;
// Declares kw_X, a case-insensitive parser for the keyword X.
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
// Defines NAME_def as GRAMMAR and binds it to the rule NAME.
#define DEFINE_RULE(NAME, GRAMMAR) \
static const auto NAME##_def = GRAMMAR; \
BOOST_SPIRIT_DEFINE(NAME)
// Grammar as posted in the question: AndExpr and OrExpr always produce an
// ast::Expr, so a single term still ends up wrapped in And(...)/Or(...).
namespace burningmime::setmatch::parser
{
// we need to pre-declare the rules so they can be used recursively
DECLARE_RULE(Phrase, std::string)
DECLARE_RULE(Term, ast::Node)
DECLARE_RULE(AndExpr, ast::Expr)
DECLARE_RULE(OrExpr, ast::Expr)
DECLARE_RULE(ParenExpr, ast::Expr)
// keywords
KEYWORD(and)
KEYWORD(or)
static const auto lparen = x3::lit('(');
static const auto rparen = x3::lit(')');
// helper parsers
static const auto keywords = kw_and | kw_or | lparen | rparen;
// a run of characters that are neither whitespace nor parentheses
static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
// a word that is not one of the keywords
static const auto bareWord = word - keywords;
// char_('"') (rather than lit) keeps the surrounding quotes in the attribute
static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];
DEFINE_RULE(Phrase, quotedString | bareWord)
DEFINE_RULE(Term, ParenExpr | Phrase)
DEFINE_RULE(ParenExpr, lparen >> OrExpr >> rparen)
// x3::attr supplies the operator tag as the first member of ast::Expr
DEFINE_RULE(AndExpr, x3::attr(ast::OPER_AND) >> (Term % kw_and))
DEFINE_RULE(OrExpr, x3::attr(ast::OPER_OR) >> (AndExpr % kw_or))
}
namespace burningmime::setmatch
{
// Parses the NUL-terminated rule text in `buf` as an OrExpr and prints the
// resulting tree to stdout.
// Throws std::runtime_error when parsing fails or stops before the end of the input.
void parseRuleFluent(const char* buf)
{
    ast::Expr root;
    const char* cursor = buf;
    const char* stop = buf + strlen(buf);
    const bool parsed = x3::phrase_parse(cursor, stop, parser::OrExpr, x3::ascii::space, root);
    const bool consumedAll = (cursor == stop);
    if(!(parsed && consumedAll))
        throw std::runtime_error(std::string("Could not parse rule: ") + buf);
    // NOTE(review): `cursor` equals `stop` here, so %s always prints an empty
    // string; printing `buf` was probably intended.
    printf("Result of parsing: %s\n=========================\n", cursor);
    // Wrap the root in a Node so it can be visited like any other child.
    ast::Node root2(root);
    boost::apply_visitor(ast::AstPrinter(), root2);
}
}
// Demo entry point: parses and prints one sample rule.
int main()
{
    const char* const sampleRule = R"#("hello" and ("world" or "planet" or "globe"))#";
    burningmime::setmatch::parseRuleFluent(sampleRule);
}
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace burningmime::setmatch::ast
{
// an expression node (either an AND or an OR)
struct Expr;
// child of an expression -- either another expression, or a terminal
// (forward_ast lets the variant refer to Expr before it is complete)
struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
{
using base_type::base_type;
using base_type::operator=;
};
// tags for expression type
enum OPER
{
OPER_AND = 1,
OPER_OR = 2
};
// see above
// `op` says how `children` are combined; children are kept in input order.
struct Expr
{
OPER op;
std::vector<Node> children;
};
// Debug visitor: writes a tree to std::cout as And(...) / Or(...) with
// comma-separated children; terminals are printed verbatim.
struct AstPrinter
{
    void operator()(const Expr& node) const
    {
        std::cout << (node.op == OPER_AND ? "And(" : "Or(");
        const char* separator = "";
        for(const auto& child : node.children)
        {
            std::cout << separator;
            separator = ", "; // every child after the first is preceded by a comma
            boost::apply_visitor(*this, child);
        }
        std::cout << ")";
    }

    void operator()(const std::string& node) const
    {
        std::cout << node;
    }
};
}
// these need to be at top-level scope
// basically this adds compile-time type information, so the parser knows where to put various attributes
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)
// Declares an x3 rule variable NAME with attribute TYPE. The tag class gets
// the distinct name NAME_r, so it does not share an identifier with the variable.
#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME##_r, TYPE> NAME = #NAME;
// Declares kw_X, a case-insensitive parser for the keyword X.
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
// Defines NAME_def as GRAMMAR and binds it to the rule NAME.
#define DEFINE_RULE(NAME, GRAMMAR) \
static const auto NAME##_def = GRAMMAR; \
BOOST_SPIRIT_DEFINE(NAME)
namespace burningmime::setmatch::parser
{
// we need to pre-declare the rules so they can be used recursively
// Every expression rule exposes ast::Node, so a rule can yield either a bare
// terminal or a full Expr.
DECLARE_RULE(Phrase, std::string)
DECLARE_RULE(Term, ast::Node)
DECLARE_RULE(AndExpr, ast::Node)
DECLARE_RULE(OrExpr, ast::Node)
DECLARE_RULE(ParenExpr, ast::Node)
// keywords
KEYWORD(and)
KEYWORD(or)
static const auto lparen = x3::lit('(');
static const auto rparen = x3::lit(')');
// helper parsers
static const auto keywords = kw_and | kw_or | lparen | rparen;
// a run of characters that are neither whitespace nor parentheses
static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
// a word that is not one of the keywords
static const auto bareWord = word - keywords;
// char_('"') (rather than lit) keeps the surrounding quotes in the attribute
static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];
DEFINE_RULE(Phrase, quotedString | bareWord)
DEFINE_RULE(Term, ParenExpr | Phrase)
DEFINE_RULE(ParenExpr, lparen >> OrExpr >> rparen)
// Semantic action for a list rule. A list with exactly one element is passed
// through unchanged; any other list becomes an Expr tagged with Op.
template <ast::OPER Op>
struct make_node
{
    template <typename Context>
    void operator()(Context const& ctx) const
    {
        auto&& parsed = _attr(ctx); // the parsed list of child nodes
        if (parsed.size() != 1)
        {
            _val(ctx) = ast::Expr{ Op, std::move(parsed) };
            return;
        }
        // Single child: collapse the would-be wrapper into the child itself.
        _val(ctx) = std::move(parsed[0]);
    }
};
// Both list rules delegate node construction to make_node, which collapses
// one-element lists.
DEFINE_RULE(AndExpr, (Term % kw_and)[make_node<ast::OPER_AND>{}])
DEFINE_RULE(OrExpr, (AndExpr % kw_or)[make_node<ast::OPER_OR>{}])
}
namespace burningmime::setmatch
{
// Parses the NUL-terminated rule text in `buf` as an OrExpr and prints the
// resulting tree to stdout.
// Throws std::runtime_error when parsing fails or stops before the end of the input.
void parseRuleFluent(const char* buf)
{
    ast::Node root;
    const char* cursor = buf;
    const char* stop = buf + strlen(buf);
    const bool parsed = x3::phrase_parse(cursor, stop, parser::OrExpr, x3::ascii::space, root);
    const bool consumedAll = (cursor == stop);
    if (!(parsed && consumedAll))
        throw std::runtime_error(std::string("Could not parse rule: ") + buf);
    // NOTE(review): `cursor` equals `stop` here, so %s always prints an empty
    // string (as the sample output shows); printing `buf` was probably intended.
    printf("Result of parsing: %s\n=========================\n", cursor);
    boost::apply_visitor(ast::AstPrinter(), root);
}
}
// Demo entry point: parses one hard-coded sample rule and prints its AST.
int main()
{
    const char* const sampleRule = R"#("hello" and ("world" or "planet" or "globe"))#";
    burningmime::setmatch::parseRuleFluent(sampleRule);
    return 0;
}
https://wandbox.org/permlink/kMSHOHG0pgwGr0zv
Output:
Result of parsing:
=========================
And("hello", Or("world", "planet", "globe"))

Attributes from Boost.Spirit grammar: error from std:vector of boost::variant

I got a working parser for reading position descriptions for a board game (international draughts, official grammar):
#include <boost/spirit/home/x3.hpp>
#include <iostream>
// Attribute-less version of the draughts position grammar: plain parser
// expressions, no AST types attached yet.
namespace x3 = boost::spirit::x3;
// punctuation literals
auto const colon = x3::lit(':');
auto const comma = x3::lit(',');
auto const dash = x3::lit('-');
auto const dot = x3::lit('.');
// 'K' marks a king; a colour is either 'B' or 'W'
auto const king = x3::char_('K');
auto const color = x3::char_("BW");
// a square number
auto const num_sq = x3::int_;
auto const num_pc = -king >> num_sq; // Kxx means king on square xx, xx a man on that square
auto const num_rng = num_pc >> dash >> num_sq; // xx-yy means range of squares xx through yy (inclusive)
auto const num_seq = (num_rng | num_pc) % comma; // <--- attribute should be std::vector<boost::variant>
// ':' followed by a colour and an optional comma-separated list of pieces/ranges
auto const ccn = colon >> color >> -num_seq;
auto const num_not = x3::repeat(2)[ccn]; // need to specify both white and black pieces
// whole position: a colour, the two ccn sections, and an optional trailing '.'
auto const fen = color >> num_not >> -dot;
Live On Coliru
Now I want to extract the values from the synthesized attributes, so I did the boilerplate dance around Boost.Fusion etc.,
// AST types meant to receive the attributes of the grammar rules of the same name.
namespace ast {
// a single piece: optional king marker plus a square number
struct num_pc { boost::optional<char> k; int sq; };
// a range of squares: optional king marker plus first and last square
struct num_rng { boost::optional<char> k; int first, last; };
// one list entry is either a range or a single piece
using rng_or_pc = boost::variant<num_rng, num_pc>;
// wrapper around the list of entries
struct num_seq { std::vector<rng_or_pc> sqrs; };
// colour character plus an optional list of entries
struct ccn { char c; boost::optional<num_seq> seq; };
// wrapper around the list of ccn sections
struct num_not { std::vector<ccn> n; };
// whole position: colour character plus the ccn sections
struct fen { char c; num_not n; };
} // namespace ast
// Fusion adaptation of every AST struct, using the (type, name) pair form of the macro.
BOOST_FUSION_ADAPT_STRUCT(ast::num_pc, (boost::optional<char>, k), (int, sq))
BOOST_FUSION_ADAPT_STRUCT(ast::num_rng, (boost::optional<char>, k), (int, first), (int, last))
BOOST_FUSION_ADAPT_STRUCT(ast::num_seq, (std::vector<ast::rng_or_pc>, sqrs))
BOOST_FUSION_ADAPT_STRUCT(ast::ccn, (char, c), (boost::optional<ast::num_seq>, seq))
BOOST_FUSION_ADAPT_STRUCT(ast::num_not, (std::vector<ast::ccn>, n))
BOOST_FUSION_ADAPT_STRUCT(ast::fen, (char, c), (ast::num_not, n))
// Rule declarations: each x3::rule pairs a tag class with the ast type it should produce.
// NOTE(review): this is the version the question reports as failing to compile
// with "Attribute does not have the expected size."
x3::rule<class num_pc_class, ast::num_pc > num_pc = "num_pc";
x3::rule<class num_rng_class, ast::num_rng> num_rng = "num_rng";
x3::rule<class num_seq_class, ast::num_seq> num_seq = "num_seq";
x3::rule<class ccn_class, ast::ccn > ccn = "ccn";
x3::rule<class num_not_class, ast::num_not> num_not = "num_not";
x3::rule<class fen_class, ast::fen > fen = "fen";
// punctuation literals
auto const colon = x3::lit(':');
auto const comma = x3::lit(',');
auto const dash = x3::lit('-');
auto const dot = x3::lit('.');
// 'K' marks a king; a colour is either 'B' or 'W'
auto const king = x3::char_('K');
auto const color = x3::char_("BW");
// a square number
auto const num_sq = x3::int_;
// Rule definitions: same parser expressions as the attribute-less grammar,
// now named <rule>_def so BOOST_SPIRIT_DEFINE can attach them to the rules.
auto const num_pc_def = -king >> num_sq;
auto const num_rng_def = num_pc >> dash >> num_sq;
auto const num_seq_def = (num_rng | num_pc) % comma;
auto const ccn_def = colon >> color >> -num_seq;
auto const num_not_def = x3::repeat(2)[ccn];
auto const fen_def = color >> num_not >> -dot;
BOOST_SPIRIT_DEFINE(num_pc, num_rng, num_seq, ccn, num_not, fen)
Live On Coliru
However, I then get an error saying that
error: static_assert failed "Attribute does not have the expected
size."
and a couple of pages down:
^ main.cpp:16:8: note: candidate constructor (the implicit move constructor) not viable: no known conversion from
'std::vector<boost::variant<ast::num_rng, ast::num_pc>,
std::allocator<boost::variant<ast::num_rng, ast::num_pc> > >' to
'ast::num_seq' for 1st argument struct num_seq {
std::vector<rng_or_pc> sqrs; };
^ main.cpp:16:8: note: candidate constructor (the implicit copy constructor) not viable: no known conversion from
'std::vector<boost::variant<ast::num_rng, ast::num_pc>,
std::allocator<boost::variant<ast::num_rng, ast::num_pc> > >' to
'const ast::num_seq' for 1st argument struct num_seq {
std::vector<rng_or_pc> sqrs; };
Question: where is this error coming from, and how do I resolve it? Apparently the synthesized attribute of my num_seq rule is not equal to std::vector<boost::variant<...>>. How can I correct this?
I've spent some time trying to understand the grammar.
I strongly suggest readable identifiers. It's very hard to understand what's going on, while I have the strong impression it's actually a really simple grammar.
I suggest the simplified version shown below.
Because your grammar doesn't use recursion there's no real need for the rule and tagged parser types.
Also use a namespace for the parser artefacts.
Consider encapsulating the skipper inside the grammar (x3::skip[]) instead of letting the caller decide.
Add a few helpers to be able to print the AST for verification:
// Prints a vector as "{" + each element followed by a space + "}".
template <typename T> std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
    os << "{";
    for (auto& el : v) os << el << " ";
    return os << "}";
}
// Prints a piece as the king marker (only when present) directly followed by the square.
// The optional is dereferenced on purpose: streaming the boost::optional itself
// goes through optional_io, which puts a space in front of the value.
std::ostream& operator<<(std::ostream& os, num_pc const& p) {
    if (p.k) os << *p.k;
    return os << p.sq;
}
// Prints a range as "<piece>-<last>".
std::ostream& operator<<(std::ostream& os, num_rng const& r) { return os << r.pc << "-" << r.last; }
// Prints the colour, a space, then the optional sequence (streamed through optional_io).
std::ostream& operator<<(std::ostream& os, ccn const& o) { return os << o.c << " " << o.seq; }
// Prints the wrapped vector of ccn sections.
std::ostream& operator<<(std::ostream& os, num_not const& nn) { return os << nn.n; }
I'd avoid wrapping the other vector unnecessarily too:
// num_not is just the list of ccn sections, with no wrapper struct around it
using num_not = std::vector<ccn>;
Use the modern ADAPT macros (as you're using C++14 by definition):
// Name-only form of the macro: member names are listed without their types.
BOOST_FUSION_ADAPT_STRUCT(ast::num_pc, k, sq)
BOOST_FUSION_ADAPT_STRUCT(ast::num_rng, pc, last)
BOOST_FUSION_ADAPT_STRUCT(ast::ccn, c, seq)
BOOST_FUSION_ADAPT_STRUCT(ast::fen, c, n)
-
Live Demo
Live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/optional.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <vector>
// AST for a draughts position description, plus stream operators used to
// print the parsed result.
namespace ast {
    // A single piece: optional king marker ('K') and the square it stands on.
    struct num_pc {
        boost::optional<char> k;
        int sq;
    };
    // A range of squares: the first piece and the last square number.
    struct num_rng {
        num_pc pc;
        int last;
    };
    // One list entry is either a range or a single piece.
    using rng_or_pc = boost::variant<num_rng, num_pc>;
    using num_seq = std::vector<rng_or_pc>;
    // One colour section: the colour character and an optional list of entries.
    struct ccn {
        char c;
        boost::optional<num_seq> seq;
    };
    using num_not = std::vector<ccn>;
    // Whole position: a colour character followed by the colour sections.
    struct fen {
        char c;
        num_not n;
    };

    // Prints a vector as "{" + each element followed by a space + "}".
    template <typename T> std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
        os << "{";
        for (auto& el : v) os << el << " ";
        return os << "}";
    }
    // Prints a piece as the king marker (only when present) directly followed by the square.
    // The optional is dereferenced on purpose: streaming the boost::optional itself
    // goes through optional_io, which puts a space in front of the value.
    std::ostream& operator<<(std::ostream& os, num_pc const& p) {
        if (p.k) os << *p.k;
        return os << p.sq;
    }
    // Prints a range as "<piece>-<last>".
    std::ostream& operator<<(std::ostream& os, num_rng const& r) { return os << r.pc << "-" << r.last; }
    // Prints the colour, a space, then the optional sequence (streamed through optional_io).
    std::ostream& operator<<(std::ostream& os, ccn const& o) { return os << o.c << " " << o.seq; }
}
// Fusion adaptation of the four AST structs (name-only form of the macro);
// num_seq and num_not are plain std::vector aliases and are not adapted.
BOOST_FUSION_ADAPT_STRUCT(ast::num_pc, k, sq)
BOOST_FUSION_ADAPT_STRUCT(ast::num_rng, pc, last)
BOOST_FUSION_ADAPT_STRUCT(ast::ccn, c, seq)
BOOST_FUSION_ADAPT_STRUCT(ast::fen, c, n)
// Parser for the position notation; only `fen` is re-exported from the nested
// grammar namespace.
namespace FEN {
namespace x3 = boost::spirit::x3;
namespace grammar
{
using namespace x3;
// as<T>(p): returns a rule with attribute type T whose definition is as_parser(p).
template<typename T>
auto as = [](auto p) { return rule<struct _, T>{} = as_parser(p); };
// unsigned integer parser used for square numbers
uint_type const number {};
// a colour is either 'B' or 'W'
auto const color = char_("BW");
// optional 'K' followed by a square number
auto const num_pc = as<ast::num_pc> ( -char_('K') >> number );
// piece, '-', last square number
auto const num_rng = as<ast::num_rng> ( num_pc >> '-' >> number );
// comma-separated list of ranges or single pieces (range is tried first)
auto const num_seq = as<ast::num_seq> ( (num_rng | num_pc) % ',' );
// ':' followed by a colour and an optional list
auto const ccn = as<ast::ccn> ( ':' >> color >> -num_seq );
// exactly two colour sections
auto const num_not = as<ast::num_not> ( repeat(2)[ccn] );
// whole position: colour, the two sections, optional trailing '.'
auto const fen = as<ast::fen> ( color >> num_not >> -lit('.') );
}
using grammar::fen;
}
// Test driver: parses a fixed set of sample positions with FEN::fen (skipping
// whitespace) and prints, for each one, the input followed by either the
// parsed AST or a failure notice, plus any input the parser left unconsumed.
int main() {
    for (std::string const t : {
        "B:W18,24,27,28,K10,K15:B12,16,20,K22,K25,K29",
        "B:W18,19,21,23,24,26,29,30,31,32:B1,2,3,4,6,7,9,10,11,12",
        "W:B1-20:W31-50", // initial position
        "W:B:W", // empty board
        "W:B1:W", // only black pieces
        "W:B:W50" // only white pieces
    }) {
        auto b = t.begin(), e = t.end();
        ast::fen data;
        bool ok = phrase_parse(b, e, FEN::fen, FEN::x3::space, data);
        std::cout << t << "\n";
        if (ok) {
            std::cout << "Parsed: " << boost::fusion::as_vector(data) << "\n";
        } else {
            std::cout << "Parse failed:\n";
            std::cout << "\t on input: " << t << "\n";
        }
        // b is advanced by phrase_parse; anything between b and e was not consumed.
        // The leftover text is wrapped in a matching pair of single quotes.
        if (b != e)
            std::cout << "\t Remaining unparsed: '" << std::string(b, e) << "'\n";
    }
}
Prints:
B:W18,24,27,28,K10,K15:B12,16,20,K22,K25,K29
Parsed: (B {W {18 24 27 28 K10 K15 } B {12 16 20 K22 K25 K29 } })
B:W18,19,21,23,24,26,29,30,31,32:B1,2,3,4,6,7,9,10,11,12
Parsed: (B {W {18 19 21 23 24 26 29 30 31 32 } B {1 2 3 4 6 7 9 10 11 12 } })
W:B1-20:W31-50
Parsed: (W {B {1-20 } W {31-50 } })
W:B:W
Parsed: (W {B -- W -- })
W:B1:W
Parsed: (W {B {1 } W -- })
W:B:W50
Parsed: (W {B -- W {50 } })