With the help of the amazing sehe, I recently managed to advance my Boost Spirit X3 parser for HLSL (High Level Shading Language), a C-like language for writing shader kernels for GPUs. Here is the rough grammar I am following...
https://craftinginterpreters.com/appendix-i.html
Here is the previous question and answer for the curious.
Trying to parse nested expressions with boost spirit x3
I am now trying to implement unary and binary operators and have hit a stumbling block with how they recurse. I am able to get it to compile and a single binary operator is parsed, but having multiple nested ones doesn't seem to be working. I suspect the solution is going to be involving semantic actions again to manually propagate values but I struggle to see how to do that yet as the side effects are hard to understand (still working out how it all works).
Here's my compiling example...
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl
{
namespace ast
{
    // Empty node; it is the first alternative listed in Expr.
    struct Void {};

    // Nodes that themselves contain an Expr are only declared up front, so
    // that Expr can name them through x3::forward_ast before they are complete.
    struct Get;
    struct Set;
    struct Call;
    struct Assign;
    struct Binary;
    struct Unary;

    // A bare identifier.
    struct Variable
    {
        std::string name;
    };

    // Any expression node of the grammar.
    using Expr = x3::variant<
        Void,
        x3::forward_ast<Get>,
        x3::forward_ast<Set>,
        Variable,
        x3::forward_ast<Call>,
        x3::forward_ast<Assign>,
        x3::forward_ast<Binary>,
        x3::forward_ast<Unary>>;

    // Callee expression plus the argument list.
    struct Call
    {
        Expr name;
        std::vector<Expr> arguments_;
    };

    // Member access: object_.property_
    struct Get
    {
        Expr object_;
        std::string property_;
    };

    // Member assignment: object_.name_ = value_
    struct Set
    {
        Expr object_;
        Expr value_;
        std::string name_;
    };

    // Plain assignment: name_ = value_
    struct Assign
    {
        std::string name_;
        Expr value_;
    };

    // Binary operation with the operator kept as text.
    struct Binary
    {
        Expr left_;
        std::string op_;
        Expr right_;
    };

    // Prefix operation with the operator kept as text.
    struct Unary
    {
        std::string op_;
        Expr expr_;
    };
} // namespace ast
struct printer
{
std::ostream &_os;
using result_type = void;
void operator()(hlsl::ast::Get const &get) const
{
_os << "get { object_:";
get.object_.apply_visitor(*this);
_os << ", property_:" << quoted(get.property_) << " }";
}
void operator()(hlsl::ast::Set const &set) const
{
_os << "set { object_:";
set.object_.apply_visitor(*this);
_os << ", name_:" << quoted(set.name_);
_os << " equals: ";
set.value_.apply_visitor(*this);
_os << " }";
}
void operator()(hlsl::ast::Assign const &assign) const
{
_os << "assign { ";
_os << "name_:" << quoted(assign.name_);
_os << ", value_:";
assign.value_.apply_visitor(*this);
_os << " }";
}
void operator()(hlsl::ast::Variable const &var) const
{
_os << "var{" << quoted(var.name) << "}";
};
void operator()(hlsl::ast::Binary const &bin) const
{
_os << "binary { ";
bin.left_.apply_visitor(*this);
_os << " " << quoted(bin.op_) << " ";
bin.right_.apply_visitor(*this);
_os << " }";
};
void operator()(hlsl::ast::Unary const &un) const
{
_os << "unary { ";
un.expr_.apply_visitor(*this);
_os << quoted(un.op_);
_os << " }";
};
void operator()(hlsl::ast::Call const &call) const
{
_os << "call{";
call.name.apply_visitor(*this);
_os << ", args: ";
for (auto &arg : call.arguments_)
{
arg.apply_visitor(*this);
_os << ", ";
}
_os << /*quoted(call.name) << */ "}";
};
void operator()(hlsl::ast::Void const &) const { _os << "void{}"; };
};
} // namespace hlsl
// Adapt the AST structs as Boost.Fusion sequences; the members are listed in
// the order in which the grammar produces their values.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Call, name, arguments_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
// Set::name_ is not adapted: the fold_in_get_to_set semantic action assigns it.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Set, object_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Assign, name_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Binary, left_, op_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Unary, op_, expr_)
namespace hlsl::parser
{
// Context tag under which main() stores the x3::error_handler.
struct eh_tag;

// Base for rule tag types: on an expectation failure, forwards the failure
// position and the name of the expected element to the handler stored in the
// parse context under eh_tag, then makes the rule fail.
struct error_handler
{
    template <typename It, typename Exc, typename Ctx>
    auto on_error(It &, It, Exc const &x, Ctx const &context) const
    {
        auto const message = "Error! Expecting: " + x.which() + " here:";
        x3::get<eh_tag>(context)(x.where(), message);
        return x3::error_handler_result::fail;
    }
};

// Tag type of the `program` rule; inherits the error reporting above.
struct program_ : error_handler {};
// Rule declarations: each names a tag type, the attribute the rule
// synthesizes, and a display name. The matching <name>_def parsers follow
// further down and are tied together by BOOST_SPIRIT_DEFINE.
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct arguments_, std::vector<ast::Expr>> const arguments{"arguments_"};
// The third template argument `true` is x3::rule's force_attribute flag.
x3::rule<struct binary_, hlsl::ast::Binary, true> const binary{"binary"};
x3::rule<struct unary_, hlsl::ast::Unary> const unary{"unary"};
x3::rule<struct unarycallwrapper_, hlsl::ast::Expr> const unarycallwrapper{"unarycallwrapper"};
x3::rule<struct get_, ast::Expr> const get{"get"};
x3::rule<struct call_, ast::Expr> const call{"call"};
// program_ is the tag type defined above that derives from error_handler.
x3::rule<struct program_, ast::Expr> const program{"program"};
x3::rule<struct primary_, ast::Expr> const primary{"primary"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
x3::rule<struct set_, ast::Set, true> const set{"set"};
x3::rule<struct assign_, ast::Assign> const assign{"assign"};
x3::rule<struct assignment_, ast::Expr> const assignment{"assignment"};
auto get_string_from_variable = [](auto &ctx)
{ _val(ctx).name_ = std::move(_attr(ctx).name); };
auto fix_assignExpr = [](auto &ctx)
{ _val(ctx).value_ = std::move(_attr(ctx)); };
auto as_expr = [](auto &ctx)
{ _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_unary = [](auto &ctx)
{ _val(ctx) = ast::Unary(std::move(_attr(ctx))); };
auto as_call = [](auto &ctx)
{ _val(ctx) = ast::Call{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto fold_in_get_to_set = [](auto &ctx)
{
auto &val = x3::_val(ctx);
val.name_ = boost::get<x3::forward_ast<ast::Get>>(val.object_).get().property_;
val.object_ = ast::Expr(boost::get<x3::forward_ast<ast::Get>>(val.object_).get().object_);
};
auto as_string = [](auto &ctx)
{ _val(ctx) = std::move(_attr(ctx).name); };
auto as_assign = [](auto &ctx)
{ _val(ctx) = ast::Assign(std::move(_val(ctx)), std::move(_attr(ctx))); };
auto as_get = [](auto &ctx)
{
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
// Rule definitions. Alternatives are tried left to right.
auto variable_def = identifier;
auto primary_def = variable;
// A letter followed by letters/digits; lexeme[] disables skipping inside the token.
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
auto expression_def = assignment;
auto assignment_def = (assign | set) | binary; // replace binary with call to see the rest working
// name '=' value, with both members filled by semantic actions.
auto assign_def = variable[get_string_from_variable] >> '=' >> assignment[fix_assignExpr];
// Parses `get '=' assignment` into Set{object_, value_}; the action then
// splits the Get held in object_ into object_ and name_.
auto set_def = (get >> '=' >> assignment)[fold_in_get_to_set];
// NOTE(review): this is a Kleene star around a comma-separated list, so
// consecutive lists without a comma between them are accepted as well.
auto arguments_def = *(expression % ',');
// primary seeds _val; every ".identifier" wraps the value so far in a Get.
auto get_def = primary[as_expr] >> *('.' >> identifier)[as_get];
// Same as get, but "(arguments)" suffixes wrap the value so far in a Call.
auto call_def = primary[as_expr] >> *((x3::lit('(') >> arguments >> x3::lit(')'))[as_call] | ('.' >> identifier)[as_get]);
// NOTE(review): the operand is `unary` itself and there is no other
// alternative, so a "-" must always be followed by another "-".
auto unary_def = (x3::string("-") >> unary);
auto unarycallwrapper_def = unary | call ;
// NOTE(review): matches exactly one "*"; there is no repetition, so for
// `a * b * c` the trailing "* c" is left unconsumed.
auto binary_def = unarycallwrapper >> x3::string("*") >> unarycallwrapper;
// Entry point: whitespace is skipped between tokens of the whole expression.
auto program_def = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(primary, assign, binary, unary, unarycallwrapper, assignment, get, set, variable, arguments, expression, call, identifier, program);
} // namespace hlsl::parser
// Test driver: parses each sample expression and prints either the resulting
// AST or the part of the input that could not be parsed.
int main()
{
    using namespace hlsl;
    for (std::string const input :
         {
             "first",
             "first.second",
             "first.Second.third",
             "first.Second().third",
             "first.Second(arg1).third",
             "first.Second(arg1, arg2).third",
             "first = second",
             "first.second = third",
             "first.second.third = fourth",
             "first.second.third = fourth()",
             "first.second.third = fourth(arg1)",
             "this * that",  //binary { var{"this"} "*" var{"that"} }
             "this * -that", // binary { var{"this"} "*" unary{'-', var{"that"}} }
             "this * that * there",
         }) //
    {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();

        // Our error handler
        auto const p = x3::with<parser::eh_tag>(
            x3::error_handler{f, l, std::cerr})[hlsl::parser::program];

        // parse() returns true as soon as a prefix of the input matches and
        // leaves `f` at the first unconsumed character. Only treat the input
        // as parsed when everything was consumed; otherwise the else branch
        // shows the unparsed remainder.
        if (hlsl::ast::Expr fs; parse(f, l, p, fs) && f == l)
        {
            fs.apply_visitor(hlsl::printer{std::cout << "Parsed: "});
            std::cout << "\n";
        }
        else
        {
            std::cout << "Parse failed at " << quoted(std::string(f, l)) << "\n";
        }
    }
}
Any help is appreciated :)
You found out how to jump hoops already :)
To lend some perspective I started from scratch. I copied the specs as a markdown comment. I basically copy pasted stuff and mapped an AST 1:1:
namespace Ast {
    //////////////////
    // primitive types
    struct Nil {};

    // Distinct string types so that identifiers and string literals are
    // different alternatives of the Expression variant.
    struct Identifier : std::string {
        using std::string::string;
    };
    struct String : std::string {
        using std::string::string;
    };

    enum class Bool { False, True };

    using Number = boost::multiprecision::cpp_dec_float_50;

    //////////////////
    // expressions
    enum class Op {
        Plus, Minus, Multiply, Divide,
        Equal, NotEqual, NOT, OR, AND,
        GT, GTE, LT, LTE,
        Assign
    };

// Recursive variant alternative for a struct that is defined later on.
#define FWD(T) boost::recursive_wrapper<struct T>

    using boost::optional;
    using boost::blank; // std::monostate
    using boost::variant;

    using Expression = variant< //
        Nil, Bool, Number, Identifier, String, //
        FWD(FunctionCall),                     //
        FWD(MemberAccess),                     //
        FWD(Unary),                            //
        FWD(Binary)                            //
        >;

    using Parameters = std::vector<Identifier>;
    using Arguments  = std::vector<Expression>;

    struct FunctionCall {
        Expression fun;
        Arguments  args;
    };
    struct MemberAccess {
        Expression obj;
        Identifier mem;
    };
    struct Unary {
        Op         op;
        Expression oper;
    };
    struct Binary {
        Op         op;
        Expression lhs, rhs;
    };

    //////////////////
    // Statements
    struct PrintStmt {
        Expression value;
    };
    struct ReturnStmt {
        optional<Expression> value;
    };

    using Statement = variant< //
        Expression, PrintStmt, ReturnStmt,
        FWD(ForStmt),   //
        FWD(IfStmt),    //
        FWD(WhileStmt), //
        FWD(Block)      //
        >;
    using Statements = std::vector<Statement>;

    struct VarDecl {
        Identifier           id;
        optional<Expression> init;
    };

    struct ForStmt {
        variant<blank, VarDecl, Expression> init;
        optional<Expression>                cond, incr;
        optional<Statement>                 body;
    };

    struct IfStmt {
        Expression          cond;
        Statement           branch1;
        optional<Statement> branch2;
    };

    struct WhileStmt { // REVIEW might represent as ForStmt
        Expression cond;
        Statement  body;
    };

    struct Block {
        Statements stmts;
    };

    //////////////////
    // Declarations
    struct FunDecl {
        Identifier id;
        Parameters params;
        Block      body;
    };

    struct ClassDecl {
        Identifier           id;
        optional<Identifier> super;
        std::vector<FunDecl> funcs;
    };

    using Declaration  = boost::variant<ClassDecl, FunDecl, VarDecl, Statement>;
    using Declarations = std::vector<Declaration>;
    using Program      = Declarations;
} // namespace Ast
Notes:
I used decimal number representation to not have to deal with too many representation issues
I changed Block content to be statements instead of declarations. It's unlikely that the script should really allow local class declarations. Allowing it means effectively the Declaration and Statement variant have to merge.
Adapting as Fusion sequences:
// Fusion adaptations for the nodes whose members are filled directly from
// the attributes of a parser sequence (member order = parse order).
BOOST_FUSION_ADAPT_STRUCT(Ast::PrintStmt, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::ReturnStmt, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::ForStmt, init, cond, incr, body)
BOOST_FUSION_ADAPT_STRUCT(Ast::IfStmt, cond, branch1, branch2)
BOOST_FUSION_ADAPT_STRUCT(Ast::WhileStmt, cond, body)
BOOST_FUSION_ADAPT_STRUCT(Ast::Block, stmts)
BOOST_FUSION_ADAPT_STRUCT(Ast::FunDecl, id, params, body)
BOOST_FUSION_ADAPT_STRUCT(Ast::ClassDecl, id, super, funcs)
BOOST_FUSION_ADAPT_STRUCT(Ast::VarDecl, id, init)
// These are not required because they're constructed from semantic actions
//BOOST_FUSION_ADAPT_STRUCT(Ast::Unary, op, oper)
//BOOST_FUSION_ADAPT_STRUCT(Ast::Binary, op, lhs, rhs)
//BOOST_FUSION_ADAPT_STRUCT(Ast::FunctionCall, fun, args)
//BOOST_FUSION_ADAPT_STRUCT(Ast::MemberAccess, obj, mem)
Next up we declare rules for anything that is gonna recurse:
// Named rules for the productions that refer to themselves or to each other;
// their *_def parsers are attached with BOOST_SPIRIT_DEFINE.
x3::rule<struct declaration, Ast::Declaration> declaration {"declaration"};
x3::rule<struct statement, Ast::Statement> statement {"statement"};
x3::rule<struct expression, Ast::Expression> expression {"expression"};
x3::rule<struct call, Ast::Expression> call {"call"};
Sadly, due to the operator precedence levels being split up in separate grammar productions, we get a proliferation of these rules:
// One rule per operator production of the grammar; every one of them
// synthesizes an Ast::Expression.
x3::rule<struct unary, Ast::Expression> unary {"unary"};
x3::rule<struct factor, Ast::Expression> factor {"factor"};
x3::rule<struct term, Ast::Expression> term {"term"};
x3::rule<struct comparison, Ast::Expression> comparison {"comparison"};
x3::rule<struct equality, Ast::Expression> equality {"equality"};
x3::rule<struct logic_and, Ast::Expression> logic_and {"logic_and"};
x3::rule<struct logic_or, Ast::Expression> logic_or {"logic_or"};
x3::rule<struct assignment, Ast::Expression> assignment {"assignment"};
The lexicals are simple enough:
// Number: one or more digits with an optional fractional part. x3::raw
// exposes the matched character range, which to_number converts.
auto number = AST(Number,
x3::raw[x3::lexeme[ //
+x3::digit >> -("." >> +x3::digit) //
]][to_number]);
// Character classes for identifiers: letters and '_' first, then also digits.
auto alpha = x3::char_("a-zA-Z_");
auto alnum = x3::char_("a-zA-Z_0-9");
auto identifier = AST(Identifier, x3::lexeme[alpha >> *alnum]);
// String literal: everything between two double quotes; no escape sequences.
auto string = AST(String, x3::lexeme['"' >> *~x3::char_('"') >> '"']);
I see I forgot to introduce the AST(T, p) macro in time. See below.
Constructing the decimal number from string is fine:
// Semantic action for `number`: turns the matched character range into an
// Ast::Number by going through a std::string.
auto to_number = [](auto& ctx) {
    auto const& matched = _attr(ctx);
    std::string const digits(matched.begin(), matched.end());
    _val(ctx) = Ast::Number{digits};
};
Keyword Checking
As an advanced feature I added keyword checking. You will find out you need it when you have a function name starting with a keyword, e.g.
def for_each(container, action) {
for (var i = 0; i <= container.size(); ++i) {
action(container.item(i));
}
}
for_each would misparse for as the keyword, unless we check that it is not immediately followed by "identifier" characters. Let's also make this a configuration point for case sensitivity:
// keyword checking
//
// cs(p) applies the configured case sensitivity to a parser or literal.
// In both configurations the result is an X3 parser: returning a bare
// `char const*` in the case-sensitive build would make the fold in kw()
// ill-formed as soon as two string literals end up next to each other
// (`"var" | "class"` is not a valid expression on pointers).
#if CASE_SENSITIVE
auto cs(auto p) { return x3::as_parser(p); }
#else
auto cs(auto p) { return x3::no_case[p]; }
#endif

// kw(a, b, ...) matches any one of the given keywords as a whole token: the
// match must not be followed directly by another identifier character.
auto kw(auto... p) { return x3::lexeme[(cs(p) | ...) >> !alnum]; }
Now we can use kw("for") instead of "for" and it will be properly case sensitive and boundary-checked.
Reserved keywords
The specs don't say, but you may want to avoid creating variables with reserved names. E.g. (return)("key").index would be an expression that invokes a function named return, but return ("key") would be a statement that returns the expression "key" (wrapped in a redundant subexpression).
So, let's add some logic to distinguish non-reserved identifiers:
// utility
// bool_: the literals "true"/"false" mapped to Ast::Bool through a symbol
// table and wrapped in kw() for the keyword boundary check.
auto bool_ = [] {
x3::symbols<Ast::Bool> sym;
sym.add("true", Ast::Bool::True);
sym.add("false", Ast::Bool::False);
return kw(sym);
}();
// Not specified, use `non_reserved = identifier` to allow those
auto reserved = kw("return", bool_, "nil", "fun", "var", "class");
// An identifier that does not start with a reserved word token.
auto non_reserved = !reserved >> identifier;
AST Building
I think I mentioned the as<T>(p) device before.
// as<T>(p, name): wraps parser `p` in an anonymous rule named `name` whose
// attribute type is T, so the attribute of `p` is coerced to T.
template <typename T> auto as(auto p, char const* name) {
return x3::rule<struct _, T>{name} = std::move(p);
};
// as<T>(p): same, using the demangled type name of T as the rule name. The
// name is kept in a function-local static so the c_str() pointer stays valid.
template <typename T> auto as(auto p) {
static auto const name = boost::core::demangle(typeid(T).name());
return as<T>(std::move(p), name.c_str());
};
Making it less verbose with Ast:: types:
// AST(T, p): shorthand for as<Ast::T>(p, "T") - the rule is named after the type.
#define AST(T, p) as<Ast::T>(p, #T)
Now the utility productions from the grammar can be written as:
// Optional comma-separated list of non-reserved identifiers.
auto parameters = AST(Parameters, -(non_reserved % ","));
// Brace-enclosed sequence of statements.
auto block = AST(Block,"{" >> *statement >> "}");
// name "(" parameters ")" block, collected into a FunDecl.
auto function = AST(FunDecl, non_reserved >> "(" >> parameters >> ")" >> block);
Declarations
// declarations
// class Name [< Super] { function* }
auto classDecl = AST(ClassDecl, //
kw("class") >> non_reserved >> -("<" >> non_reserved) >> //
"{" >> *function >> "}" //
);
auto funDecl = kw("fun") >> function;
// var name [= expression] ;
auto varDecl = kw("var") >> AST(VarDecl, non_reserved >> -("=" >> expression) >> ";");
auto declaration_def = AST(Declaration, classDecl | funDecl | varDecl | statement);
// Entry point: the skipper is embedded here; declarations repeat until end
// of input, and the trailing eoi requires the whole input to be consumed.
auto program = x3::skip(skipper)[AST(Program, *(!x3::eoi >> declaration)) >> x3::eoi];
Not a lot to be said, except note the embedding of the skipper. For fun and exposition, I've customized the skipper to allow C++ style comments:
// Comments: "//" up to end of line (or end of input) and "/* ... */".
// The `>` operators are expectation points: once a comment opener has
// matched, the remainder of the comment is required.
auto comment //
= ("//" > *(x3::char_ - x3::eol) > (x3::eoi | x3::eol)) //
| ("/*" > *(x3::char_ - "*/") > "*/") //
; //
// Skipped between tokens: whitespace or a comment.
auto skipper = x3::space | comment;
Statements
It's a bit of tedium, but the Fusion adaptations and previously introduced kw(...) and AST(T, p) helpers do all the heavy lifting:
// statements
auto exprStmt = AST(Expression, expression >> ";");
// for ( init ; [cond] ; [incr] ) statement
// varDecl and exprStmt consume their own ";"; a bare ";" is an empty init.
auto forStmt = AST(ForStmt, //
kw("for") >> "(" >> //
(varDecl | exprStmt | ";") >> //
-expression >> ";" >> //
-expression >> ")" >> statement);
// if ( cond ) statement [else statement]
auto ifStmt = AST(IfStmt, //
kw("if") >> ("(" >> expression >> ")") >> statement >>
-(kw("else") >> statement));
auto printStmt = AST(PrintStmt, kw("print") >> expression >> ";");
auto returnStmt = AST(ReturnStmt, kw("return") >> -expression >> ";");
auto whileStmt = AST(WhileStmt, kw("while") >> "(" >> expression >> ")" >> statement);
// The negative lookahead rejects end of input and a closing "}" up front,
// before any of the alternatives is tried.
auto statement_def = AST(Statement, !(x3::eoi | "}") //
>> (forStmt | ifStmt | printStmt | returnStmt |
whileStmt | block | exprStmt));
Note how these are basically carbon copies of the specs.
Expressions
Here is the part that gave trouble.
First let's get the simple things out of way:
// opsym: maps operator tokens to Ast::Op values.
auto opsym = [] {
x3::symbols<Ast::Op> sym;
sym.add //
("+", Ast::Op::Plus)("-", Ast::Op::Minus) //
("*", Ast::Op::Multiply)("/", Ast::Op::Divide) //
("==", Ast::Op::Equal)("!=", Ast::Op::NotEqual) //
("!", Ast::Op::NOT)("or", Ast::Op::OR)("and", Ast::Op::AND) //
(">", Ast::Op::GT)(">=", Ast::Op::GTE) //
("<", Ast::Op::LT)("<=", Ast::Op::LTE) //
("=", Ast::Op::Assign);
// If the input at this point looks like an identifier, the operator must
// match as a whole keyword (kw); otherwise the plain symbol match is used.
return as<Ast::Op>( //
&identifier >> kw(sym) // if named operator, require keyword boundary
| sym,
"opsym");
}();
Note here that we conditionally apply the kw() modification on the operator symbol if the input token looks like alphanumeric. That, again, is to prevent andalucia or orlando from misparsing as the logical operators.
The condition &identifier is a bit sloppy, but it saves us from separating the interpunction operators from the named ones. Your profiler will tell you which is better.
auto nil = AST(Nil, kw("nil"));
// Either nothing (when the next token is the closing ")", which is only
// peeked at, not consumed) or a comma-separated list of expressions.
auto arguments = AST(Arguments, &x3::lit(")") | expression % ",");
// this and super are just builtin identifiers
// primary: a literal, a non-reserved identifier or a parenthesized expression.
auto primary = AST(Expression,
bool_ | nil | number | string | non_reserved | "(" >> expression >> ")");
Note that I pruned "this" and "super" from the list as they are
just like other variables. If you opt to make them reserved, you will
need to special-case them here, e.g.
// Keyword-checked "this"/"super"; x3::string (unlike a plain literal) exposes
// the matched text as the attribute, which becomes the Identifier.
auto this_ = AST(Identifier, kw(x3::string("this")));
auto super_ = AST(Identifier, kw(x3::string("super")));
Smooth Operators
You already noticed the way using semantic actions. I separate out a few semantic action helpers:
// Semantic action helpers. _val(ctx) is the Ast::Expression of the enclosing
// rule; _attr(ctx) is the attribute of the parser the action is attached to.

// Seed: the rule's value becomes the parsed attribute.
auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };

// The value so far becomes the callee of a FunctionCall with the parsed arguments.
auto mk_call = [](auto& ctx) {
    Ast::Expression callee    = _val(ctx);
    Ast::Arguments  call_args = _attr(ctx);
    _val(ctx) = Ast::FunctionCall{callee, call_args};
};

// The value so far becomes the object of a MemberAccess to the parsed identifier.
auto mk_member = [](auto& ctx) {
    Ast::Expression object = _val(ctx);
    Ast::Identifier member = _attr(ctx);
    _val(ctx) = Ast::MemberAccess{object, member};
};

// The attribute is the Fusion sequence (operator, operand).
auto mk_unary = [](auto& ctx) {
    auto& parsed = _attr(ctx);
    _val(ctx) = Ast::Unary{at_c<0>(parsed), at_c<1>(parsed)};
};

// The attribute is the Fusion sequence (operator, right operand); the value
// so far becomes the left operand.
auto mk_binary = [](auto& ctx) {
    auto& parsed = _attr(ctx);
    _val(ctx) = Ast::Binary{at_c<0>(parsed), _val(ctx), at_c<1>(parsed)};
};
With these you can do the simple ones:
// call: a primary followed by any number of "(arguments)" and ".member"
// suffixes, each folding the value so far into a new node.
auto call_def = primary[assign] >> //
*(("(" >> arguments >> ")")[mk_call] //
| "." >> non_reserved[mk_member] //
);
// unary: a prefix "!" or "-" applied to another unary, or else a call.
auto unary_def = (expect_op("!", "-") >> unary)[mk_unary] | call[assign];
// assignment: `call = assignment` (recursing on the right-hand side), or
// else a logic_or expression.
auto assignment_def = //
(call[assign] >> (expect_op("=") >> assignment)[mk_binary]) | //
logic_or[assign];
Then the bulk would become e.g.:
// Hand-written form of one binary level: the first operand seeds the value,
// every further "or operand" pair is folded in by mk_binary.
auto logic_or_def = logic_and[assign] >> *(&kw("or") >> opsym >> logic_and)[mk_binary];
To avoid the duplication let's make a rule factory:
// Rule factory for one binary precedence level: `precedent` is the parser for
// the operands (the next tighter level), `ops` are the operator tokens
// accepted at this level. Repeated operators are folded in by mk_binary.
auto binary_def = [](auto precedent, auto... ops) {
return precedent[assign] >> *(expect_op(ops...) >> precedent)[mk_binary];
};
The expect_op factory handles multiple acceptable operators, and applies proper token boundary checking again:
// expect_op(ops...): first checks, as a lookahead that consumes nothing,
// that one of `ops` comes next as a complete token; then parses the operator
// itself with opsym, which supplies the Ast::Op attribute.
auto expect_op(auto... ops) {
return &x3::lexeme[
// keyword operator?
(&identifier >> kw((x3::as_parser(ops) | ...))) |
// interpunction operator
((x3::as_parser(ops) | ...) >> !x3::char_("!=><)"))] >>
opsym;
};
Now all the binaries (except the top level assignment, which has special associativity and lhs productions) become:
// Binary levels: each one is built from the level before it via binary_def.
auto factor_def = binary_def(unary, "/", "*");
auto term_def = binary_def(factor, "-", "+");
auto comparison_def = binary_def(term, ">", ">=", "<", "<=");
auto equality_def = binary_def(comparison, "!=", "==");
auto logic_and_def = binary_def(equality, "and");
auto logic_or_def = binary_def(logic_and, "or");
Tying it all together:
// An expression is an assignment, the top production of the operator chain.
auto expression_def = assignment;
// Attach every <name>_def to the rule declared as <name>.
BOOST_SPIRIT_DEFINE(declaration, statement, expression);
BOOST_SPIRIT_DEFINE(call, unary, factor, term, comparison, equality, logic_and,
logic_or, assignment);
Testing
// Test driver: parses a sample program with Grammar::program and streams the
// resulting Ast::Program to stdout, or prints "Failed".
int main() {
#ifdef COLIRU
// Read the whole of stdin as the program text.
std::string input(std::istreambuf_iterator<char>(std::cin), {});
#else
// Built-in sample covering classes, functions, loops, conditionals, calls
// and comments.
std::string_view input = R"~(
class Cat < Animal {
Cat(name) {
print format("maybe implement member data some day: {}\n", name);
}
bark(volume) {
for (dummy = Nil; volume>0; volume = volume - 1)
print "bark!";
if (dummy or !(dummy == Nil) and universe_sane()) {
while(dummy) {{ print "(just kidding)"; }}
} else if (nesting() == "the shit") {
print("cool beans"); // extra parentheses are fine
return(True != False); // also on return statements
} else brackets = !"required";
return False;
}
bite() { return "pain takes no arguments"; }
}
var pooky = Cat("Pooky");
pooky.bark(10);
pooky = nil; // pooky got offed for being obnoxious :(
)~";
#endif
{
if (Ast::Program parsed;
parse(begin(input), end(input), Grammar::program, parsed))
std::cout << parsed << "\n";
else
std::cout << "Failed\n";
}
}
Live On Coliru Printing
class `Cat` < `Animal`{
[fun] `Cat`(`name`) {
print (`format`("maybe implement member data some day: {}\\n",`name`));
}
[fun] `bark`(`volume`) {
for((`dummy` = Nil); (`volume` > 0); (`volume` = (`volume` - 1)))
print "bark!";
if((`dummy` or ((! (`dummy` == Nil)) and (`universe_sane`())))) {
while(`dummy`)
{
{
print "(just kidding)";
}
}
}
else if(((`nesting`()) == "the shit")) {
print "cool beans";
return (True != False);
}
else (`brackets` = (! "required"))
return False;
}
[fun] `bite`() {
return "pain takes no arguments";
}
}
var `pooky` = (`Cat`("Pooky"));
((`pooky`.`bark`(10))
(`pooky` = Nil)
Locally, interactively:
Full Listing (anti-bitrot)
Sadly [SO] refuses it for length limits. I'll post it on Github. Link coming.
TL;DR
I think the at_c<N> accessor trick to dissect Fusion sequences in semantic action will help a lot.
Also, keep in mind that I don't think this rule structure is good for performant parsers. Just look at how something simple like x = y + (2); will invoke 43 rules (!!!) nested to 32 levels deep (!!!).
That's... not ideal. I've made a fully C++-compatible expression grammar (complete with interpreter) on SO before, and you can witness it here: https://github.com/sehe/qi-extended-parser-evaluator. It's using Spirit Qi, but in spirit it uses an almost X3 approach. I might make an X3 version of it just to compare for myself.
The key difference is that it generically implements operators with some metadata to describe it (token, precedence, associativity). This information is then used to combine expression AST nodes correctly. It even allows to get rid of redundant parentheses, both when building the Ast and when printing.
The interpreter logic (with dynamic type system, some reflection and execution tracing) may be a nice bonus inspiration: https://github.com/sehe/qi-extended-parser-evaluator/blob/master/eval.h#L291
Here is how I have solved the issue.
Instead of having a single binary ast node that stores a string of either "*" or "/", I split it up into separate ast node types for divide and multiply.
I then used the same machinery suggested by @sehe in the linked answer to synthesize the right nodes.
I'm still unsure how you can use semantic actions to synthesize attributes that span across multiple '>>' operators. I'm guessing that the _val(ctx) in the semantic action refers to the whole ast::Expr across the currently defined rule, so maybe you can set one member of an ast::Binary (e.g. the op string from the x3::string("*")), then in the next term after the '>>' you write to _val(ctx) again (copy-constructed from the previous value?) and set the next member from the _attr(ctx)? I'll see if I can investigate whether that works next. That would allow some more complex synthesizing of attributes, although I'm not sure if you could have different types being set across the rule.
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl
{
namespace ast
{
    // Empty node; it is the first alternative listed in Expr.
    struct Void {};

    // Nodes that themselves contain an Expr are only declared up front, so
    // that Expr can name them through x3::forward_ast before they are complete.
    struct Get;
    struct Set;
    struct Call;
    struct Assign;
    struct Divide;
    struct Multiply;
    struct Unary;

    // A bare identifier.
    struct Variable
    {
        std::string name;
        // operator std::string() const {
        // return name;
        // }
    };

    // Any expression node of the grammar.
    using Expr = x3::variant<
        Void,
        x3::forward_ast<Get>,
        x3::forward_ast<Set>,
        Variable,
        x3::forward_ast<Call>,
        x3::forward_ast<Assign>,
        x3::forward_ast<Multiply>,
        x3::forward_ast<Divide>,
        x3::forward_ast<Unary>>;

    // Callee expression plus the argument list.
    struct Call
    {
        Expr name;
        std::vector<Expr> arguments_;
    };

    // Member access: object_.property_
    struct Get
    {
        Expr object_;
        std::string property_;
    };

    // Member assignment: object_.name_ = value_
    struct Set
    {
        Expr object_;
        Expr value_;
        std::string name_;
    };

    // Plain assignment: name_ = value_
    struct Assign
    {
        std::string name_;
        Expr value_;
    };

    // struct Logical
    // {
    // Expr left_;
    // std::string op_;
    // Expr right_;
    // };

    // left_ * right_
    struct Multiply
    {
        Expr left_;
        Expr right_;
    };

    // left_ / right_
    struct Divide
    {
        Expr left_;
        Expr right_;
    };

    // Prefix operation with the operator kept as text.
    struct Unary
    {
        std::string op_;
        Expr expr_;
    };
} // namespace ast
struct printer
{
std::ostream &_os;
using result_type = void;
void operator()(hlsl::ast::Get const &get) const
{
_os << "get { object_:";
get.object_.apply_visitor(*this);
_os << ", property_:" << quoted(get.property_) << " }";
}
void operator()(hlsl::ast::Set const &set) const
{
_os << "set { object_:";
set.object_.apply_visitor(*this);
_os << ", name_:" << quoted(set.name_);
_os << " equals: ";
set.value_.apply_visitor(*this);
_os << " }";
}
void operator()(hlsl::ast::Assign const &assign) const
{
_os << "assign { ";
_os << "name_:" << quoted(assign.name_);
_os << ", value_:";
assign.value_.apply_visitor(*this);
_os << " }";
}
void operator()(hlsl::ast::Variable const &var) const
{
_os << "var{" << quoted(var.name) << "}";
};
void operator()(hlsl::ast::Divide const &bin) const
{
_os << "divide { ";
bin.left_.apply_visitor(*this);
bin.right_.apply_visitor(*this);
_os << " }";
};
void operator()(hlsl::ast::Multiply const &bin) const
{
_os << "multiply { ";
bin.left_.apply_visitor(*this);
bin.right_.apply_visitor(*this);
_os << " }";
};
void operator()(hlsl::ast::Unary const &un) const
{
_os << "unary { ";
un.expr_.apply_visitor(*this);
_os << quoted(un.op_);
_os << " }";
};
void operator()(hlsl::ast::Call const &call) const
{
_os << "call{";
call.name.apply_visitor(*this);
_os << ", args: ";
for (auto &arg : call.arguments_)
{
arg.apply_visitor(*this);
_os << ", ";
}
_os << /*quoted(call.name) << */ "}";
};
void operator()(hlsl::ast::Void const &) const { _os << "void{}"; };
};
} // namespace hlsl
// Adapt the AST structs as Boost.Fusion sequences; the members are listed in
// the order in which the grammar produces their values.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Call, name, arguments_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
// Set::name_ is not adapted: the fold_in_get_to_set semantic action assigns it.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Set, object_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Assign, name_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Multiply, left_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Divide, left_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Unary, op_, expr_)
namespace hlsl::parser
{
// Context tag under which main() stores the x3::error_handler.
struct eh_tag;

// Base for rule tag types: on an expectation failure, forwards the failure
// position and the name of the expected element to the handler stored in the
// parse context under eh_tag, then makes the rule fail.
struct error_handler
{
    template <typename It, typename Exc, typename Ctx>
    auto on_error(It &, It, Exc const &x, Ctx const &context) const
    {
        auto const message = "Error! Expecting: " + x.which() + " here:";
        x3::get<eh_tag>(context)(x.where(), message);
        return x3::error_handler_result::fail;
    }
};

// Tag type of the `program` rule; inherits the error reporting above.
struct program_ : error_handler {};
// Rule declarations: each names a tag type, the attribute the rule
// synthesizes, and a display name. The matching <name>_def parsers follow
// further down.
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct arguments_, std::vector<ast::Expr>> const arguments{"arguments_"};
x3::rule<struct binary_, hlsl::ast::Expr> const binary{"binary"};
// NOTE(review): no multiply_def / divide_def is visible in this listing;
// both operators are handled inside binary_def.
x3::rule<struct multiply_, hlsl::ast::Expr> const multiply{"multiply"};
x3::rule<struct divide_, hlsl::ast::Expr> const divide{"divide"};
x3::rule<struct unary_, hlsl::ast::Unary> const unary{"unary"};
x3::rule<struct unarycallwrapper_, hlsl::ast::Expr> const unarycallwrapper{"unarycallwrapper"};
x3::rule<struct get_, ast::Expr> const get{"get"};
x3::rule<struct call_, ast::Expr> const call{"call"};
// program_ is the tag type defined above that derives from error_handler.
x3::rule<struct program_, ast::Expr> const program{"program"};
x3::rule<struct primary_, ast::Expr> const primary{"primary"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
// The third template argument `true` is x3::rule's force_attribute flag.
x3::rule<struct set_, ast::Set, true> const set{"set"};
x3::rule<struct assign_, ast::Assign> const assign{"assign"};
x3::rule<struct assignment_, ast::Expr> const assignment{"assignment"};
auto get_string_from_variable = [](auto &ctx)
{ _val(ctx).name_ = std::move(_attr(ctx).name); };
auto fix_assignExpr = [](auto &ctx)
{ _val(ctx).value_ = std::move(_attr(ctx)); };
auto as_expr = [](auto &ctx)
{ _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_unary = [](auto &ctx)
{ _val(ctx) = ast::Unary(std::move(_attr(ctx))); };
auto as_call = [](auto &ctx)
{ _val(ctx) = ast::Call{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto as_multiply = [](auto &ctx)
{ _val(ctx) = ast::Multiply{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto as_divide = [](auto &ctx)
{ _val(ctx) = ast::Divide{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto fold_in_get_to_set = [](auto &ctx)
{
auto &val = x3::_val(ctx);
val.name_ = boost::get<x3::forward_ast<ast::Get>>(val.object_).get().property_;
val.object_ = ast::Expr(boost::get<x3::forward_ast<ast::Get>>(val.object_).get().object_);
};
auto as_string = [](auto &ctx)
{ _val(ctx) = std::move(_attr(ctx).name); };
auto as_assign = [](auto &ctx)
{ _val(ctx) = ast::Assign(std::move(_val(ctx)), std::move(_attr(ctx))); };
auto as_get = [](auto &ctx)
{
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
// Rule definitions. Alternatives are tried left to right.
auto variable_def = identifier;
auto primary_def = variable;

// A letter followed by letters/digits; lexeme[] disables skipping inside the token.
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];

auto expression_def = assignment;
auto assignment_def = (assign | set) | binary;

// name '=' value, with both members filled by semantic actions.
auto assign_def = variable[get_string_from_variable] >> '=' >> assignment[fix_assignExpr];

// Parses `get '=' assignment` into Set{object_, value_}; the action then
// splits the Get held in object_ into object_ and name_.
auto set_def = (get >> '=' >> assignment)[fold_in_get_to_set];

// Zero arguments, or one comma-separated list. (A Kleene star around the
// list would also accept several lists with no comma between them.)
auto arguments_def = -(expression % ',');

// primary seeds _val; every ".identifier" wraps the value so far in a Get.
auto get_def = primary[as_expr] >> *('.' >> identifier)[as_get];

// Same as get, but "(arguments)" suffixes wrap the value so far in a Call.
auto call_def = primary[as_expr] >> *((x3::lit('(') >> arguments >> x3::lit(')'))[as_call] | ('.' >> identifier)[as_get]);

// "-" applied to a call or to another unary.
auto unary_def = (x3::string("-") >> unarycallwrapper);
auto unarycallwrapper_def = call | unary;

// Left-associative chain: the first operand seeds _val, then every
// "/ operand" or "* operand" wraps the value so far in a Divide / Multiply.
auto binary_def = unarycallwrapper[as_expr] >> *((x3::lit('/') >> unarycallwrapper[as_divide]) | (x3::lit('*') >> unarycallwrapper[as_multiply]));

// Entry point: whitespace is skipped between tokens of the whole expression.
auto program_def = x3::skip(x3::space)[expression];

// Only rules that actually have a <name>_def may be listed here: the macro
// refers to <name>_def by name, so listing `multiply` / `divide` (which have
// no definition) does not compile under standard two-phase name lookup.
BOOST_SPIRIT_DEFINE(primary, assign, binary, unary, unarycallwrapper, assignment, get, set, variable, arguments, expression, call, identifier, program);
} // namespace hlsl::parser
// Test driver: parses each sample expression and prints either the resulting
// AST or the part of the input that could not be parsed.
int main()
{
    using namespace hlsl;
    for (std::string const input :
         {
             "first",
             "first.second",
             "first.Second.third",
             "first.Second().third",
             "first.Second(arg1).third",
             "first.Second(arg1, arg2).third",
             "first = second",
             "first.second = third",
             "first.second.third = fourth",
             "first.second.third = fourth()",
             "first.second.third = fourth(arg1)",
             "this * that",  // binary { var{"this"} "*" var{"that"} }
             "this * -that", // binary { var{"this"} "*" unary{'-', var{"that"}} }
             "this * that * there",
             "this * that / there",
             "this.inner * that * there.inner2",
         }) //
    {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();

        // Our error handler
        auto const p = x3::with<parser::eh_tag>(
            x3::error_handler{f, l, std::cerr})[hlsl::parser::program];

        // parse() returns true as soon as a prefix of the input matches and
        // leaves `f` at the first unconsumed character. Only treat the input
        // as parsed when everything was consumed; otherwise the else branch
        // shows the unparsed remainder.
        if (hlsl::ast::Expr fs; parse(f, l, p, fs) && f == l)
        {
            fs.apply_visitor(hlsl::printer{std::cout << "Parsed: "});
            std::cout << "\n";
        }
        else
        {
            std::cout << "Parse failed at " << quoted(std::string(f, l)) << "\n";
        }
    }
}
I also figured out how the semantic actions write to _val(ctx) across multiple sequence '>>' operators. You can write to them with the type that you need and it gets passed to the next one!
See the binary2 rule and how its definition uses two semantic actions to write a Binary2 AST node, setting different members each time.
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl
{
namespace ast
{
    // Empty node; it is the first alternative of Expr.
    struct Void {};

    // Node types that contain an Expr are forward-declared so the variant can
    // hold them through x3::forward_ast.
    struct Get;
    struct Set;
    struct Call;
    struct Assign;
    struct Divide;
    struct Multiply;
    struct Unary;
    struct Binary2;

    // A bare identifier.
    struct Variable
    {
        std::string name;
    };

    // Any expression node.
    using Expr = x3::variant<
        Void,
        x3::forward_ast<Get>,
        x3::forward_ast<Set>,
        Variable,
        x3::forward_ast<Call>,
        x3::forward_ast<Assign>,
        x3::forward_ast<Multiply>,
        x3::forward_ast<Binary2>,
        x3::forward_ast<Divide>,
        x3::forward_ast<Unary>>;

    // Call of `name` with a list of argument expressions.
    struct Call
    {
        Expr name;
        std::vector<Expr> arguments_;
    };

    // Member read: `object_.property_`.
    struct Get
    {
        Expr object_;
        std::string property_;
    };

    // Member write: `object_.name_ = value_`.
    struct Set
    {
        Expr object_;
        Expr value_;
        std::string name_;
    };

    // Variable write: `name_ = value_`.
    struct Assign
    {
        std::string name_;
        Expr value_;
    };

    // `left_ * right_`
    struct Multiply
    {
        Expr left_;
        Expr right_;
    };

    // Binary operation that stores its operator as text.
    struct Binary2
    {
        Expr left_;
        std::string op_;
        Expr right_;
    };

    // `left_ / right_`
    struct Divide
    {
        Expr left_;
        Expr right_;
    };

    // Prefix operator applied to one operand.
    struct Unary
    {
        std::string op_;
        Expr expr_;
    };
} // namespace ast
// Visitor that writes a readable dump of an ast::Expr tree to a stream.
// Binary-style nodes separate their operands so the output is unambiguous.
struct printer
{
    std::ostream &_os; // destination stream, not owned
    using result_type = void;

    void operator()(hlsl::ast::Get const &get) const
    {
        _os << "get { object_:";
        get.object_.apply_visitor(*this);
        _os << ", property_:" << quoted(get.property_) << " }";
    }
    void operator()(hlsl::ast::Set const &set) const
    {
        _os << "set { object_:";
        set.object_.apply_visitor(*this);
        _os << ", name_:" << quoted(set.name_);
        _os << " equals: ";
        set.value_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Assign const &assign) const
    {
        _os << "assign { ";
        _os << "name_:" << quoted(assign.name_);
        _os << ", value_:";
        assign.value_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Variable const &var) const
    {
        _os << "var{" << quoted(var.name) << "}";
    }
    void operator()(hlsl::ast::Divide const &bin) const
    {
        _os << "divide { ";
        bin.left_.apply_visitor(*this);
        _os << ", "; // keep the two operands apart
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Multiply const &bin) const
    {
        _os << "multiply { ";
        bin.left_.apply_visitor(*this);
        _os << ", "; // keep the two operands apart
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    // Prints `binary2 { <left> "<op>" <right> }`.
    void operator()(hlsl::ast::Binary2 const &bin) const
    {
        _os << "binary2 { ";
        bin.left_.apply_visitor(*this);
        _os << " " << quoted(bin.op_) << " ";
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    // Prints `unary { "<op>", <operand> }`, operator first as in the source text.
    void operator()(hlsl::ast::Unary const &un) const
    {
        _os << "unary { " << quoted(un.op_) << ", ";
        un.expr_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Call const &call) const
    {
        _os << "call{";
        call.name.apply_visitor(*this);
        _os << ", args: ";
        for (auto &arg : call.arguments_)
        {
            arg.apply_visitor(*this);
            _os << ", ";
        }
        _os << "}";
    }
    void operator()(hlsl::ast::Void const &) const { _os << "void{}"; }
};
} // namespace hlsl
// Expose the AST structs as Fusion sequences; the listed members define the
// element order of each sequence.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Call, name, arguments_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
// Set::name_ is not adapted: it is filled in by the fold_in_get_to_set action.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Set, object_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Assign, name_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Multiply, left_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Binary2, left_, op_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Divide, left_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Unary, op_, expr_)
namespace hlsl::parser
{
// Context tag under which main() stores the x3::error_handler.
struct eh_tag;

// Base for rule tag types that should report expectation failures.
struct error_handler
{
    // Reports the failure through the handler found under eh_tag in the
    // context, then makes the rule fail.
    template <typename It, typename Exc, typename Ctx>
    auto on_error(It &, It, Exc const &failure, Ctx const &context) const
    {
        auto &&report = x3::get<eh_tag>(context);
        std::string const message = "Error! Expecting: " + failure.which() + " here:";
        report(failure.where(), message);
        return x3::error_handler_result::fail;
    }
};

// Tag of the `program` rule; inherits the reporting hook.
struct program_ : error_handler
{
};
// Rules yielding plain strings.
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct binop_, std::string> const binop{"binop"};

// Rules yielding one specific AST node type.
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct unary_, ast::Unary> const unary{"unary"};
x3::rule<struct assign_, ast::Assign> const assign{"assign"};
x3::rule<struct set_, ast::Set, true> const set{"set"};
x3::rule<struct arguments_, std::vector<ast::Expr>> const arguments{"arguments_"};

// Rules yielding a generic expression.
x3::rule<struct program_, ast::Expr> const program{"program"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
x3::rule<struct assignment_, ast::Expr> const assignment{"assignment"};
x3::rule<struct binary_, ast::Expr> const binary{"binary"};
x3::rule<struct binary2_, ast::Expr> const binary2{"binary2"};
x3::rule<struct multiply_, ast::Expr> const multiply{"multiply"};
x3::rule<struct divide_, ast::Expr> const divide{"divide"};
x3::rule<struct unarycallwrapper_, ast::Expr> const unarycallwrapper{"unarycallwrapper"};
x3::rule<struct call_, ast::Expr> const call{"call"};
x3::rule<struct get_, ast::Expr> const get{"get"};
x3::rule<struct primary_, ast::Expr> const primary{"primary"};
auto get_string_from_variable = [](auto &ctx)
{ _val(ctx).name_ = std::move(_attr(ctx).name); };
auto fix_assignExpr = [](auto &ctx)
{ _val(ctx).value_ = std::move(_attr(ctx)); };
auto as_expr = [](auto &ctx)
{ _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_unary = [](auto &ctx)
{ _val(ctx) = ast::Unary(std::move(_attr(ctx))); };
auto as_call = [](auto &ctx)
{ _val(ctx) = ast::Call{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto as_multiply = [](auto &ctx)
{ _val(ctx) = ast::Multiply{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto as_divide = [](auto &ctx)
{ _val(ctx) = ast::Divide{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto as_binary2A = [](auto &ctx)
{ _val(ctx) = ast::Binary2{std::move(_val(ctx)), std::move(_attr(ctx)), ast::Expr{}}; };
auto as_binary2B = [](auto &ctx)
{ //_val(ctx) = std::move(_val(ctx));
boost::get<x3::forward_ast<ast::Binary2>>(_val(ctx)).get().right_ = std::move(_attr(ctx)); };
auto fold_in_get_to_set = [](auto &ctx)
{
auto &val = x3::_val(ctx);
val.name_ = boost::get<x3::forward_ast<ast::Get>>(val.object_).get().property_;
val.object_ = ast::Expr(boost::get<x3::forward_ast<ast::Get>>(val.object_).get().object_);
};
auto as_string = [](auto &ctx)
{ _val(ctx) = std::move(_attr(ctx).name); };
auto as_assign = [](auto &ctx)
{ _val(ctx) = ast::Assign(std::move(_val(ctx)), std::move(_attr(ctx))); };
auto as_get = [](auto &ctx)
{
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
// Grammar definitions; BOOST_SPIRIT_DEFINE at the end binds each `<name>_def`
// to its rule. `program` installs the whitespace skipper and defers to
// `expression`.
auto variable_def = identifier;
auto primary_def = variable;
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
auto expression_def = assignment;
auto assignment_def = (assign | set) | binary2; // replace binary2 with call to see the rest working
auto assign_def = variable[get_string_from_variable] >> '=' >> assignment[fix_assignExpr];
auto set_def = (get >> '=' >> assignment)[fold_in_get_to_set];
// An optional comma-separated list. A Kleene star around the list would
// restart it after the last element and accept `f(a b)` as two arguments.
auto arguments_def = -(expression % ',');
auto get_def = primary[as_expr] >> *('.' >> identifier)[as_get];
// A primary followed by any mix of call suffixes `(...)` and member accesses `.name`.
auto call_def = primary[as_expr] >> *((x3::lit('(') >> arguments >> x3::lit(')'))[as_call] | ('.' >> identifier)[as_get]);
auto unary_def = (x3::string("-") >> unarycallwrapper);
auto unarycallwrapper_def = unary | call;
auto binop_def = x3::string("*") | x3::string("/");
auto binary_def = unarycallwrapper[as_expr] >> *((x3::lit('/') >> unarycallwrapper[as_divide]) | (x3::lit('*') >> unarycallwrapper[as_multiply]));
// Left-folding loop built in two steps: the operator action wraps the value so
// far in a Binary2, the operand action then fills that node's right_.
auto binary2_def = unarycallwrapper[as_expr] >> *(binop[as_binary2A] >> unarycallwrapper[as_binary2B]);
auto program_def = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(primary, assign, binop, binary, binary2, unary, unarycallwrapper, assignment, get, set, variable, arguments, expression, call, identifier, program);
} // namespace hlsl::parser
int main()
{
using namespace hlsl;
for (std::string const input :
{
"first",
"first.second",
"first.Second.third",
"first.Second().third",
"first.Second(arg1).third",
"first.Second(arg1, arg2).third",
"first = second",
"first.second = third",
"first.second.third = fourth",
"first.second.third = fourth()",
"first.second.third = fourth(arg1)",
"this * that", // binary { var{"this"} "*" var{"that"} }
"this * -that", // binary { var{"this"} "*" unary{'-', var{"that"}} }
"this * that * there",
"this * that / there",
"this.inner * that * there.inner2",
}) //
{
std::cout << "===== " << quoted(input) << "\n";
auto f = input.begin(), l = input.end();
// Our error handler
auto const p = x3::with<parser::eh_tag>(
x3::error_handler{f, l, std::cerr})[hlsl::parser::program];
if (hlsl::ast::Expr fs; parse(f, l, p, fs))
{
fs.apply_visitor(hlsl::printer{std::cout << "Parsed: "});
std::cout << "\n";
}
else
{
std::cout << "Parse failed at " << quoted(std::string(f, l)) << "\n";
}
}
}
One more post...
I'm almost finished with the full stack of expression operators that hlsl uses (bitwise, logical, compound assignment etc).
I even figured out nested ternary operators which I thought would be really hard but didn't turn out too bad.
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl
{
namespace ast
{
    // Empty node; it is the first alternative of Expr.
    struct Void {};

    // Node types that contain an Expr are forward-declared so the variant can
    // hold them through x3::forward_ast.
    struct Get;
    struct Set;
    struct Call;
    struct Assign;
    struct CompoundAssign;
    struct Divide;
    struct Multiply;
    struct Unary;
    struct Binary;
    struct Logical;
    struct Bitwise;
    struct Ternary;

    // A bare identifier.
    struct Variable
    {
        std::string name;
    };

    // Any expression node.
    using Expr = x3::variant<
        Void,
        x3::forward_ast<Get>,
        x3::forward_ast<Set>,
        Variable,
        x3::forward_ast<Call>,
        x3::forward_ast<Assign>,
        x3::forward_ast<CompoundAssign>,
        x3::forward_ast<Multiply>,
        x3::forward_ast<Binary>,
        x3::forward_ast<Logical>,
        x3::forward_ast<Ternary>,
        x3::forward_ast<Bitwise>,
        x3::forward_ast<Divide>,
        x3::forward_ast<Unary>>;

    // Call of `name` with a list of argument expressions.
    struct Call
    {
        Expr name;
        std::vector<Expr> arguments_;
    };

    // Member read: `object_.property_`.
    struct Get
    {
        Expr object_;
        std::string property_;
    };

    // Member write: `object_.name_ = value_`.
    struct Set
    {
        Expr object_;
        Expr value_;
        std::string name_;
    };

    // Variable write: `name_ = value_`.
    struct Assign
    {
        std::string name_;
        Expr value_;
    };

    // Variable update through a compound operator: `name_ op_ value_`.
    struct CompoundAssign
    {
        std::string name_;
        std::string op_;
        Expr value_;
    };

    struct Multiply
    {
        Expr left_;
        Expr right_;
    };

    // Binary, Logical and Bitwise share one layout: two operands and the
    // operator text.
    struct Binary
    {
        Expr left_;
        std::string op_;
        Expr right_;
    };

    struct Logical
    {
        Expr left_;
        std::string op_;
        Expr right_;
    };

    struct Bitwise
    {
        Expr left_;
        std::string op_;
        Expr right_;
    };

    struct Divide
    {
        Expr left_;
        Expr right_;
    };

    // Prefix operator applied to one operand.
    struct Unary
    {
        std::string op_;
        Expr expr_;
    };

    // `condition_ ? ifexpr_ : elseexpr_`
    struct Ternary
    {
        Expr condition_;
        Expr ifexpr_;
        Expr elseexpr_;
    };
} // namespace ast
// Visitor that writes a readable dump of an ast::Expr tree to a stream.
// Multi-operand nodes separate their parts so the output is unambiguous.
struct printer
{
    std::ostream &_os; // destination stream, not owned
    using result_type = void;

    void operator()(hlsl::ast::Get const &get) const
    {
        _os << "get { object_:";
        get.object_.apply_visitor(*this);
        _os << ", property_:" << quoted(get.property_) << " }";
    }
    void operator()(hlsl::ast::Set const &set) const
    {
        _os << "set { object_:";
        set.object_.apply_visitor(*this);
        _os << ", name_:" << quoted(set.name_);
        _os << " equals: ";
        set.value_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Assign const &assign) const
    {
        _os << "assign { ";
        _os << "name_:" << quoted(assign.name_);
        _os << ", value_:";
        assign.value_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::CompoundAssign const &assign) const
    {
        _os << "compoundAssign { ";
        _os << "name_:" << quoted(assign.name_);
        _os << ", op_:" << quoted(assign.op_); // fields are comma-separated as in Assign
        _os << ", value_:";
        assign.value_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Variable const &var) const
    {
        _os << "var{" << quoted(var.name) << "}";
    }
    void operator()(hlsl::ast::Divide const &bin) const
    {
        _os << "divide { ";
        bin.left_.apply_visitor(*this);
        _os << ", "; // keep the two operands apart
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Multiply const &bin) const
    {
        _os << "multiply { ";
        bin.left_.apply_visitor(*this);
        _os << ", "; // keep the two operands apart
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    // Binary, Logical and Bitwise print as `<kind> { <left> "<op>" <right> }`.
    void operator()(hlsl::ast::Binary const &bin) const
    {
        _os << "binary { ";
        bin.left_.apply_visitor(*this);
        _os << " " << quoted(bin.op_) << " ";
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Logical const &bin) const
    {
        _os << "logical { ";
        bin.left_.apply_visitor(*this);
        _os << " " << quoted(bin.op_) << " ";
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Bitwise const &bin) const
    {
        _os << "bitwise { ";
        bin.left_.apply_visitor(*this);
        _os << " " << quoted(bin.op_) << " ";
        bin.right_.apply_visitor(*this);
        _os << " }";
    }
    // Prints `unary { "<op>", <operand> }`, operator first as in the source text.
    void operator()(hlsl::ast::Unary const &un) const
    {
        _os << "unary { " << quoted(un.op_) << ", ";
        un.expr_.apply_visitor(*this);
        _os << " }";
    }
    // Prints `ternary { <condition> ? <if> : <else> }`.
    void operator()(hlsl::ast::Ternary const &tern) const
    {
        _os << "ternary { ";
        tern.condition_.apply_visitor(*this);
        _os << " ? ";
        tern.ifexpr_.apply_visitor(*this);
        _os << " : ";
        tern.elseexpr_.apply_visitor(*this);
        _os << " }";
    }
    void operator()(hlsl::ast::Call const &call) const
    {
        _os << "call{";
        call.name.apply_visitor(*this);
        _os << ", args: ";
        for (auto &arg : call.arguments_)
        {
            arg.apply_visitor(*this);
            _os << ", ";
        }
        _os << "}";
    }
    void operator()(hlsl::ast::Void const &) const { _os << "void{}"; }
};
} // namespace hlsl
// Expose the AST structs as Fusion sequences; the listed members define the
// element order of each sequence.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Call, name, arguments_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
// Set::name_ is not adapted: it is filled in by the fold_in_get_to_set action.
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Set, object_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Assign, name_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::CompoundAssign, name_, op_, value_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Multiply, left_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Binary, left_, op_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Logical, left_, op_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Bitwise, left_, op_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Divide, left_, right_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Unary, op_, expr_)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Ternary, condition_, ifexpr_, elseexpr_)
namespace hlsl::parser
{
// Context tag under which main() stores the x3::error_handler.
struct eh_tag;

// Base for rule tag types that should report expectation failures.
struct error_handler
{
    // Reports the failure through the handler found under eh_tag in the
    // context, then makes the rule fail.
    template <typename It, typename Exc, typename Ctx>
    auto on_error(It &, It, Exc const &failure, Ctx const &context) const
    {
        auto &&report = x3::get<eh_tag>(context);
        std::string const message = "Error! Expecting: " + failure.which() + " here:";
        report(failure.where(), message);
        return x3::error_handler_result::fail;
    }
};

// Tag of the `program` rule; inherits the reporting hook.
struct program_ : error_handler
{
};
// Rules yielding plain strings: identifiers and operator tokens.
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct factor_, std::string> const factor{"factor"};
x3::rule<struct term_, std::string> const term{"term"};
x3::rule<struct compare_op_, std::string> const compare_op{"compare_op"};
x3::rule<struct equality_op_, std::string> const equality_op{"equality_op"};
x3::rule<struct bitwise_shift_op_, std::string> const bitwise_shift_op{"bitwise_shift_op"};
x3::rule<struct compoundassign_op_, std::string> const compoundassign_op{"compoundassign_op"};

// Rules yielding one specific AST node type.
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct unary_, ast::Unary> const unary{"unary"};
x3::rule<struct assign_, ast::Assign> const assign{"assign"};
x3::rule<struct compoundassign_, ast::CompoundAssign> const compoundassign{"compoundassign"};
x3::rule<struct set_, ast::Set, true> const set{"set"};
x3::rule<struct arguments_, std::vector<ast::Expr>> const arguments{"arguments_"};

// Rules yielding a generic expression, in the order their definitions chain
// from `program` down to `primary`.
x3::rule<struct program_, ast::Expr> const program{"program"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
x3::rule<struct assignment_, ast::Expr> const assignment{"assignment"};
x3::rule<struct ternary_, ast::Expr> const ternary{"ternary"};
x3::rule<struct logical_or_, ast::Expr> const logical_or{"logical_or"};
x3::rule<struct logical_and_, ast::Expr> const logical_and{"logical_and"};
x3::rule<struct bitwise_or_, ast::Expr> const bitwise_or{"bitwise_or"};
x3::rule<struct bitwise_xor_, ast::Expr> const bitwise_xor{"bitwise_xor"};
x3::rule<struct bitwise_and_, ast::Expr> const bitwise_and{"bitwise_and"};
x3::rule<struct equality_, ast::Expr> const equality{"equality"};
x3::rule<struct comparison_, ast::Expr> const comparison{"comparison"};
x3::rule<struct bitwise_shift_, ast::Expr> const bitwise_shift{"bitwise_shift"};
x3::rule<struct addition_, ast::Expr> const addition{"addition"};
x3::rule<struct multiply_, ast::Expr> const multiply{"multiply"};
x3::rule<struct unarycallwrapper_, ast::Expr> const unarycallwrapper{"unarycallwrapper"};
x3::rule<struct call_, ast::Expr> const call{"call"};
x3::rule<struct get_, ast::Expr> const get{"get"};
x3::rule<struct primary_, ast::Expr> const primary{"primary"};
auto get_string_from_variable = [](auto &ctx)
{ _val(ctx).name_ = std::move(_attr(ctx).name); };
auto get_string_from_variable_cast = [](auto &ctx)
{ _val(ctx).name_ = std::move(_attr(ctx).name); };
auto fix_assignExpr = [](auto &ctx)
{ _val(ctx).value_ = std::move(_attr(ctx)); };
auto as_expr = [](auto &ctx)
{ _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_unary = [](auto &ctx)
{ _val(ctx) = ast::Unary(std::move(_attr(ctx))); };
auto as_call = [](auto &ctx)
{ _val(ctx) = ast::Call{std::move(_val(ctx)), std::move(_attr(ctx))}; };
auto as_binary_op = [](auto &ctx)
{ _val(ctx) = ast::Binary{std::move(_val(ctx)), std::move(_attr(ctx)), ast::Expr{}}; };
auto as_binary_wrap = [](auto &ctx)
{ boost::get<x3::forward_ast<ast::Binary>>(_val(ctx)).get().right_ = std::move(_attr(ctx)); };
auto as_logical_op = [](auto &ctx)
{ _val(ctx) = ast::Logical{std::move(_val(ctx)), std::move(_attr(ctx)), ast::Expr{}}; };
auto as_logical_wrap = [](auto &ctx)
{ boost::get<x3::forward_ast<ast::Logical>>(_val(ctx)).get().right_ = std::move(_attr(ctx)); };
auto as_bitwise_op = [](auto &ctx)
{ _val(ctx) = ast::Bitwise{std::move(_val(ctx)), std::move(_attr(ctx)), ast::Expr{}}; };
auto as_bitwise_wrap = [](auto &ctx)
{ boost::get<x3::forward_ast<ast::Bitwise>>(_val(ctx)).get().right_ = std::move(_attr(ctx)); };
auto as_compound_op = [](auto &ctx)
{ _val(ctx).op_ = std::move(_attr(ctx)); };
auto as_ternary_ifexpr = [](auto &ctx)
{ _val(ctx) = ast::Ternary{std::move(_val(ctx)), std::move(_attr(ctx)), ast::Expr{}}; };
auto as_ternary_elseexpr = [](auto &ctx)
{ boost::get<x3::forward_ast<ast::Ternary>>(_val(ctx)).get().elseexpr_ = std::move(_attr(ctx)); };
auto as_compound_wrap = [](auto &ctx)
{ boost::get<x3::forward_ast<ast::CompoundAssign>>(_val(ctx)).get().value_ = std::move(_attr(ctx)); };
auto fold_in_get_to_set = [](auto &ctx)
{
auto &val = x3::_val(ctx);
val.name_ = boost::get<x3::forward_ast<ast::Get>>(val.object_).get().property_;
val.object_ = ast::Expr(boost::get<x3::forward_ast<ast::Get>>(val.object_).get().object_);
};
auto as_string = [](auto &ctx)
{ _val(ctx) = std::move(_attr(ctx).name); };
auto as_assign = [](auto &ctx)
{ _val(ctx) = ast::Assign(std::move(_val(ctx)), std::move(_attr(ctx))); };
auto as_get = [](auto &ctx)
{
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
auto expression_def = assignment;
auto variable_def = identifier;
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
auto set_def = (get >> '=' >> assignment)[fold_in_get_to_set];
auto arguments_def = *(expression % ',');
auto factor_def = x3::string("*") | x3::string("/");
auto term_def = x3::string("+") | x3::string("-");
auto compare_op_def = x3::string("<=") | x3::string(">=") | x3::string("<") | x3::string(">");
auto equality_op_def = x3::string("!=") | x3::string("==");
auto compoundassign_op_def = x3::string("*=") | x3::string("/=") | x3::string("%=") | x3::string("+=")
| x3::string("-=") | x3::string("<<=") | x3::string(">>=")
| x3::string("&=") | x3::string("^=") | x3::string("|=");
auto bitwise_shift_op_def = x3::string(">>") | x3::string("<<");
//auto binary_def = unarycallwrapper[as_expr] >> *((x3::lit('/') >> unarycallwrapper[as_divide]) | (x3::lit('*') >> unarycallwrapper[as_multiply]));
auto assign_def = variable[get_string_from_variable] >> '=' >> assignment[fix_assignExpr];
auto compoundassign_def = variable[get_string_from_variable] >> compoundassign_op[as_compound_op] >> assignment[fix_assignExpr];
auto assignment_def = (assign | compoundassign | set) | ternary;
auto ternary_def = logical_or[as_expr] >> *('?' >> expression[as_ternary_ifexpr] >> ':' >> ternary[as_ternary_elseexpr]);
auto logical_or_def = logical_and[as_expr] >> *(x3::string("||")[as_logical_op] >> logical_and[as_logical_wrap]);
auto logical_and_def = bitwise_or[as_expr] >> *(x3::string("&&")[as_logical_op] >> bitwise_or[as_logical_wrap]);
auto bitwise_or_def = bitwise_xor[as_expr] >> *((x3::string("|") >> !(x3::lit('|') | x3::lit('=')))[as_bitwise_op] >> bitwise_xor[as_bitwise_wrap]);
auto bitwise_xor_def = bitwise_and[as_expr] >> *((x3::string("^") > !(x3::lit('^') | x3::lit('=')))[as_bitwise_op] >> bitwise_and[as_bitwise_wrap]);
auto bitwise_and_def = equality[as_expr] >> *((x3::string("&") >> !(x3::lit('&') | x3::lit('=')))[as_bitwise_op] >> equality[as_bitwise_wrap]);
auto equality_def = comparison[as_expr] >> *(equality_op[as_binary_op] >> comparison[as_binary_wrap]);
auto comparison_def = bitwise_shift[as_expr] >> *(compare_op[as_binary_op] >> bitwise_shift[as_binary_wrap]);
auto bitwise_shift_def = addition[as_expr] >> *(bitwise_shift_op[as_binary_op] >> addition[as_binary_wrap]);
auto addition_def = multiply[as_expr] >> *(term[as_binary_op] >> multiply[as_binary_wrap]);
auto multiply_def = unarycallwrapper[as_expr] >> *(factor[as_binary_op] >> unarycallwrapper[as_binary_wrap]);
auto unarycallwrapper_def = unary | call;
auto unary_def = (x3::string("-") >> unarycallwrapper);
auto get_def = primary[as_expr] >> *('.' >> identifier)[as_get];
auto call_def = primary[as_expr] >> *((x3::lit('(') >> arguments >> x3::lit(')'))[as_call] | ('.' >> identifier)[as_get]);
auto primary_def = variable;
auto program_def = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(primary, assign,
compoundassign, compoundassign_op, bitwise_or, bitwise_xor,
ternary,
bitwise_and, bitwise_shift, bitwise_shift_op,
logical_and, logical_or, equality_op,
equality, factor, compare_op, comparison,
term, addition, multiply, unary, unarycallwrapper,
assignment, get, set, variable, arguments, expression, call, identifier, program);
} // namespace hlsl::parser
// Test driver: parses every sample expression and prints either the resulting
// AST or the input that was left when parsing failed.
int main()
{
    using namespace hlsl;
    for (std::string const input :
         {
             "first",
             "first.second",
             "first.Second.third",
             "first.Second().third",
             "first.Second(arg1).third",
             "first.Second(arg1, arg2).third",
             "first = second",
             "first.second = third",
             "first.second.third = fourth",
             "first.second.third = fourth()",
             "first.second.third = fourth(arg1)",
             "this * that",  // binary { var{"this"} "*" var{"that"} }
             "this * -that", // binary { var{"this"} "*" unary{'-', var{"that"}} }
             "this * that * there",
             "this * that / there",
             "this.inner * that * there.inner2",
             "first + second",
             "first + second * third",
             "first < second",
             "first <= second * third",
             "first - second > third",
             "first != second",
             "first == second * third",
             "first || second",
             // The comma after this entry matters: without it the literal is
             // concatenated with the next one into a single input.
             "first || second && third",
             "first |= second",
             "first |= second.third",
             "first & second",
             "first & second && third",
             "first &= second && third",
             "first << second && third",
             "first ^ second",
             "first ^ second ^^ third", // fails on purpose because this operator doesn't exist!
             "zero |= first | second || third",
             "first ? second : third",
             "first > second ? third : fourth",
             "first > second ? third : fourth > fifth ? sixth : seventh",
         })
    {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();
        // Make the error handler reachable from the rules' on_error hooks.
        auto const p = x3::with<parser::eh_tag>(
            x3::error_handler{f, l, std::cerr})[hlsl::parser::program];
        if (hlsl::ast::Expr fs; parse(f, l, p, fs))
        {
            fs.apply_visitor(hlsl::printer{std::cout << "Parsed: "});
            std::cout << "\n";
        }
        else
        {
            std::cout << "Parse failed at " << quoted(std::string(f, l)) << "\n";
        }
    }
}
What is the algorithm for developing a string parser to create a geometry? The geometry is generated in 2 steps: at the first step, we create primitives; at the second, we combine primitives into objects.
The syntax is presented in the string below.
// Sample input. A raw string literal is used because an ordinary string
// literal cannot span several source lines.
std::string str = R"([GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//...)";
// Base class sketch for all primitives (pseudo-code from the question).
// NOTE(review): the class is spelled PRIMITIVE but its operators use
// `Primitive`; one spelling has to be chosen for this to compile.
// NOTE(review): the operators are declared to return a reference but have
// empty bodies.
class PRIMITIVE{
int number;
public:
Primitive& operator+ (Primitive& primitive) {}; //overloading arithmetic operations
Primitive& operator* (Primitive& primitive) {};
Primitive& operator- (Primitive& primitive) {};
// NOTE(review): presumably meant as a pure virtual member function taking a
// point; as written there is no parameter list.
virtual bool check_in_point_inside_primitive = 0;
};
// Sphere primitive described by its radius (pseudo-code from the question).
class SPHERE:public PRIMITIVE{
double m_radius;
public:
SPHERE(double radius): m_radius(radius) {}; //In which part of the parser to create objects?
// NOTE(review): declared to return bool but the body is empty; the name also
// differs from the base class's check_in_point_inside_primitive, so nothing
// is overridden.
bool check_in_point_inside_sphere(Point& point){};
};
// Box primitive described by two parameters A and B (pseudo-code from the
// question).
class BOX:public PRIMITIVE{
double m_A;
double m_B;
public:
BOX(double A, double B): m_A(A), m_B(B) {};
// NOTE(review): declared to return bool but the body is empty; the name also
// differs from the base class's check_in_point_inside_primitive, so nothing
// is overridden.
bool check_in_point_inside_box(Point& point){};
};
// Object sketch: a numbered entity referring to a primitive (pseudo-code from
// the question).
class OBJECT{
int number;
PRIMITIVE& primitive;
public:
// NOTE(review): the reference member `primitive` is not initialized here.
OBJECT(){};
// Intended to test `point` against the object's expression over the given
// primitives; the body is still an open question.
bool check_in_point_inside_object(Primitive& PRIMITIVE1, Primitive& PRIMITIVE2, Point& point){
//>How to construct a function from an expression 'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
}
};
How to analyze the string PRIMITIVE1=SPHERE(RADIUS=5.5) and pass a parameter to the constructor of SPHERE()? How to identify this object by the name PRIMITIVE1 so that it can be referred to in OBJECT? Is it possible to create a pair<PRIMITIVE1,SPHERE(5.5)> and store all primitives in a map?
How to parse the string of the OBJECT1 and to construct a function from an expression PRIMITIVE2*(-PRIMITIVE1) inside an OBJECT1? This expression will be required multiple times when determining the position of each point relative to the object.
How to use boost::spirit for this task? Tokenize a string using boost::spirit::lex, and then develop rules using boost::spirit::qi?
As a finger exercise, and despite the serious problems I see with the chosen virtual type hierarchy, let's try to make a value-oriented container of Primitives that can be indexed by their id (ById):
Live On Coliru
#include <boost/intrusive/set.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <iostream>
namespace bi = boost::intrusive;
// Placeholder point type; it carries no data in this sketch.
struct Point {};
// Hook that lets a Primitive be linked into an intrusive set; auto_unlink mode.
using IndexHook = bi::set_member_hook<bi::link_mode<bi::auto_unlink>>;

// Abstract base of all primitives, identified by an integer id.
class Primitive {
    int _id;

  public:
    // Ordering by id; each operand may be a Primitive or a plain int id.
    struct ById {
        bool operator()(auto const&... operands) const {
            return std::less<>{}(key(operands)...);
        }

      private:
        static int key(int id) { return id; }
        static int key(Primitive const& p) { return p._id; }
    };

    IndexHook _index;

    Primitive(int id) : _id(id) {}
    virtual ~Primitive() = default;

    int id() const { return _id; }

    // Arithmetic operators; placeholders that leave the object unchanged.
    Primitive& operator+=(Primitive const&) { return *this; }
    Primitive& operator*=(Primitive const&) { return *this; }
    Primitive& operator-=(Primitive const&) { return *this; }

    virtual bool check_in_point_inside(Point const&) const = 0;
};

// Intrusive set of Primitives ordered by id, linked through Primitive::_index.
using Index = bi::set<
    Primitive,
    bi::constant_time_size<false>,
    bi::compare<Primitive::ById>,
    bi::member_hook<Primitive, IndexHook, &Primitive::_index>>;
// Sphere primitive with a radius.
class Sphere : public Primitive {
    double _radius;

  public:
    // In which part of the parser to create objects?
    Sphere(int id, double radius) : Primitive(id), _radius(radius) {}

    // Placeholder: reports every point as outside.
    bool check_in_point_inside(Point const&) const override { return false; }
};
// Box primitive with two parameters A and B.
class Box : public Primitive {
    double _A;
    double _B;

  public:
    Box(int id, double A, double B)
        : Primitive(id)
        , _A(A)
        , _B(B) {}

    // Placeholder: reports every point as outside.
    bool check_in_point_inside(Point const&) const override { return false; }
};
// An object: an id together with a reference to a primitive.
class Object {
    int        _id;
    Primitive& _primitive;

  public:
    Object(int id, Primitive& p)
        : _id(id)
        , _primitive(p) {}

    // Placeholder that always returns false. Open question from the original
    // post: how to construct a function from an expression such as
    // 'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
    bool check_in_point_inside_object(Primitive const&, Primitive const&,
                                      Point const&) const {
        return false;
    }
};
// Container holding primitives of any derived type by value.
using Primitives = boost::poly_collection::base_collection<Primitive>;

// Demo: fills the collection, indexes it by id (reporting duplicate ids), and
// prints first the index order and then the collection order.
int main() {
    Primitives test;
    test.insert(Sphere{2, 4.0});
    test.insert(Sphere{4, 4.0});
    test.insert(Box{2, 5, 6});
    test.insert(Sphere{1, 4.0});
    test.insert(Box{3, 5, 6});

    Index idx;
    for (auto& p : test) {
        bool const indexed = idx.insert(p).second;
        if (!indexed)
            std::cout << "Duplicate id " << p.id() << " not indexed\n";
    }

    // Prints the dynamic type name and id of every element of a range.
    auto dump = [](auto& range) {
        for (auto& p : range)
            std::cout << typeid(p).name() << " " << p.id() << "\n";
    };

    dump(idx);
    std::cout << "---\n";
    dump(test);
}
Prints
Duplicate id 2 not indexed
6Sphere 1
3Box 2
3Box 3
6Sphere 4
---
3Box 2
3Box 3
6Sphere 2
6Sphere 4
6Sphere 1
So far so good. This is an important building block to prevent all manner of pain when dealing with virtual types in Spirit grammars¹
PS: I've since dropped the idea of the intrusive set. It doesn't work because the base_collection moves items around on reallocation, and that unlinks the items from their intrusive set.
Instead, see below for an approach that doesn't try to resolve ids during the parse.
Parsing primitives
We get the ID from the PRIMITIVE1. We could store it somewhere before naturally parsing the primitives themselves, then set the id on it on commit.
Let's start with defining a State object for the parser:
// Parser state: the id announced for the entity being parsed, plus everything
// committed so far.
struct State {
    Ast::Id         next_id;
    Ast::Primitives primitives;
    Ast::Objects    objects;

    // Variant attribute: commit whichever alternative it holds.
    template <typename... T> void commit(boost::variant<T...>& val) {
        boost::apply_visitor([this](auto& alternative) { commit(alternative); }, val);
    }

    // Takes the pending id (resetting next_id to 0) and files the value:
    // types derived from Ast::Primitive go into `primitives`, anything else
    // is wrapped in an Ast::Object and appended to `objects`.
    template <typename T> void commit(T& primitiveOrExpr) {
        auto const id = std::exchange(next_id, 0);
        if constexpr (!std::is_base_of_v<Ast::Primitive, T>) {
            objects.push_back(Ast::Object{id, std::move(primitiveOrExpr)});
        } else {
            primitiveOrExpr.id = id;
            primitives.insert(std::move(primitiveOrExpr));
        }
    }
};
As you can see, we just have a place to store the primitives and the objects. Then there is the temporary storage for our next_id while we're still parsing the next entity.
The commit function helps sorting the products of the parser rules. As it happens, they can be variant, which is why we have the apply_visitor dispatch for commit on a variant.
Again, as the footnote¹ explains, Spirit's natural attribute synthesis favors static polymorphism.
The semantic actions we need are now:
// Fetches the State object stored in the parser context.
static inline auto& state(auto& ctx) {
    return get<State>(ctx);
}

// Remembers the parsed id for the entity that is about to be parsed.
auto draft = [](auto& ctx) {
    auto& pending = state(ctx);
    pending.next_id = _attr(ctx);
};

// Hands the parsed entity to State::commit.
auto commit = [](auto& ctx) {
    auto& parsed = _attr(ctx);
    state(ctx).commit(parsed);
};
Now let's jump ahead to the primitives:
// `sphere(radius=<number>)` -> Ast::Sphere. The as<> helper is not shown in
// this snippet; presumably it forces the attribute type - confirm.
auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
// `box(a=<number>, b=<number>)` -> Ast::Box.
auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');
// `primitive<N> = <sphere|box>;` - the id is drafted first and the parsed
// primitive committed afterwards; the terminating ';' is an expectation point.
auto primitive =
("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';
That's still cheating a little, as I've used the param helper to reduce typing:
// A floating-point number exposed as Ast::Number, labelled "number".
auto number = as<Ast::Number>(double_, "number");
// `<name> = <p>`: the name is matched but omitted from the attribute, so only
// p's attribute is exposed.
auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
// Shorthand for the common case of a numeric parameter.
auto param(auto name) { return param(name, number); }
As you can see I've already assumed most parameters will have numerical nature.
What Are Objects Really?
Looking at it for a while, I concluded that really an Object is defined as an id number (OBJECT1, OBJECT2...) which is tied to an expression. The expression can reference primitives and have some unary and binary operators.
Let's sketch an AST for that:
// Numeric literal type.
using Number = double;

// Reference to a primitive by id.
struct RefPrimitive {
    Id id;
};

struct Binary;
struct Unary;

// An object expression. Unary and Binary contain an Expr themselves, so they
// are held through recursive_wrapper.
using Expr = boost::variant<
    Number,
    RefPrimitive,
    boost::recursive_wrapper<Unary>,
    boost::recursive_wrapper<Binary>>;

// Prefix operator applied to one operand.
struct Unary {
    char op;
    Expr oper;
};

// Infix operator with two operands.
struct Binary {
    Expr lhs;
    char op;
    Expr rhs;
};

// An object: an id tied to an expression.
struct Object {
    Id   id;
    Expr expr;
};
Now To Parse Into That Expression AST
It's really 1:1 rules for each Ast node type. E.g.:
// ref_prim: the word "primitive" followed by an unsigned number, parsed inside
// lexeme[] and exposed as an Ast::RefPrimitive.
auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_]);
Now many of the expression rules can recurse, so we need declared rules with definitions via BOOST_SPIRIT_DEFINE:
// object expression grammar
// Declarations only: each rule gets a unique tag type, the attribute type it
// exposes, and a name. The definitions are attached separately so that the
// rules can refer to each other recursively.
rule<struct simple_tag, Ast::Expr> simple{"simple"};
rule<struct unary_tag, Ast::Unary> unary{"unary"};
rule<struct expr_tag, Ast::Expr> expr{"expr"};
rule<struct term_tag, Ast::Expr> term{"term"};
rule<struct factor_tag, Ast::Expr> factor{"factor"};
As you can tell, some of these are not 1:1 with the Ast nodes, mainly because of the recursion and the difference in operator precedence (term vs factor vs. simple). It's easier to see with the rule definition:
// unary: a sign character applied to a simple expression.
auto unary_def = char_("-+") >> simple;
// simple: a primitive reference, a unary expression, or a parenthesized expr.
auto simple_def = ref_prim | unary | '(' >> expr >> ")";
auto factor_def = simple;
// Binary levels: the first operand seeds _val (assign); every repetition then
// folds `op operand` into _val (make_binary). The repeated operand is the
// next-higher precedence level (factor / term) rather than the rule itself, so
// chains group to the left: a-b-c becomes ((a-b)-c). Recursing on the rule
// itself would swallow the rest of the chain in one iteration and yield
// a-(b-c).
auto term_def = factor[assign] >> *(char_("*/") >> factor)[make_binary];
auto expr_def = term[assign] >> *(char_("-+") >> term)[make_binary];
Because none of the rules actually expose a Binary, automatic attribute propagation is not convenient there². Instead, we use assign and make_binary semantic actions:
auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };
auto make_binary = [](auto& ctx) {
using boost::fusion::at_c;
auto& attr = _attr(ctx);
auto op = at_c<0>(attr);
auto& rhs = at_c<1>(attr);
_val(ctx) = Ast::Binary { _val(ctx), op, rhs };
};
Finally, let's tie the definitions to the declared rules (using their tag types):
// Connects each listed rule with its `<name>_def` definition.
BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)
All we need is a similar line to primitive:
// object: "object" <number> '=' expr ';'
// The number goes through the draft action and the parsed expression through
// the commit action. The `>` makes the trailing ';' an expectation point.
auto object =
("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';
And we can finish up by defining each line as a primitive|object:
// A line is either a primitive definition or an object definition.
auto line = primitive | object;
// file: optional leading newlines, the "[geometry]" header, then eol-separated
// lines (each of which may be empty) up to end of input. The whole thing is
// wrapped in no_case[] and uses ws_comment as the skipper.
auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
At the top level we expect the [GEOMETRY] header, specify that we want to be case insensitive and ... that ws_comment is to be skipped³:
auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> eol]);
This allows us to ignore the // comments as well.
Live Demo Time
Live On Compiler Explorer
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <list>
#include <map>
namespace x3 = boost::spirit::x3;
namespace Ast {
    using Id = uint32_t;

    struct Point { }; // ?? where does this belong?

    // Common base of all primitives; carries the id and makes the hierarchy
    // polymorphic.
    struct Primitive {
        Id id;
        virtual ~Primitive() = default;
    };

    struct Sphere : Primitive {
        double radius;
    };

    struct Box : Primitive {
        double a, b;
    };

    using Number = double;

    // Reference to a primitive through its id.
    struct RefPrimitive {
        Id id;
    };

    // Recursive node types: forward declared, held through recursive_wrapper.
    struct Binary;
    struct Unary;

    using Expr = boost::variant<
        Number,
        RefPrimitive,
        boost::recursive_wrapper<Unary>,
        boost::recursive_wrapper<Binary>>;

    struct Unary {
        char op;
        Expr oper;
    };

    struct Binary {
        Expr lhs;
        char op;
        Expr rhs;
    };

    struct Object {
        Id   id;
        Expr expr;
    };

    using Primitives = boost::poly_collection::base_collection<Primitive>;
    using Objects    = std::list<Object>;
    using Index      = std::map<Id, std::reference_wrapper<Primitive const>>;

    // Prints the demangled dynamic type name of the primitive and its id.
    std::ostream& operator<<(std::ostream& os, Primitive const& p) {
        os << boost::core::demangle(typeid(p).name());
        os << " " << "(id: " << p.id << ")";
        return os;
    }

    std::ostream& operator<<(std::ostream& os, Object const& o) {
        os << "object(id:" << o.id;
        os << ", expr:" << o.expr << ")";
        return os;
    }

    std::ostream& operator<<(std::ostream& os, RefPrimitive ref) {
        os << "reference(prim:" << ref.id << ")";
        return os;
    }

    // Binary nodes print as "(lhs op rhs)" without any spacing.
    std::ostream& operator<<(std::ostream& os, Binary const& b) {
        os << '(' << b.lhs;
        os << b.op;
        os << b.rhs << ')';
        return os;
    }

    // Unary nodes print as "(op operand)" without any spacing.
    std::ostream& operator<<(std::ostream& os, Unary const& u) {
        os << '(' << u.op;
        os << u.oper << ')';
        return os;
    }
} // namespace Ast
// Fusion adaptations: each line names the members of the struct that are
// exposed as a Fusion sequence. Ast::Binary is not adapted here; Binary nodes
// are constructed explicitly in the make_binary semantic action.
BOOST_FUSION_ADAPT_STRUCT(Ast::Primitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Sphere, radius)
BOOST_FUSION_ADAPT_STRUCT(Ast::Box, a, b)
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::RefPrimitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Unary, op, oper)
namespace Parser {
    using namespace x3;

    // Parse-time storage: the committed primitives and objects, plus the id of
    // the entity that is currently being parsed.
    struct State {
        Ast::Id         next_id = 0; // initialized so it never holds an indeterminate value
        Ast::Primitives primitives;
        Ast::Objects    objects;

        // A variant product is dispatched to the overload for its active
        // alternative.
        template <typename... T> void commit(boost::variant<T...>& val) {
            boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
        }

        // Consumes the pending id (resetting it to 0). Types derived from
        // Ast::Primitive receive the id and go into `primitives`; anything
        // else is wrapped in an Ast::Object and appended to `objects`.
        template <typename T> void commit(T& val) {
            auto id = std::exchange(next_id, 0);
            if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
                val.id = id;
                primitives.insert(std::move(val));
            } else {
                objects.push_back(Ast::Object{id, std::move(val)});
            }
        }
    };

    // Returns the State bound to the parse context under the State tag.
    static inline auto& state(auto& ctx) { return get<State>(ctx); }

    // Semantic actions: remember the id / store the parsed product.
    auto draft  = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
    auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };

    // as<T>(p, name): wraps parser p in a rule with attribute type T.
    template <typename T>
    auto as = [](auto p, char const* name = "as") {
        return rule<struct _, T>{name} = p;
    };

    // Skipper: blanks and `//` comments terminated by end of line or end of
    // input.
    auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> (eol | eoi)]);

    auto number = as<Ast::Number>(double_, "number");

    // param(name, p): `name '=' p`, where the name does not contribute to the
    // attribute. The single-argument form expects a number.
    auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
    auto param(auto name) { return param(name, number); }

    auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
    auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');

    // "primitive" <id> '=' (sphere | box) ';' -- the ';' is an expectation
    // point.
    auto primitive =
        ("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';

    auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_], "ref_prim");

    // object expression grammar
    rule<struct simple_tag, Ast::Expr> simple{"simple"};
    rule<struct unary_tag, Ast::Unary> unary{"unary"};
    rule<struct expr_tag, Ast::Expr> expr{"expr"};
    rule<struct term_tag, Ast::Expr> term{"term"};
    rule<struct factor_tag, Ast::Expr> factor{"factor"};

    // The rule's value becomes the attribute that was just parsed.
    auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };

    // Folds an `operator operand` pair into the rule's value: the value so far
    // becomes the left-hand side of a new Ast::Binary.
    auto make_binary = [](auto& ctx) {
        using boost::fusion::at_c;
        auto& attr = _attr(ctx);
        auto op = at_c<0>(attr);
        auto& rhs = at_c<1>(attr);
        _val(ctx) = Ast::Binary { _val(ctx), op, rhs };
    };

    auto unary_def = char_("-+") >> simple;
    auto simple_def = ref_prim | unary | '(' >> expr >> ")";
    auto factor_def = simple;
    // The repeated operand is the next-higher precedence level (factor / term)
    // rather than the rule itself, so chains group to the left: a-b-c becomes
    // ((a-b)-c). Recursing on the rule itself would swallow the rest of the
    // chain in one iteration and yield a-(b-c).
    auto term_def = factor[assign] >> *(char_("*/") >> factor)[make_binary];
    auto expr_def = term[assign] >> *(char_("-+") >> term)[make_binary];

    BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)

    // "object" <id> '=' expr ';' -- the ';' is an expectation point.
    auto object =
        ("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';

    auto line = primitive | object;

    // Optional leading newlines, the "[geometry]" header, then eol-separated
    // (possibly empty) lines up to end of input; case-insensitive, skipping
    // ws_comment.
    auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
} // namespace Parser
int main() {
for (std::string const input :
{
R"(
[geometry]
primitive1=sphere(radius=5.5);
primitive2=box(a=-5.2, b=7.3);
//...
object1=primitive2*(-primitive1);
//...)",
R"(
[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//...)",
}) //
{
Parser::State state;
bool ok = parse(begin(input), end(input),
x3::with<Parser::State>(state)[Parser::file]);
std::cout << "Parse success? " << std::boolalpha << ok << "\n";
Ast::Index index;
for (auto& p : state.primitives)
if (auto[it,ok] = index.emplace(p.id, p); not ok) {
std::cout << "Duplicate id " << p
<< " (conflicts with existing " << it->second.get()
<< ")\n";
}
std::cout << "Primitives by ID:\n";
for (auto& [id, prim] : index)
std::cout << " - " << prim << "\n";
std::cout << "Objects in definition order:\n";
for (auto& obj: state.objects)
std::cout << " - " << obj << "\n";
}
}
Prints
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
¹ How can I use polymorphic attributes with boost::spirit::qi parsers?
² and insisting on that leads to the classic inefficiency of rules that cause a lot of backtracking
³ outside of lexemes
I have the following code to evaluate a Boolean string based on a string input.
The code is supposed to work like this:
Boolean string: "((0|1)&3);"
String input: "101"
how's it working? each character in the input string is supposed to be substituted by corresponding character in Boolean string.
for example:
1 in the input string by 0 in Boolean string
0 in the input string by 1 in Boolean string
1 in the input string by 3 in Boolean string
I know it is confusing. My problem is that the code used to work for many cases, but I don't understand why it is not working for the above example.
I added the live version for editing here.
#include <iostream>
#include <fstream>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/variant/recursive_wrapper.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Tag types that tell the operator node templates apart.
struct op_or  {};
struct op_and {};
struct op_not {};

// A variable is identified by its textual name.
typedef std::string var;

template <typename tag> struct binop;
template <typename tag> struct unop;

// Expression tree: a variable, or an operator node held through
// recursive_wrapper because the node types contain expr themselves.
typedef boost::variant<
    var,
    boost::recursive_wrapper<unop <op_not> >,
    boost::recursive_wrapper<binop<op_and> >,
    boost::recursive_wrapper<binop<op_or> >
> expr;

// Binary operator node with two operands.
template <typename tag> struct binop
{
    explicit binop(const expr& lhs, const expr& rhs)
        : oper1(lhs), oper2(rhs)
    { }

    expr oper1, oper2;
};

// Unary operator node with a single operand.
template <typename tag> struct unop
{
    explicit unop(const expr& operand)
        : oper1(operand)
    { }

    expr oper1;
};
// Visitor that evaluates an expression against a key string. Every visited
// variable consumes the next character of the key (in visiting order) and
// converts it to bool; the variable's own name is only printed.
struct eval2 : boost::static_visitor<bool>
{
    eval2(const std::string& pk) : iter(0), pkey(pk) { }

    // Logs the variable and the current position, then advances the position
    // and returns the key character at the position that was just consumed.
    bool operator()(const var& v) const
    {
        std:: cout << "**** " << v << "\titer: " << iter << std::endl;
        const int slot = iter++;
        return boost::lexical_cast<bool>(pkey[slot]);
    }

    // The right operand is only visited when the left one is true.
    bool operator()(const binop<op_and>& b) const
    {
        if (!recurse(b.oper1))
            return false;
        return recurse(b.oper2);
    }

    // The right operand is only visited when the left one is false.
    bool operator()(const binop<op_or>& b) const
    {
        if (recurse(b.oper1))
            return true;
        return recurse(b.oper2);
    }

    bool operator()(const unop<op_not>& u) const
    {
        const bool operand = recurse(u.oper1);
        return !operand;
    }

private:
    mutable int iter;        // index of the next key character to consume
    const std::string pkey;  // the key string

    // Applies this same visitor object to a sub-expression.
    template<typename T>
    bool recurse(T const& v) const
    {
        return boost::apply_visitor(*this, v);
    }
};
// Visitor that writes a fully parenthesized rendering of an expression to a
// stream.
struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    // Variables print as their name.
    void operator()(const var& v) const
    {
        _os << v;
    }

    void operator()(const binop<op_and>& b) const
    {
        print(" & ", b.oper1, b.oper2);
    }

    void operator()(const binop<op_or >& b) const
    {
        print(" | ", b.oper1, b.oper2);
    }

    // Writes "(" l op r ")".
    void print(const std::string& op, const expr& l, const expr& r) const
    {
        const printer& self = *this;
        _os << "(";
        boost::apply_visitor(self, l);
        _os << op;
        boost::apply_visitor(self, r);
        _os << ")";
    }

    // Writes "(!" operand ")".
    void operator()(const unop<op_not>& u) const
    {
        _os << "(" << "!";
        boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }
};
bool evaluate2(const expr& e, const std::string s)
{
return boost::apply_visitor(eval2(s), e);
}
std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }
// Qi grammar for boolean expressions over '|', '&', '!' and parentheses,
// producing an expr tree. The start rule is expr_.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, expr(), Skipper>
{
parser() : parser::base_type(expr_)
{
using namespace qi;
// expr_ is an alias of the lowest-precedence level.
expr_ = or_.alias();
// Each binary level first tries `operand op same-level` (right recursion)
// and builds the operator node; otherwise it passes the single operand
// through unchanged.
or_ = (and_ >> '|' >> or_ ) [ qi::_val = phx::construct<binop<op_or > >(qi::_1, qi::_2) ] | and_ [ qi::_val = qi::_1 ];
and_ = (not_ >> '&' >> and_) [ qi::_val = phx::construct<binop<op_and> >(qi::_1, qi::_2) ] | not_ [ qi::_val = qi::_1 ];
// After '!' a simple expression is required (`>` is an expectation point).
not_ = ('!' > simple ) [ qi::_val = phx::construct<unop <op_not> >(qi::_1) ] | simple [ qi::_val = qi::_1 ];
// A parenthesized expression or a variable.
simple = (('(' > expr_ > ')') | var_);
// Variable names: one or more letters/digits, matched inside lexeme[].
var_ = qi::lexeme[ +(alpha|digit) ];
BOOST_SPIRIT_DEBUG_NODE(expr_);
BOOST_SPIRIT_DEBUG_NODE(or_);
BOOST_SPIRIT_DEBUG_NODE(and_);
BOOST_SPIRIT_DEBUG_NODE(not_);
BOOST_SPIRIT_DEBUG_NODE(simple);
BOOST_SPIRIT_DEBUG_NODE(var_);
}
private:
qi::rule<It, var() , Skipper> var_;
qi::rule<It, expr(), Skipper> not_, and_, or_, simple, expr_;
};
bool string2BooleanExe(std::string bStatement, std::string bKey)
{
typedef std::string::const_iterator It;
It f(bStatement.begin()), l(bStatement.end());
parser<It> p;
try
{
expr result;
bool ok = qi::phrase_parse(f,l,p > ';',qi::space,result);
if (!ok)
std::cerr << "invalid input\n";
else
{
std::cout << "result:\t" << result << "\n";
bool returnResult = evaluate2(result, bKey);
std::cout << "evaluated:\t" << returnResult << "\n";
return returnResult;
}
} catch (const qi::expectation_failure<It>& e)
{
std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
}
if (f!=l) std::cerr << "unparsed: '" << std::string(f,l) << "'\n";
return false;
}
// Runs the sample expression against the sample key and prints the outcome.
int main()
{
    const bool res = string2BooleanExe("((0|1)&3);", "101");

    std::cout << "res: " << res << std::endl;

    return 0;
}
Please note I can only use C++03.
So you want variables. And they are implicit.
And you denote them with integers in the expression. Yes, that's confusing, but why not, I guess.
The grammar suggests that variables could be any length of alphanumeric characters, though. Let's do this, and fix the sample to be:
// Sketch of the intended call: variables are bound by name instead of by
// position in a key string.
// NOTE(review): the braced map literal needs C++11; under C++03 the map has to
// be filled with separate assignments.
bool res = string2BooleanExe("((a|b)&c);", {
{ "a", true }, { "b", false }, { "c", true } }); // was: 101
Now in your implementation there are two big problems:
you are using names 0, 1, 2 for the placeholders in the source expression but these are ignored (this means that ((0|1)&2) is functionally equivalent to ((1|2)&0)... I doubt that's what anyone wanted)
your eval2¹ visitor is stateful. You need to pass and use it by reference if you're going to retain state. Alternatively, make sure your copy constructor actually copies the value of iter
Here's my take on things, using
typedef std::map<std::string, bool> VarMap;
Let's use it in the evaluator visitor:
// Visitor that evaluates an expression by looking variables up by name.
//
// The variable map is held by reference instead of being copied on every
// construction; the map passed to the constructor must therefore outlive the
// evaluator (which it does when the evaluator is a temporary used for a single
// apply_visitor call).
struct evaluator : boost::static_visitor<bool>
{
    evaluator(VarMap const& pk) : pk(pk) { }

    // Looks the variable up; map::at throws for an unknown name.
    bool operator()(const var& v) const { return pk.at(v); }
    bool operator()(const binop<op_and>& b) const { return recurse(b.oper1) && recurse(b.oper2); }
    bool operator()(const binop<op_or>& b) const { return recurse(b.oper1) || recurse(b.oper2); }
    bool operator()(const unop<op_not>& u) const { return !recurse(u.oper1); }

private:
    // Applies this same visitor to a sub-expression.
    template<typename T> bool recurse(T const& v) const { return boost::apply_visitor(*this, v); }

    VarMap const& pk; // non-owning view of the caller's variable bindings
};
Splitting the evaluate and parse functions:
static const parser<std::string::const_iterator> s_parser_instance;

// Parses bStatement into an expression tree.
// Throws std::runtime_error when the parser rejects the input without raising
// an expectation failure of its own, so that a failed parse can never be
// mistaken for a (default-constructed) expression.
expr parse(std::string const& bStatement) {
    std::string::const_iterator f = bStatement.begin(), l = bStatement.end();
    expr parsed;
    if (!qi::parse(f, l, s_parser_instance, parsed))
        throw std::runtime_error("invalid expression: '" + bStatement + "'");
    return parsed;
}
// Evaluates expression e with the variable bindings in vars.
bool evaluate(expr const& e, VarMap const& vars) {
    const evaluator visitor(vars);
    return boost::apply_visitor(visitor, e);
}
Now let's see the full demo
Full Demo
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <fstream>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/lexical_cast.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/variant/recursive_wrapper.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Variable bindings: name -> truth value.
typedef std::map<std::string, bool> VarMap;

// Tag types that tell the operator node templates apart.
struct op_or  {};
struct op_and {};
struct op_not {};

// A variable is identified by its textual name.
typedef std::string var;

template <typename tag> struct binop;
template <typename tag> struct unop;

// Expression tree: a variable, or an operator node held through
// recursive_wrapper because the node types contain expr themselves.
typedef boost::variant<
    var,
    boost::recursive_wrapper<unop <op_not> >,
    boost::recursive_wrapper<binop<op_and> >,
    boost::recursive_wrapper<binop<op_or> >
> expr;

// Binary operator node with two operands.
template <typename tag> struct binop {
    explicit binop(const expr& lhs, const expr& rhs)
        : oper1(lhs), oper2(rhs)
    { }

    expr oper1, oper2;
};

// Unary operator node with a single operand.
template <typename tag> struct unop {
    explicit unop(const expr& operand)
        : oper1(operand)
    { }

    expr oper1;
};
// Visitor that evaluates an expression by looking variables up by name.
//
// The variable map is held by reference instead of being copied on every
// construction; the map passed to the constructor must therefore outlive the
// evaluator (which it does when the evaluator is a temporary used for a single
// apply_visitor call).
struct evaluator : boost::static_visitor<bool>
{
    evaluator(VarMap const& pk) : pk(pk) { }

    // Looks the variable up; map::at throws for an unknown name.
    bool operator()(const var& v) const { return pk.at(v); }
    bool operator()(const binop<op_and>& b) const { return recurse(b.oper1) && recurse(b.oper2); }
    bool operator()(const binop<op_or>& b) const { return recurse(b.oper1) || recurse(b.oper2); }
    bool operator()(const unop<op_not>& u) const { return !recurse(u.oper1); }

private:
    // Applies this same visitor to a sub-expression.
    template<typename T> bool recurse(T const& v) const { return boost::apply_visitor(*this, v); }

    VarMap const& pk; // non-owning view of the caller's variable bindings
};
// Visitor that writes a fully parenthesized rendering of an expression to a
// stream.
struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    // Variables print as their name.
    void operator()(const var& v) const
    {
        _os << v;
    }

    void operator()(const binop<op_and>& b) const
    {
        print(" & ", b.oper1, b.oper2);
    }

    void operator()(const binop<op_or >& b) const
    {
        print(" | ", b.oper1, b.oper2);
    }

    // Writes "(" l op r ")".
    void print(const std::string& op, const expr& l, const expr& r) const
    {
        const printer& self = *this;
        _os << "(";
        boost::apply_visitor(self, l);
        _os << op;
        boost::apply_visitor(self, r);
        _os << ")";
    }

    // Writes "(!" operand ")".
    void operator()(const unop<op_not>& u) const
    {
        _os << "(" << "!";
        boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }
};
std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }
// Qi grammar for boolean expressions over '|', '&', '!' and parentheses.
// The skipper, the terminating ';' and the end-of-input check are all part of
// the start rule, so callers use plain qi::parse.
template <typename It>
struct parser : qi::grammar<It, expr()>
{
parser() : parser::base_type(start) {
using namespace qi;
// Whole input: an expression, then ';', then end of input; `>` makes the
// latter two expectation points.
start = skip(space) [expr_ > ';' > eoi];
expr_ = or_.alias();
// Each binary level first tries `operand op same-level` (right recursion)
// and builds the operator node; otherwise it passes the single operand
// through unchanged.
or_ = (and_ >> '|' >> or_ ) [ _val = phx::construct<binop<op_or > >(_1, _2) ] | and_ [ _val = _1 ];
and_ = (not_ >> '&' >> and_) [ _val = phx::construct<binop<op_and> >(_1, _2) ] | not_ [ _val = _1 ];
not_ = ('!' > simple ) [ _val = phx::construct<unop <op_not> >(_1) ] | simple [ _val = _1 ];
// A parenthesized expression or a variable.
simple = ('(' > expr_ > ')') | var_;
// Variable names: one or more letters/digits, matched inside lexeme[].
var_ = lexeme[ +(alpha|digit) ];
BOOST_SPIRIT_DEBUG_NODES((expr_) (or_) (and_) (not_) (simple) (var_));
}
private:
// start declares no skipper; the inner rules are declared with space_type.
qi::rule<It, expr()> start;
qi::rule<It, var() , qi::space_type> var_;
qi::rule<It, expr(), qi::space_type> not_, and_, or_, simple, expr_;
};
static const parser<std::string::const_iterator> s_parser_instance;

// Parses bStatement into an expression tree.
// Throws std::runtime_error when the parser rejects the input without raising
// an expectation failure of its own, so that a failed parse can never be
// mistaken for a (default-constructed) expression.
expr parse(std::string const& bStatement) {
    std::string::const_iterator f = bStatement.begin(), l = bStatement.end();
    expr parsed;
    if (!qi::parse(f, l, s_parser_instance, parsed))
        throw std::runtime_error("invalid expression: '" + bStatement + "'");
    return parsed;
}
// Evaluates expression e with the variable bindings in vars.
bool evaluate(expr const& e, VarMap const& vars) {
    const evaluator visitor(vars);
    return boost::apply_visitor(visitor, e);
}
void test(std::string const& expression, VarMap const& vars, bool expected) {
try {
std::cout << "'" << expression << "'";
expr parsed = parse(expression);
std::cout << " -> " << parsed;
bool actual = evaluate(parsed, vars);
std::cout
<< " - evaluates to " << std::boolalpha << actual
<< (expected == actual? " Correct." : " INCORRECT!!!")
<< "\n";
} catch(std::exception const& e) {
std::cout << " EXCEPTION(" << e.what() << ")\n";
}
}
// Test driver. The calls are order-dependent: `vars` is modified between them.
int main() {
VarMap vars;
vars["a"] = true;
vars["b"] = false;
vars["c"] = true;
test("a;", vars, true);
test("b;", vars, false);
test("c;", vars, true);
test("((a|b)&c);", vars, true);
// same expression, different binding for c
vars["c"] = false;
test("((a|b)&c);", vars, false);
// let's use an undefined variable - should throw
// (the expected value passed here is irrelevant: the lookup throws first)
test("((z|y)&x);", vars, false|true);
// you CAN still use confusing numeric placeholders:
vars["0"] = true;
vars["1"] = false;
vars["2"] = true;
test("((0|1)&2);", vars, true);
test("((2|0)&1);", vars, false);
test("((1|0)&2);", vars, true);
// note you can also have "special variables"; no need for single-letter names
vars["TRUE"] = true;
vars["FALSE"] = false;
test("TRUE | FALSE;", vars, true);
test("TRUE & FALSE;", vars, false);
}
Prints:
'a;' -> a - evaluates to true Correct.
'b;' -> b - evaluates to false Correct.
'c;' -> c - evaluates to true Correct.
'((a|b)&c);' -> ((a | b) & c) - evaluates to true Correct.
'((a|b)&c);' -> ((a | b) & c) - evaluates to false Correct.
'((z|y)&x);' -> ((z | y) & x) EXCEPTION(map::at)
'((0|1)&2);' -> ((0 | 1) & 2) - evaluates to true Correct.
'((2|0)&1);' -> ((2 | 0) & 1) - evaluates to false Correct.
'((1|0)&2);' -> ((1 | 0) & 2) - evaluates to true Correct.
'TRUE | FALSE;' -> (TRUE | FALSE) - evaluates to true Correct.
'TRUE & FALSE;' -> (TRUE & FALSE) - evaluates to false Correct.
¹ FIX BAD NAMING. Also, single-responsibility. Make a parse function and an evaluate function. Put ';' and the skipper inside the grammar. Check for qi::eoi inside the grammar. Propagate exceptions instead of doing magic console output inside your parse/evaluate function.
I'm going to implement a CNF generator in C++ using Boost.Spirit, but after finishing the two parts "the order of precedence" and "eliminating equivalences & implications", I can't figure out how to implement "move NOTs inwards" and "distribute ORs inwards over ANDs".
Desired output is documented here:
https://en.wikipedia.org/wiki/Conjunctive_normal_form
Here are more detail description below:
The order of precedence:
NOT > AND > OR > IMP > IFF
Input example:
A iff B imp C
Now the output is:
(A or not ( not B or C)) and ( not A or ( not B or C))
And the code( I implement output at printer part ):
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/variant/recursive_wrapper.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Abstract data type
// Tag types that tell the operator node templates apart.
struct op_or  {};
struct op_and {};
struct op_imp {};
struct op_iff {};
struct op_not {};

// A variable is identified by its textual name.
typedef std::string var;

template <typename tag> struct binop;
template <typename tag> struct unop;

// Expression tree: a variable, or an operator node held through
// recursive_wrapper because the node types contain expr themselves.
typedef boost::variant<
    var,
    boost::recursive_wrapper<unop <op_not> >,
    boost::recursive_wrapper<binop<op_and> >,
    boost::recursive_wrapper<binop<op_or> >,
    boost::recursive_wrapper<binop<op_imp> >,
    boost::recursive_wrapper<binop<op_iff> >
> expr;

// Binary operator node with two operands.
template <typename tag> struct binop
{
    explicit binop(const expr& lhs, const expr& rhs)
        : oper1(lhs), oper2(rhs)
    { }

    expr oper1, oper2;
};

// Unary operator node with a single operand.
template <typename tag> struct unop
{
    explicit unop(const expr& operand)
        : oper1(operand)
    { }

    expr oper1;
};
// Operating on the syntax tree
// Visitor that writes an expression to a stream. "iff" and "imp" nodes are
// not printed as such: they are rewritten in terms of and/or/not while
// printing.
struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    // Variables print as their name.
    void operator()(const var& v) const
    {
        _os << v;
    }

    void operator()(const binop<op_and>& b) const
    {
        print(" and ", b.oper1, b.oper2);
    }

    void operator()(const binop<op_or >& b) const
    {
        print(" or ", b.oper1, b.oper2);
    }

    void operator()(const binop<op_iff>& b) const
    {
        eliminate_iff(b.oper1, b.oper2);
    }

    void operator()(const binop<op_imp>& b) const
    {
        eliminate_imp(b.oper1, b.oper2);
    }

    // Writes "(" l op r ")".
    void print(const std::string& op, const expr& l, const expr& r) const
    {
        const printer& self = *this;
        _os << "(";
        boost::apply_visitor(self, l);
        _os << op;
        boost::apply_visitor(self, r);
        _os << ")";
    }

    // Writes "( not " operand ")".
    void operator()(const unop<op_not>& u) const
    {
        _os << "( not ";
        boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }

    // Writes "(" l " or not " r ") and ( not " l " or " r ")".
    void eliminate_iff(const expr& l, const expr& r) const
    {
        const printer& self = *this;

        _os << "(";
        boost::apply_visitor(self, l);
        _os << " or not ";
        boost::apply_visitor(self, r);

        _os << ") and ( not ";
        boost::apply_visitor(self, l);
        _os << " or ";
        boost::apply_visitor(self, r);
        _os << ")";
    }

    // Writes "( not " l " or " r ")".
    void eliminate_imp(const expr& l, const expr& r) const
    {
        const printer& self = *this;

        _os << "( not ";
        boost::apply_visitor(self, l);
        _os << " or ";
        boost::apply_visitor(self, r);
        _os << ")";
    }
};
std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }
// Grammar rules
// Qi grammar for propositional formulas with the keywords iff, imp, or, and,
// not, plus parentheses. The rule chain iff_ -> imp_ -> or_ -> and_ -> not_
// -> simple encodes the precedence, lowest-binding operator first.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, expr(), Skipper>
{
parser() : parser::base_type(expr_)
{
using namespace qi;
expr_ = iff_.alias();
// Each binary level first tries `operand keyword same-level` (right
// recursion) and builds the operator node; otherwise it passes the single
// operand through unchanged.
iff_ = (imp_ >> "iff" >> iff_) [ _val = phx::construct<binop<op_iff>>(_1, _2) ] | imp_ [ _val = _1 ];
imp_ = (or_ >> "imp" >> imp_) [ _val = phx::construct<binop<op_imp>>(_1, _2) ] | or_ [ _val = _1 ];
or_ = (and_ >> "or" >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2) ] | and_ [ _val = _1 ];
and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2) ] | not_ [ _val = _1 ];
// After "not" a simple expression is required (`>` is an expectation point).
not_ = ("not" > simple ) [ _val = phx::construct<unop <op_not>>(_1) ] | simple [ _val = _1 ];
// A parenthesized expression or a variable.
simple = (('(' > expr_ > ')') | var_);
// Variable names: one or more letters, matched inside lexeme[].
var_ = qi::lexeme[ +alpha ];
BOOST_SPIRIT_DEBUG_NODE(expr_);
BOOST_SPIRIT_DEBUG_NODE(iff_);
BOOST_SPIRIT_DEBUG_NODE(imp_);
BOOST_SPIRIT_DEBUG_NODE(or_);
BOOST_SPIRIT_DEBUG_NODE(and_);
BOOST_SPIRIT_DEBUG_NODE(not_);
BOOST_SPIRIT_DEBUG_NODE(simple);
BOOST_SPIRIT_DEBUG_NODE(var_);
}
private:
qi::rule<It, var() , Skipper> var_;
qi::rule<It, expr(), Skipper> not_, and_, or_, imp_, iff_, simple, expr_;
};
// Test some examples in main and check the order of precedence
int main()
{
for (auto& input : std::list<std::string> {
// Test the order of precedence
"(a and b) imp ((c and d) or (a and b));",
"a and b iff (c and d or a and b);",
"a and b imp (c and d or a and b);",
"not a or not b;",
"a or b;",
"not a and b;",
"not (a and b);",
"a or b or c;",
"aaa imp bbb iff ccc;",
"aaa iff bbb imp ccc;",
// Test elimination of equivalences
"a iff b;",
"a iff b or c;",
"a or b iff b;",
"a iff b iff c;",
// Test elimination of implications
"p imp q;",
"p imp not q;",
"not p imp not q;",
"p imp q and r;",
"p imp q imp r;",
})
{
auto f(std::begin(input)), l(std::end(input));
parser<decltype(f)> p;
try
{
expr result;
bool ok = qi::phrase_parse(f,l,p > ';',qi::space,result);
if (!ok)
std::cerr << "invalid input\n";
else
std::cout << "result: " << result << "\n";
} catch (const qi::expectation_failure<decltype(f)>& e)
{
std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
}
if (f!=l) std::cerr << "unparsed: '" << std::string(f,l) << "'\n";
}
return 0;
}
Compiling command:
clang++ -std=c++11 -stdlib=libc++ -Weverything CNF_generator.cpp
Moving NOT inward should be done before distributing OR across AND:
!(A AND B) ==> (!A OR !B)
!(A OR B) ==> (!A AND !B)
remember to cancel any !!X that occurs while doing that.
Also drop redundant ( )
OR distributes across AND:
A OR (B AND C) ==> (A OR B) AND (A OR C)
You Probably need to reduce some other redundancies that will creep in as you do all that, such as (X OR X)
(A or not ( not B or C)) and ( not A or ( not B or C)) ==>
(A or (not not B and not C)) and ( not A or (not B or C)) ==>
(A or (B and not C)) and ( not A or not B or C) ==>
((A or B) and (A or not C)) and ( not A or not B or C) ==>
(A or B) and (A or not C) and ( not A or not B or C)
Maybe I misunderstood your question and you already understood all the above transformations, and you are having trouble with the mechanics of doing that inside the structure you have created.
You certainly have made things hard for yourself (maybe impossible) by trying to accomplish all the transformations inside the print routine. I would have parsed, then transformed, then printed.
If you insist on transforming in the print routine, then you will likely miss some simplifications, and you need print to be more aware of the rules of CNF. An AND node can simply print its two sides recursively with AND in between. But any other node must first inspect its children and conditionally transform enough to pull an AND up to the top before recursively calling.
You had:
// Quoted from the question: streams the rewritten form of "l iff r" directly,
// visiting each operand twice.
void eliminate_iff(const expr& l, const expr& r) const
{
_os << "(";
boost::apply_visitor(*this, l);
_os << " or not ";
boost::apply_visitor(*this, r);
_os << ") and ( not ";
boost::apply_visitor(*this, l);
_os << " or ";
boost::apply_visitor(*this, r);
_os << ")";
}
But you can't recurse all the way into l or r from iff, and you can't directly generate any "not" or "or" text until you have recursively reached the bottom. So with the misdesign of transforming while printing, the iff routine would need to generate a temp object representing (l or not r) and then call the or processing routine to handle it, then output "AND", then create a temp object representing (not l or r) and call the or processing routine to handle it.
Similarly, the or processing routine would need to look at each operand. If each is simply a final variable or not of a final variable, or can simply emit itself to the stream. But if any operand is more complicated, or must do something more complicated.
In addition to doing transformation before you start printing, there are a couple other things you might change in order to make the code simpler:
First, you could avoid a lot of trouble by having or and and objects each hold a std::set of any number of operands, rather than a pair of operands. The big cost of that is you need a decent comparison function for the objects. But the pay back is worth the trouble of having a comparison function.
Next, you might consider having a single type for all subexpressions, rather than having a type for each operator. So each object must store an operator and a std::set of operands. There are some pretty big and obvious disadvantages to that design choice, but there is one big advantage: A subexpression can transform itself into a different kind.
The more common subexpression transformation scheme (which might still be best, just consider alternatives) is for the owner of a subexpression to ask the subexpression to conditionally generate a transformed clone of itself. That is more efficient than having objects able to directly transform themselves. But getting the coding details right requires more thought.
Another good choice for this grammar is to do all the transformations while parsing. More complicated problems really deserve the full split of parse, transform, print. But in this case transform fits beautifully into parsing if you think through your factory function:
The factory takes an operator and one (for NOT) or two subexpressions that are already CNF. It produces a new CNF expression:
AND:
a) Both inputs are AND's, form the union of their sets.
b) One input is an AND, insert the other input into that one's set.
c) Neither input is an AND, create a new AND with those two inputs.
OR:
a) Both inputs are OR's, form the union of their sets.
b) One input is an OR and the other is primitive or NOT, insert the other input into the OR's set.
c) At least one input is an AND, distribute the other input across that AND (the distribute function must handle the ugly sub cases).
NOT:
Inversion of a primitive is trivial. Inversion of a NOT is trivial. Inversion of an OR is pretty trivial. Inversion of an AND is the ugliest thing in this whole design (you need to turn the whole thing inside out) but is doable. To keep your sanity, you could forget about efficiency and use the factory recursively for the NOT and OR operations that a NOT AND trivially transforms to (but which need further transformation to get back to CNF).
IFF and IMP: Just make the appropriate several calls to the basic factories.
Inspired by what little I know about Boost.Proto I've tried to modify your code to allow for independent ast transformations. This approach uses 4 passes (eliminate_iff, eliminate_imp, distribute_nots and distribute_ors) and in each one it rebuilds the ast. There may be a way to do the same in a single pass, probably with better performance, but I think that approach would be (even) harder to understand.
Explanation of the changes:
The first change is a little gratuitous but I really think that all the phx::construct...s make the grammar harder to read. The grammar I use is:
// Same precedence chain as before, lowest-binding operator first. Each level
// tries the wrapped `operand keyword same-level` form and otherwise falls back
// to the next level; the as_xxx[] directives replace the explicit
// phx::construct semantic actions.
iff_ = as_iff[imp_ >> "iff" >> iff_] | imp_;
imp_ = as_imp[or_ >> "imp" >> imp_] | or_;
or_ = as_or[and_ >> "or" >> or_] | and_;
and_ = as_and[not_ >> "and" >> and_] | not_;
not_ = as_not["not" > simple] | simple;
In order to be able to use this you need to adapt unop and binop using BOOST_FUSION_ADAPT_TPL_STRUCT and declare as_xxx as:
// Pattern for the directive objects: one per operator tag, with "xxx" standing
// for the operator name.
const as<binop<op_xxx>> as_xxx={};
If you don't like this change your original grammar should also work (if you add a using namespace ast;).
I've put everything related to the AST inside namespace ast and made a few additions:
enum class expr_type: the order of its enumerators needs to be kept in sync with the order of the alternatives in the variant. It is used to check whether one of a node's children has a particular type.
get_expr_type: simply returns what is the type of the expression.
printer: now it just prints the expression passed, without making any transformation. Maybe it could be changed to be smarter about the placing of parentheses.
operators !, && and ||: they are used to make the rebuilding of the AST easier.
And finally the transformations. Every transformation uses ast_helper<Transformation> as its base. This struct has several reused member functions:
pass_through: creates a node of the same type whose members are the results of transforming the original node's members.
recurse: applies the transformation to the current node.
left: gets the first member of a node independently of the type of the node. Gets used in the more complex transformations to slightly help with readability.
child0: exactly the same as left, but the name makes more sense in unary nodes.
right: gets the second member of a node.
eliminate_imp :
This one is really easy:
If you get a binop<op_imp> return !p || q. Where p and q are the result of applying the transformation to the first and second operands respectively.
If you get anything else, return a node of the same kind with the transformation applied to its operands (pass_through).
eliminate_iff :
It's basically the same, replacing binop<op_iff> with (p || !q) && (!p || q).
distribute_nots :
If you get anything that is not a unop<op_not> simply pass_through.
If you get a unop<op_not>, first check the type of its operand:
If it's an and, substitute with !p || !q.
If it's an or, substitute with !p && !q.
If it's a not, substitute with p.
distribute_ors :
If it's anything but an or, pass_through.
If it's an or:
Check whether its first operand is an and. If it is, distribute the ors and apply the transformation again in case another or->and is there.
Check whether its second operand is an and. Do the analogous work.
If neither direct child is an and, check recursively whether there is any and in the subtree starting at this node. If there is, it will end up floating to the top, so we need to recurse on the result of the pass_through.
If there isn't any and in the subtree, it is already in CNF and simply pass_through.
Running on Ideone
Full Code:
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_wrapper.hpp>
#include <iostream>
#include <list>
#include <string>
#include <utility>
namespace qi = boost::spirit::qi;
// Abstract data type
// Operator tags: empty marker types used as the template argument of
// ast::binop<Tag> / ast::unop<Tag> to say which logical connective a node is.
struct op_or {};   // disjunction
struct op_and {};  // conjunction
struct op_imp {};  // implication
struct op_iff {};  // equivalence
struct op_not {};  // negation
namespace ast
{
typedef std::string var;
template <typename tag> struct binop;
template <typename tag> struct unop;
enum class expr_type { var = 0, not_, and_, or_, imp, iff };
typedef boost::variant<var,
boost::recursive_wrapper<unop <op_not> >,
boost::recursive_wrapper<binop<op_and> >,
boost::recursive_wrapper<binop<op_or> >,
boost::recursive_wrapper<binop<op_imp> >,
boost::recursive_wrapper<binop<op_iff> >
> expr;
expr_type get_expr_type(const expr& expression)
{
return static_cast<expr_type>(expression.which());
}
template <typename tag> struct binop
{
expr oper1, oper2;
};
template <typename tag> struct unop
{
expr oper1;
};
struct printer : boost::static_visitor<void>
{
printer(std::ostream& os) : _os(os) {}
std::ostream& _os;
mutable bool first{ true };
//
void operator()(const ast::var& v) const { _os << v; }
void operator()(const ast::binop<op_and>& b) const { print(" and ", b.oper1, b.oper2); }
void operator()(const ast::binop<op_or>& b) const { print(" or ", b.oper1, b.oper2); }
void operator()(const ast::binop<op_iff>& b) const { print(" iff ", b.oper1, b.oper2); }
void operator()(const ast::binop<op_imp>& b) const { print(" imp ", b.oper1, b.oper2); }
void print(const std::string& op, const ast::expr& l, const ast::expr& r) const
{
_os << "(";
boost::apply_visitor(*this, l);
_os << op;
boost::apply_visitor(*this, r);
_os << ")";
}
void operator()(const ast::unop<op_not>& u) const
{
_os << "not(";
boost::apply_visitor(*this, u.oper1);
_os << ")";
}
};
std::ostream& operator<<(std::ostream& os, const expr& e)
{
boost::apply_visitor(printer(os), e); return os;
}
expr operator!(const expr& e)
{
return unop<op_not>{e};
}
expr operator||(const expr& l, const expr& r)
{
return binop<op_or>{l, r};
}
expr operator&&(const expr& l, const expr& r)
{
return binop<op_and>{l, r};
}
}
// Adapt ast::binop<Tag> (for every Tag) as a Fusion sequence of its two
// operands, in declaration order. The grammar's as<binop<...>> directives
// rely on this adaptation.
BOOST_FUSION_ADAPT_TPL_STRUCT(
(Tag),
(ast::binop) (Tag),
(ast::expr, oper1)
(ast::expr, oper2)
)
// Adapt ast::unop<Tag> (for every Tag) as a Fusion sequence of its single
// operand; used by the grammar's as<unop<...>> directive.
BOOST_FUSION_ADAPT_TPL_STRUCT(
(Tag),
(ast::unop) (Tag),
(ast::expr, oper1)
)
// Grammar rules
//
// Parses a propositional formula into an ast::expr. Every binary operator is
// right-recursive (so it groups to the right), and the rule nesting gives the
// binding strength, from loosest to tightest: iff, imp, or, and, not.
// Operands are parenthesised expressions or variables made of letters.
//
// Keywords are matched as whole words: each keyword must not be followed
// directly by another letter. Without that check a bare literal "not" also
// matches the beginning of an identifier such as "note" (yielding not(e)),
// and "a orb" would be read as "a or b".
//
// Throws qi::expectation_failure<It> when an operand is missing after "not"
// or after an opening parenthesis, or when a closing parenthesis is missing.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, ast::expr(), Skipper>
{
    parser() : parser::base_type(expr_)
    {
        using namespace qi;

        // Directives that collect the attributes of a sub-expression into
        // the given (Fusion-adapted) node type.
        const as<ast::binop<op_iff> > as_iff = {};
        const as<ast::binop<op_imp> > as_imp = {};
        const as<ast::binop<op_or> > as_or = {};
        const as<ast::binop<op_and> > as_and = {};
        const as<ast::unop<op_not> > as_not = {};

        // Whole-word keywords: lexeme[] switches the skipper off so that the
        // negative look-ahead inspects the character right after the keyword.
        kw_iff_ = lexeme["iff" >> !alpha];
        kw_imp_ = lexeme["imp" >> !alpha];
        kw_or_ = lexeme["or" >> !alpha];
        kw_and_ = lexeme["and" >> !alpha];
        kw_not_ = lexeme["not" >> !alpha];

        expr_ = iff_.alias();
        iff_ = as_iff[imp_ >> kw_iff_ >> iff_] | imp_;
        imp_ = as_imp[or_ >> kw_imp_ >> imp_] | or_;
        or_ = as_or[and_ >> kw_or_ >> or_] | and_;
        and_ = as_and[not_ >> kw_and_ >> and_] | not_;
        not_ = as_not[kw_not_ > simple] | simple;
        simple = (('(' > expr_ > ')') | var_);
        var_ = qi::lexeme[+alpha];

        BOOST_SPIRIT_DEBUG_NODE(expr_);
        BOOST_SPIRIT_DEBUG_NODE(iff_);
        BOOST_SPIRIT_DEBUG_NODE(imp_);
        BOOST_SPIRIT_DEBUG_NODE(or_);
        BOOST_SPIRIT_DEBUG_NODE(and_);
        BOOST_SPIRIT_DEBUG_NODE(not_);
        BOOST_SPIRIT_DEBUG_NODE(simple);
        BOOST_SPIRIT_DEBUG_NODE(var_);
    }
private:
    qi::rule<It, ast::var(), Skipper> var_;
    qi::rule<It, ast::expr(), Skipper> not_, and_, or_, imp_, iff_, simple, expr_;
    // Attribute-less keyword rules (see constructor).
    qi::rule<It, Skipper> kw_not_, kw_and_, kw_or_, kw_imp_, kw_iff_;
};
// Common base for the AST rewriting passes. Transform is the derived pass
// itself; recurse() default-constructs one and applies it to a sub-expression,
// so the passes are expected to be stateless.
template <typename Transform>
struct ast_helper : boost::static_visitor<ast::expr>
{
    // Rebuilds a binary node of the same kind from its transformed operands.
    template <typename Tag>
    ast::expr pass_through(const ast::binop<Tag>& node) const
    {
        return ast::binop<Tag>{recurse(node.oper1), recurse(node.oper2)};
    }

    // Rebuilds a unary node of the same kind from its transformed operand.
    template <typename Tag>
    ast::expr pass_through(const ast::unop<Tag>& node) const
    {
        return ast::unop<Tag>{recurse(node.oper1)};
    }

    // Variables are leaves and are returned unchanged.
    ast::expr pass_through(const ast::var& name) const
    {
        return name;
    }

    // Applies a fresh Transform to `expression` and returns the result.
    ast::expr recurse(const ast::expr& expression) const
    {
        const Transform pass{};
        return boost::apply_visitor(pass, expression);
    }

    // Visitor returning the first operand of any operator node. For a
    // variable, which has no operands, a default-constructed expr is returned.
    struct left_getter : boost::static_visitor<ast::expr>
    {
        template <template <class> class Op, typename Tag>
        ast::expr operator()(const Op<Tag>& node) const
        {
            return node.oper1;
        }
        ast::expr operator()(const ast::var&) const
        {
            return {}; // throw something?
        }
    };

    // First operand of `expression` (copied).
    ast::expr left(const ast::expr& expression) const
    {
        const left_getter getter{};
        return boost::apply_visitor(getter, expression);
    }

    // Same as left(); reads better when the node is unary.
    ast::expr child0(const ast::expr& expression) const
    {
        return left(expression);
    }

    // Visitor returning the second operand of a binary node. For anything
    // else a default-constructed expr is returned.
    struct right_getter : boost::static_visitor<ast::expr>
    {
        template <typename Tag>
        ast::expr operator()(const ast::binop<Tag>& node) const
        {
            return node.oper2;
        }
        template <typename Expr>
        ast::expr operator()(const Expr&) const
        {
            return {}; // throw something?
        }
    };

    // Second operand of `expression` (copied).
    ast::expr right(const ast::expr& expression) const
    {
        const right_getter getter{};
        return boost::apply_visitor(getter, expression);
    }
};
// Pass that removes implications: every "p imp q" node becomes "not p or q",
// with p and q transformed first. All other nodes are rebuilt unchanged
// around their transformed operands.
struct eliminate_imp : ast_helper<eliminate_imp>
{
    // Entry point for a whole expression: dispatch on the node it holds.
    ast::expr operator()(const ast::expr& expression) const
    {
        return recurse(expression);
    }

    // p imp q  ==>  !p || q
    ast::expr operator()(const ast::binop<op_imp>& imp) const
    {
        const ast::expr antecedent = recurse(imp.oper1);
        const ast::expr consequent = recurse(imp.oper2);
        return !antecedent || consequent;
    }

    // Any other node kind keeps its shape.
    template <typename Op>
    ast::expr operator()(const Op& node) const
    {
        return pass_through(node);
    }
};
// Pass that removes equivalences: every "p iff q" node becomes
// "(p or not q) and (not p or q)", with p and q transformed first. All other
// nodes are rebuilt unchanged around their transformed operands.
struct eliminate_iff : ast_helper<eliminate_iff>
{
    // Any node kind other than iff keeps its shape.
    template <typename Op>
    ast::expr operator()(const Op& op) const
    {
        return pass_through(op);
    }

    // p iff q  ==>  (p || !q) && (!p || q)
    ast::expr operator()(const ast::binop<op_iff>& iff) const
    {
        // Each operand appears twice in the result, but it only needs to be
        // transformed once. Transforming it once per occurrence would double
        // the work at every level of nested equivalences.
        const ast::expr p = recurse(iff.oper1);
        const ast::expr q = recurse(iff.oper2);
        return (p || !q) && (!p || q);
    }

    // Entry point for a whole expression: dispatch on the node it holds.
    ast::expr operator()(const ast::expr& expression) const
    {
        return recurse(expression);
    }
};
// Pass that pushes negations towards the leaves:
//   not (p and q)  ==>  not p or not q
//   not (p or q)   ==>  not p and not q
//   not (not p)    ==>  p
// A negation over any other kind of operand is rebuilt around the transformed
// operand, and every non-negation node is passed through.
struct distribute_nots : ast_helper<distribute_nots>
{
    // Any node kind other than not keeps its shape.
    template <typename Op>
    ast::expr operator()(const Op& node) const
    {
        return pass_through(node);
    }

    ast::expr operator()(const ast::unop<op_not>& not_) const
    {
        const ast::expr& operand = not_.oper1;
        const ast::expr_type operand_kind = ast::get_expr_type(operand); // There is probably a better solution

        if (operand_kind == ast::expr_type::and_)
        {
            return recurse(!recurse(left(operand))) || recurse(!recurse(right(operand)));
        }
        if (operand_kind == ast::expr_type::or_)
        {
            return recurse(!recurse(left(operand))) && recurse(!recurse(right(operand)));
        }
        if (operand_kind == ast::expr_type::not_)
        {
            // Double negation: drop both and keep transforming what is below.
            return recurse(child0(operand));
        }
        return pass_through(not_);
    }

    // Entry point for a whole expression: dispatch on the node it holds.
    ast::expr operator()(const ast::expr& expression) const
    {
        return recurse(expression);
    }
};
// Predicate telling whether an expression contains an "and" node anywhere.
// Construct it from an expression and convert it to bool, e.g.
//     if (any_and_inside(e)) ...
//
// The expression is stored by value. Callers may pass a bare operator node
// (e.g. a binop<op_or>), which is implicitly converted to a temporary
// ast::expr; keeping only a reference to that temporary would dangle as soon
// as the visitor outlived the full-expression that created it.
struct any_and_inside : boost::static_visitor<bool>
{
    any_and_inside(ast::expr expression) : expression(std::move(expression)) {}
    const ast::expr expression;

    // A variable contains no operator at all.
    bool operator()(const ast::var&) const
    {
        return false;
    }

    // Any binary node other than "and": look into both operands.
    template <typename Tag>
    bool operator()(const ast::binop<Tag>& op) const
    {
        return boost::apply_visitor(*this, op.oper1) || boost::apply_visitor(*this, op.oper2);
    }

    // Found one; no need to look further down.
    bool operator()(const ast::binop<op_and>&) const
    {
        return true;
    }

    // Unary node: look into its operand.
    template<typename Tag>
    bool operator()(const ast::unop<Tag>& op) const
    {
        return boost::apply_visitor(*this, op.oper1);
    }

    // Runs the search on the stored expression.
    explicit operator bool() const
    {
        return boost::apply_visitor(*this, expression);
    }
};
// Pass that distributes "or" over "and" so that no "and" remains below an
// "or":
//   (p and q) or r  ==>  (p or r) and (q or r)
//   p or (q and r)  ==>  (p or q) and (p or r)
//
// Precondition: the input should already be free of imp/iff nodes and have
// negations only directly on variables, as produced by the earlier passes in
// to_CNF. On other inputs (for instance an "and" hidden below a "not" or an
// "imp" inside an "or") the third branch below rebuilds an equivalent "or"
// and recurses on it without making progress.
struct distribute_ors : ast_helper<distribute_ors>
{
    // Any node kind other than or keeps its shape.
    template <typename Op>
    ast::expr operator()(const Op& op) const
    {
        return pass_through(op);
    }

    ast::expr operator()(const ast::binop<op_or>& or_) const
    {
        const ast::expr& lhs = or_.oper1;
        const ast::expr& rhs = or_.oper2;

        if (ast::get_expr_type(lhs) == ast::expr_type::and_)
        {
            // The right operand is paired with both halves of the "and":
            // transform it once and reuse the result.
            const ast::expr other = recurse(rhs);
            // The freshly built "or" nodes are transformed again because one
            // of their operands may itself be an "and".
            return recurse(recurse(left(lhs)) || other)
                && recurse(recurse(right(lhs)) || other);
        }
        else if (ast::get_expr_type(rhs) == ast::expr_type::and_)
        {
            // Mirror image of the case above.
            const ast::expr other = recurse(lhs);
            return recurse(other || recurse(left(rhs)))
                && recurse(other || recurse(right(rhs)));
        }
        else if (any_and_inside( or_ ))
        {
            // No "and" directly below, but one deeper down: transforming the
            // operands brings it up to a direct child, so transform the
            // rebuilt node once more.
            return recurse(recurse(lhs) || recurse(rhs));
        }
        else
        {
            // No "and" anywhere below this node: nothing to distribute.
            return pass_through(or_);
        }
    }

    // Entry point for a whole expression: dispatch on the node it holds.
    ast::expr operator()(const ast::expr& expression) const
    {
        return recurse(expression);
    }
};
// Converts an expression to conjunctive normal form by running the four
// rewriting passes in sequence; each pass rebuilds the tree.
ast::expr to_CNF(const ast::expr& expression)
{
    const ast::expr without_imp = eliminate_imp()(expression);
    const ast::expr without_iff = eliminate_iff()(without_imp);
    const ast::expr negations_pushed_down = distribute_nots()(without_iff);
    return distribute_ors()(negations_pushed_down);
}
// Test some examples in main and check the order of precedence
int main()
{
for (auto& input : std::list<std::string>{
// Test the order of precedence
"(a and b) imp ((c and d) or (a and b));",
"a and b iff (c and d or a and b);",
"a and b imp (c and d or a and b);",
"not a or not b;",
"a or b;",
"not a and b;",
"not (a and b);",
"a or b or c;",
"aaa imp bbb iff ccc;",
"aaa iff bbb imp ccc;",
// Test elimination of equivalences
"a iff b;",
"a iff b or c;",
"a or b iff b;",
"a iff b iff c;",
// Test elimination of implications
"p imp q;",
"p imp not q;",
"not p imp not q;",
"p imp q and r;",
"p imp q imp r;"
})
{
auto f(std::begin(input)), l(std::end(input));
parser<decltype(f)> p;
try
{
ast::expr result;
bool ok = qi::phrase_parse(f, l, p > ';', qi::space, result);
if (!ok)
std::cerr << "invalid input\n";
else
{
std::cout << "original: " << result << "\n";
std::cout << "CNF: " << to_CNF(result) << "\n";
}
}
catch (const qi::expectation_failure<decltype(f)>& e)
{
std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
}
if (f != l) std::cerr << "unparsed: '" << std::string(f, l) << "'\n";
}
return 0;
}