Say I have a (simplified) recursive grammar like this:
OrExpr := AndExpr % "or"
AndExpr := Term % "and"
Term := ParenExpr | String
ParenExpr := '(' >> OrExpr >> ')'
String := lexeme['"' >> *(char_ - '"') >> '"']
So this works, but the problem is that it will wrap everything in multiple layers of expression. For example, the string "hello" and ("world" or "planet" or "globe") would parse as OrExpr(AndExpr("hello", OrExpr(AndExpr("world"), AndExpr("planet"), AndExpr("globe")))) (playing fast and loose with the syntax, but hopefully you understand). What I'd like is for the one-element nodes to be collapsed into their parent, so it would end up as AndExpr("hello", OrExpr("world", "planet", "globe"))
This can be solved with actions and using a state machine that only constructs the outer object if there's more than one child inside it. But I'm wondering if there's a way to fix this problem without using parser actions?
EDIT: Almost minimal example
Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace burningmime::setmatch::ast
{
    // Forward declaration: Node refers to Expr before Expr is complete.
    struct Expr;

    // One child slot in the tree: a terminal string or a nested expression.
    struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
    {
        using base_type::base_type;
        using base_type::operator=;
    };

    // Discriminates the two kinds of expression node.
    enum OPER
    {
        OPER_AND = 1,
        OPER_OR = 2
    };

    // An AND/OR expression together with its operands.
    struct Expr
    {
        OPER op;
        std::vector<Node> children;
    };

    // Debug visitor: writes the tree to std::cout as And(...) / Or(...).
    struct AstPrinter
    {
        // Terminals are printed verbatim.
        void operator()(const std::string& text) const
        {
            std::cout << text;
        }

        // Expressions print their operator name and a comma-separated child list.
        void operator()(const Expr& expr) const
        {
            const char* opening = (expr.op == OPER_AND) ? "And(" : "Or(";
            std::cout << opening;

            // Empty before the first child, ", " before every later one.
            const char* separator = "";
            for(const Node& kid : expr.children)
            {
                std::cout << separator;
                separator = ", ";
                boost::apply_visitor(*this, kid);
            }
            std::cout << ")";
        }
    };
}
// these need to be at top-level scope
// basically this adds compile-time type information, so the parser knows where to put various attributes
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)
// DECLARE_RULE(NAME, TYPE): declares a static x3::rule object called NAME with
// attribute type TYPE. The stringized NAME is used as the rule's name, and the
// rule's tag class uses the same identifier NAME as the variable.
#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME, TYPE> NAME = #NAME;
// KEYWORD(X): declares kw_X, a case-insensitive (x3::no_case) parser for the text X.
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
// DEFINE_RULE(NAME, GRAMMAR): stores GRAMMAR in NAME_def and passes NAME to
// BOOST_SPIRIT_DEFINE.
#define DEFINE_RULE(NAME, GRAMMAR) \
static const auto NAME##_def = GRAMMAR; \
BOOST_SPIRIT_DEFINE(NAME)
// Grammar for boolean phrase expressions: OrExpr -> AndExpr -> Term, where a
// Term is a parenthesised OrExpr or a Phrase.
namespace burningmime::setmatch::parser
{
// we need to pre-declare the rules so they can be used recursively
DECLARE_RULE(Phrase, std::string)
DECLARE_RULE(Term, ast::Node)
// AndExpr/OrExpr/ParenExpr all produce ast::Expr, so every level of the
// grammar yields an Expr node even when it has a single child.
DECLARE_RULE(AndExpr, ast::Expr)
DECLARE_RULE(OrExpr, ast::Expr)
DECLARE_RULE(ParenExpr, ast::Expr)
// keywords
KEYWORD(and)
KEYWORD(or)
static const auto lparen = x3::lit('(');
static const auto rparen = x3::lit(')');
// helper parsers
static const auto keywords = kw_and | kw_or | lparen | rparen;
// word: one or more characters that are neither whitespace nor a parenthesis.
static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
// NOTE(review): `word - keywords` presumably fails whenever `keywords` matches
// at the current position, which would also reject words that merely start
// with a keyword (e.g. "android") -- confirm against the X3 difference parser.
static const auto bareWord = word - keywords;
// The quotes are parsed with char_ (not lit), so they are part of the attribute.
static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];
DEFINE_RULE(Phrase, quotedString | bareWord)
DEFINE_RULE(Term, ParenExpr | Phrase)
DEFINE_RULE(ParenExpr, lparen >> OrExpr >> rparen)
// x3::attr(...) supplies the `op` member; the list supplies `children`.
DEFINE_RULE(AndExpr, x3::attr(ast::OPER_AND) >> (Term % kw_and))
DEFINE_RULE(OrExpr, x3::attr(ast::OPER_OR) >> (AndExpr % kw_or))
}
namespace burningmime::setmatch
{
void parseRuleFluent(const char* buf)
{
ast::Expr root;
auto start = buf, end = start + strlen(buf);
bool success = x3::phrase_parse(start, end, parser::OrExpr, x3::ascii::space, root);
if(!success || start != end)
throw std::runtime_error(std::string("Could not parse rule: ") + buf);
printf("Result of parsing: %s\n=========================\n", start);
ast::Node root2(root);
boost::apply_visitor(ast::AstPrinter(), root2);
}
}
int main()
{
burningmime::setmatch::parseRuleFluent(R"#("hello" and ("world" or "planet" or "globe"))#");
}
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace burningmime::setmatch::ast
{
    struct Expr; // defined below; needed by Node

    // Tree element: either a terminal string or (recursively) an Expr.
    struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
    {
        using base_type::base_type;
        using base_type::operator=;
    };

    // Operator tag carried by each Expr.
    enum OPER
    {
        OPER_AND = 1,
        OPER_OR = 2
    };

    // Operator plus operands.
    struct Expr
    {
        OPER op;
        std::vector<Node> children;
    };

    // Debugging aid that dumps a tree to std::cout.
    struct AstPrinter
    {
        void operator()(const Expr& node) const
        {
            if(node.op == OPER_AND)
                std::cout << "And(";
            else
                std::cout << "Or(";

            const auto firstChild = node.children.begin();
            for(auto it = firstChild; it != node.children.end(); ++it)
            {
                // Separator goes before every child except the first.
                if(it != firstChild)
                    std::cout << ", ";
                boost::apply_visitor(*this, *it);
            }
            std::cout << ")";
        }

        void operator()(const std::string& node) const
        {
            std::cout << node;
        }
    };
}
// these need to be at top-level scope
// basically this adds compile-time type information, so the parser knows where to put various attributes
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)
// DECLARE_RULE(NAME, TYPE): declares a static x3::rule object called NAME with
// attribute type TYPE; the tag class is named NAME_r so it does not share an
// identifier with the rule variable.
#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME##_r, TYPE> NAME = #NAME;
// KEYWORD(X): declares kw_X, a case-insensitive (x3::no_case) parser for the text X.
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
// DEFINE_RULE(NAME, GRAMMAR): stores GRAMMAR in NAME_def and passes NAME to
// BOOST_SPIRIT_DEFINE.
#define DEFINE_RULE(NAME, GRAMMAR) \
static const auto NAME##_def = GRAMMAR; \
BOOST_SPIRIT_DEFINE(NAME)
namespace burningmime::setmatch::parser
{
    // Rules are declared up front because the grammar is recursive.
    // Every expression-level rule yields ast::Node so that a one-child list
    // can hand back the child itself instead of a wrapping Expr.
    DECLARE_RULE(Phrase, std::string)
    DECLARE_RULE(Term, ast::Node)
    DECLARE_RULE(AndExpr, ast::Node)
    DECLARE_RULE(OrExpr, ast::Node)
    DECLARE_RULE(ParenExpr, ast::Node)

    // Keywords and punctuation.
    KEYWORD(and)
    KEYWORD(or)
    static const auto lparen = x3::lit('(');
    static const auto rparen = x3::lit(')');

    // Lexical helpers.
    static const auto keywords = kw_and | kw_or | lparen | rparen;
    static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
    static const auto bareWord = word - keywords;
    static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];

    DEFINE_RULE(Phrase, quotedString | bareWord)
    DEFINE_RULE(Term, ParenExpr | Phrase)
    DEFINE_RULE(ParenExpr, lparen >> OrExpr >> rparen)

    // Semantic action for `X % keyword` lists: a single parsed child becomes
    // the rule's value directly; otherwise the children are wrapped in an
    // Expr tagged with Op.
    template <ast::OPER Op>
    struct make_node
    {
        template <typename Context>
        void operator()(Context const& ctx) const
        {
            auto& parsedChildren = _attr(ctx);
            if (parsedChildren.size() != 1)
            {
                _val(ctx) = ast::Expr{ Op, std::move(parsedChildren) };
                return;
            }
            _val(ctx) = std::move(parsedChildren[0]);
        }
    };

    DEFINE_RULE(AndExpr, (Term % kw_and)[make_node<ast::OPER_AND>{}])
    DEFINE_RULE(OrExpr, (AndExpr % kw_or)[make_node<ast::OPER_OR>{}])
}
namespace burningmime::setmatch
{
void parseRuleFluent(const char* buf)
{
ast::Node root;
auto start = buf, end = start + strlen(buf);
bool success = x3::phrase_parse(start, end, parser::OrExpr, x3::ascii::space, root);
if (!success || start != end)
throw std::runtime_error(std::string("Could not parse rule: ") + buf);
printf("Result of parsing: %s\n=========================\n", start);
boost::apply_visitor(ast::AstPrinter(), root);
}
}
int main()
{
burningmime::setmatch::parseRuleFluent(R"#("hello" and ("world" or "planet" or "globe"))#");
}
https://wandbox.org/permlink/kMSHOHG0pgwGr0zv
Output:
Result of parsing:
=========================
And("hello", Or("world", "planet", "globe"))
Related
My ultimate goal is to write an HLSL shading-language parser. My first experience with parsing has been following Bob Nystrom's "Crafting Interpreters".
The issue I am currently facing is that I am trying to parse a 'chained member access' sequence (i.e. multiple 'dot' operators):
first.Second.third
Obviously I could parse that into a list % sequence as a vector of strings, but I am trying to stick to the ast shown in the crafting interpreters book by having nested 'Get' nodes.
I am trying to parse this nested Get sequence so that I can eventually put that into a Set AST node. But I thought it would be best to at least get the 'Get' part working first, before building on top of that.
https://craftinginterpreters.com/classes.html#set-expressions
Here's my minimal compiling program that tries to do that....
#include "boost/variant.hpp"
#include <boost/config/warning_disable.hpp>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/annotate_on_success.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iostream>
#include <string>
#include <tuple>
#include <variant>
namespace hlsl {
namespace ast {
struct Get;
struct ExprVoidType {};
struct Variable {
Variable(std::string name) : name(std::move(name)) {
}
Variable() = default;
std::string name;
};
using Expr =
boost::spirit::x3::variant<ExprVoidType,
boost::spirit::x3::forward_ast<Get>, Variable>;
struct Get {
Get(Expr& object, std::string name) : object_{object}, name_{name} {
}
Get() = default;
Expr object_;
std::string name_;
};
} // namespace ast
} // namespace hlsl
// Prints an hlsl::ast::Expr tree to std::cout, one item per line.
struct visitor {
    using result_type = void;

    // Taken by const reference so visiting does not copy the string.
    void operator()(const std::string& name) {
        std::cout << name << "\n";
    }

    // Prints a marker, then the accessed object (recursively), then the member name.
    void operator()(const hlsl::ast::Get& get) {
        std::cout << "get expr\n";
        get.object_.apply_visitor(*this);
        std::cout << get.name_ << "\n";
    }

    void operator()(const hlsl::ast::Variable& var) {
        std::cout << var.name << "\n";
    }

    // The empty alternative prints nothing.
    void operator()(const hlsl::ast::ExprVoidType&) {
    }
};
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, name_)
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
using ascii::char_;
using ascii::space;
using x3::alnum;
using x3::alpha;
using x3::double_;
using x3::int_;
using x3::lexeme;
using x3::lit;
// Mixin for rule tag classes: forwards an expectation failure to the
// x3::error_handler stored in the parse context and fails the parse.
struct error_handler {
    template <typename Iter, typename Failure, typename Ctx>
    x3::error_handler_result on_error(Iter& first, Iter const& last,
                                      Failure const& x,
                                      Ctx const& context) {
        // The context stores a reference wrapper; unwrap it with .get().
        auto& reporter = x3::get<x3::error_handler_tag>(context).get();

        std::string message = "Error! Expecting: ";
        message += x.which();
        message += " here:";

        reporter(x.where(), message);
        return x3::error_handler_result::fail;
    }
};
/////////////////////////////////////////
// RULES
///////////////////////////////////////////
// identifier: one or more alphanumeric characters, collected into a std::string.
// NOTE(review): `+alnum` is not wrapped in lexeme[]; main() parses with a
// space skipper, so the skipper presumably also runs between characters -- confirm.
x3::rule<class identifier_class, std::string> const identifier = "identifier";
auto const identifier_def = +alnum;
BOOST_SPIRIT_DEFINE(identifier);
x3::rule<class expression_class, hlsl::ast::Expr> const expression =
"expression";
x3::rule<class variable_class, hlsl::ast::Variable> const variable = "variable";
x3::rule<class get_class, hlsl::ast::Get> const get = "get";
// variable: an identifier stored as ast::Variable.
auto const variable_def = identifier;
BOOST_SPIRIT_DEFINE(variable);
// expression: a member access if one matches, otherwise a plain variable.
auto const expression_def = get | variable;
BOOST_SPIRIT_DEFINE(expression);
///////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
// get
// get: <variable or expression> '.' <identifier>.
// `variable` is tried first; `expression` itself starts with `get`, so the
// second alternative re-enters this rule without consuming any input.
auto const get_def = (variable | expression) >> '.' >> identifier;
BOOST_SPIRIT_DEFINE(get);
/////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
// program: the start rule. Its tag class is declared here and completed after
// BOOST_SPIRIT_DEFINE as a class derived from error_handler.
struct program_class;
x3::rule<program_class, hlsl::ast::Expr> const program = "program";
auto const program_def = get;
BOOST_SPIRIT_DEFINE(program);
struct program_class : error_handler {};
// struct program_class;
/////////////////////////////////////////////////////////
// } // namespace parser
// } // namespace client
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
// Parses a fixed sample input with the `program` rule and prints the result.
int main() {
    using boost::spirit::x3::error_handler_tag;
    using boost::spirit::x3::with;
    using iterator_type = std::string::const_iterator;
    using error_handler_type = boost::spirit::x3::error_handler<iterator_type>;

    // input string
    std::string input = "first.Second.third";
    hlsl::ast::Expr fs;
    auto iter = input.begin();
    auto const end = input.end();

    // Our error handler
    error_handler_type error_handler(iter, end, std::cerr);
    auto const parser =
        // we pass our error handler to the parser so we can access
        // it later in our on_error and on_success handlers
        with<error_handler_tag>(std::ref(error_handler))[program];

    const bool r = phrase_parse(iter, end, parser, space, fs);

    visitor v;
    if (r) {
        std::cout << "Parse Suceeded\n\n";
        fs.apply_visitor(v);
    } else {
        std::cout << "Sorry :(\n\n";
        // Show the character where parsing stopped, but only if there is
        // one: dereferencing the end iterator is undefined behaviour.
        if (iter != end) {
            std::cout << *iter;
        }
    }
    std::cout << "Bye... :-) \n\n";
    return 0;
}
What I want is something like this
Get {
object_: Get {
object_: Variable {
name : "first"
},
name_: second
},
name_: third
}
Is this kind of thing even possible using x3 and the way it constructs parsers from grammar?
Sure. Your grammar parses left to right, and that's also how you want to build your AST (outside-in, not inside-out).
I'd rephrase the whole thing:
expression = variable >> *('.' >> identifier);
Now you'll have to massage the attribute propagation as each . member access wraps the previous expression in another Get{expression, name} instance:
// Rule declarations: each tag struct is declared inline in the template argument list.
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
x3::rule<struct program_, ast::Expr> const program{"program"};
// identifier: a letter followed by any number of alphanumerics, matched as a
// lexeme so the skipper does not run inside it.
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
// variable: an identifier stored as ast::Variable.
auto variable_def = identifier;
Now let's use two semantic actions to propagate the expression parts:
// as_expr: stores the parsed attribute as the rule's value, wrapped in ast::Expr.
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
// as_get: replaces the rule's value with a Get whose object is the previous
// value and whose name is the identifier just parsed.
auto as_get = [](auto& ctx) {
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
// expression: a variable followed by zero or more `.identifier` accesses;
// as_get runs once per access.
auto expression_def = variable[as_expr] >> *('.' >> identifier[as_get]);
Let's also bake the skipper into the grammar while we're at it:
auto program_def = x3::skip(x3::space)[expression];
Live Demo
With a lot of simplifications, e.g. for the AST & visitor:
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl {
    namespace ast {
        // Empty alternative; first type in Expr.
        struct Void {};
        struct Get;

        // A named variable.
        struct Variable {
            std::string name;
        };

        // Expression variant; Get is held through forward_ast because it is recursive.
        using Expr = x3::variant<Void, x3::forward_ast<Get>, Variable>;

        // Member access: `object_.property_`.
        struct Get {
            Expr object_;
            std::string property_;
        };
    } // namespace ast

    // Visitor that writes an Expr tree to the stream it was constructed with.
    struct printer {
        std::ostream& _os;
        using result_type = void;

        void operator()(hlsl::ast::Void const&) const {
            _os << "void{}";
        }

        void operator()(hlsl::ast::Variable const& var) const {
            _os << "var{";
            _os << quoted(var.name);
            _os << "}";
        }

        // Prints the accessed object recursively, then the property name.
        void operator()(hlsl::ast::Get const& get) const {
            _os << "get { object_:";
            get.object_.apply_visitor(*this);
            _os << ", property_:";
            _os << quoted(get.property_);
            _os << " }";
        }
    };
} // namespace hlsl
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
namespace hlsl::parser {
    // Context tag under which the error reporter is looked up.
    struct eh_tag;

    // Base for rule tag types: reports an expectation failure through the
    // reporter found under eh_tag and fails the parse.
    struct error_handler {
        template <typename It, typename Exc, typename Ctx>
        auto on_error(It&, It, Exc const& x, Ctx const& context) const {
            auto&& report = x3::get<eh_tag>(context);
            report(x.where(), "Error! Expecting: " + x.which() + " here:");
            return x3::error_handler_result::fail;
        }
    };
    struct program_ : error_handler {};

    x3::rule<struct identifier_, std::string> const identifier{"identifier"};
    x3::rule<struct variable_, ast::Variable> const variable{"variable"};
    x3::rule<struct expression_, ast::Expr> const expression{"expression"};
    x3::rule<struct program_, ast::Expr> const program{"program"};

    // Stores the parsed attribute as the rule value, wrapped in ast::Expr.
    auto as_expr = [](auto& context) {
        _val(context) = ast::Expr(std::move(_attr(context)));
    };

    // Wraps the current rule value in a Get named after the parsed identifier.
    auto as_get = [](auto& context) {
        auto& current = _val(context);
        current = ast::Get{std::move(current), _attr(context)};
    };

    auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
    auto variable_def = identifier;
    auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
    auto program_def = x3::skip(x3::space)[expression];

    BOOST_SPIRIT_DEFINE(variable, expression, identifier, program);
} // namespace hlsl::parser
// Runs the parser over a few sample inputs and prints each result.
int main() {
    using namespace hlsl;

    static constexpr char const* samples[] = {
        "first",
        "first.second",
        "first.Second.third",
    };

    for (std::string const input : samples) {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();

        // Register the error reporter in the context under eh_tag.
        auto const p = x3::with<parser::eh_tag>(
            x3::error_handler{f, l, std::cerr})[hlsl::parser::program];

        hlsl::ast::Expr fs;
        if (!parse(f, l, p, fs)) {
            std::cout << "Parse failed at " << quoted(std::string_view(f, l)) << "\n";
            continue;
        }
        std::cout << "Parsed: ";
        fs.apply_visitor(hlsl::printer{std::cout});
        std::cout << "\n";
    }
}
Prints
===== "first"
Parsed: var{"first"}
===== "first.second"
Parsed: get { object_:var{"first"}, property_:"second" }
===== "first.Second.third"
Parsed: get { object_:get { object_:var{"first"}, property_:"Second" }, property_:"third" }
More Simplifications
In the current scenario none of the rules are recursive, so don't need the _DEFINE magic. Assuming you need recursion in the expression later, you could at least remove some redundancy:
// Condensed grammar: only `expression` is declared as a named rule and passed
// to BOOST_SPIRIT_DEFINE; the other parsers are defined inline.
namespace hlsl::parser {
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
// as_expr: stores the parsed attribute as the rule value, wrapped in ast::Expr.
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
// as_get: wraps the current rule value in a Get named after the parsed identifier.
auto as_get = [](auto& ctx) { _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)}; };
// identifier: a letter followed by alphanumerics, as a lexeme, yielding std::string.
auto identifier
= x3::rule<void, std::string>{"identifier"}
= x3::lexeme[x3::alpha >> *x3::alnum];
// variable: an identifier coerced to ast::Variable.
auto variable = x3::rule<void, ast::Variable>{"variable"} = identifier;
auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
// program: expression parsed with whitespace skipping enabled.
auto program = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(expression)
} // namespace hlsl::parser
Note also that the lexeme is important to suppress skipping (Boost spirit skipper issues)
See it Live On Coliru as well.
Oh and for bonus, a version without x3::variant or visitation:
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl::ast {
    // Empty alternative; first type in Expr.
    struct Void {};
    struct Get;

    // A named variable.
    struct Variable {
        std::string name;
    };

    // Expression variant; Get is recursive, hence the recursive_wrapper.
    using Expr = boost::variant<Void, boost::recursive_wrapper<Get>, Variable>;

    // Member access: `object_.property_`.
    struct Get {
        Expr object_;
        std::string property_;
    };

    // Stream output for each node type; Expr is streamed through these overloads.
    static inline std::ostream& operator<<(std::ostream& os, Void) {
        os << "void()";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Variable const& v) {
        os << "var{" << std::quoted(v.name) << "}";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Get const& g) {
        os << "get{ object_:" << g.object_;
        os << ", property_:" << std::quoted(g.property_);
        os << " }";
        return os;
    }
} // namespace hlsl::ast
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
// Grammar for `variable ('.' identifier)*`, building nested Get nodes from left to right.
namespace hlsl::parser {
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
// as_expr: stores the parsed attribute as the rule value, wrapped in ast::Expr.
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
// as_get: wraps the current rule value in a Get named after the parsed identifier.
auto as_get = [](auto& ctx) { _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)}; };
// identifier: a letter followed by alphanumerics, as a lexeme, yielding std::string.
auto identifier
= x3::rule<void, std::string>{"identifier"}
= x3::lexeme[x3::alpha >> *x3::alnum];
// variable: an identifier coerced to ast::Variable.
auto variable = x3::rule<void, ast::Variable>{"variable"} = identifier;
auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
// program: expression parsed with whitespace skipping enabled.
auto program = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(expression)
} // namespace hlsl::parser
// Runs the parser over a few sample inputs and streams each resulting AST.
int main() {
    using namespace hlsl;

    static constexpr char const* samples[] = {
        "first",
        "first.second",
        "first.Second.third",
    };

    for (std::string const input : samples) {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();

        ast::Expr fs;
        bool const ok = parse(f, l, parser::program, fs);
        if (ok) {
            std::cout << "Parsed: " << fs << "\n";
        } else {
            // [f, l) is the input that was not consumed.
            std::cout << "Parse failed at " << quoted(std::string_view(f, l)) << "\n";
        }
    }
}
Prints just the same:
===== "first"
Parsed: var{"first"}
===== "first.second"
Parsed: get{ object_:var{"first"}, property_:"second" }
===== "first.Second.third"
Parsed: get{ object_:get{ object_:var{"first"}, property_:"Second" }, property_:"third" }
That's >100 lines of code removed. With no functionality sacrificed.
Buon giorno,
I have to parse something such as:
foo: 123
"bar": 456
The quotes should be removed if they are here. I tried:
((+x3::alnum) | ('"' >> (+x3::alnum) >> '"'))
But the parser actions for this are of type variant<string, string> ; is there a way to make it so that the parser understands that those two are equivalent, and for my action to only get a single std::string as argument in its call?
edit: minimal repro (live on godbolt: https://gcc.godbolt.org/z/GcE8Pj4r5) :
#include <boost/spirit/home/x3.hpp>
using namespace boost::spirit;
// action handlers
// Receiver for parser events; main() registers an instance in the parse
// context via x3::with<handlers>.
struct handlers {
void create_member(const std::string& str) { }
};
// rules
static const x3::rule<struct id_obj_mem> obj_mem = "obj_mem";
// EVENT(e): a semantic action that calls handlers::e on the instance stored in
// the context, passing the parser's attribute.
#define EVENT(e) ([](auto& ctx) { x3::get<handlers>(ctx).e(x3::_attr(ctx)); })
// obj_mem: comma-separated list of `name : 123`, where name is either bare or
// double-quoted alphanumerics; create_member fires for each entry.
static const auto obj_mem_def = ((
((+x3::alnum) | ('"' >> (+x3::alnum) >> '"'))
>> ':' >> x3::lit("123"))[EVENT(create_member)] % ',');
BOOST_SPIRIT_DEFINE(obj_mem)
// execution
// Minimal reproduction: parses one sample line with the handlers object
// registered in the parse context.
int main()
{
    std::string str = "foo: 123";
    handlers r;

    auto first = str.begin();
    auto last = str.end();

    // Attach the handlers instance so the EVENT actions can find it.
    auto const grammar = boost::spirit::x3::with<handlers>(r)[obj_mem_def];
    bool res = phrase_parse(first, last, grammar, boost::spirit::x3::ascii::space);
}
I too consider this a kind of defect. X3 is definitely less "friendly" in terms of the synthesized attribute types. I guess it's just a tacit side-effect of being more core-language oriented, where attribute assignment is effectively done via default "visitor" actions.
Although I understand the value of keeping the magic to a minimum, and staying close to "pure C++", I vastly prefer the Qi way of synthesizing attributes here. I believe it has proven a hard problem to fix, as this problem has been coming/going in some iterations of X3.
I've long decided to basically fix it myself with variations of this idiom:
// as_type<T>: function object that wraps a parser `p` in an anonymous
// x3::rule whose attribute type is T.
template <typename T> struct as_type {
auto operator()(auto p) const { return x3::rule<struct Tag, T>{} = p; }
};
// as_string(p): coerce the attribute of p to std::string.
static constexpr as_type<std::string> as_string{};
Now I'd write that as:
auto quoted = '"' >> +x3::alnum >> '"';
auto name = as_string(+x3::alnum | quoted);
auto prop = (name >> ':' >> "123")[EVENT(create_member)] % ',';
That will compile no problem:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
// Receiver for parser events.
struct handlers {
    // Logs this function's name followed by the quoted member name to std::cerr.
    void create_member(std::string const& str) {
        std::cerr << __FUNCTION__;
        std::cerr << " " << std::quoted(str);
        std::cerr << "\n";
    }
};
// Grammar for a comma-separated list of `name : 123` entries.
namespace Parser {
// EVENT(e): a semantic action that calls handlers::e on the instance stored in
// the context, passing the parser's attribute.
#define EVENT(e) ([](auto& ctx) { get<handlers>(ctx).e(_attr(ctx)); })
// as_type<T>: wraps a parser in an anonymous rule whose attribute type is T.
template <typename T> struct as_type {
auto operator()(auto p) const { return x3::rule<struct Tag, T>{} = p; }
};
static constexpr as_type<std::string> as_string{};
// quoted: alphanumerics between double quotes; the quotes are literals.
auto quoted = '"' >> +x3::alnum >> '"';
// name: bare or quoted alphanumerics, exposed as a single std::string.
auto name = as_string(+x3::alnum | quoted);
auto prop = (name >> ':' >> "123")[EVENT(create_member)] % ',';
// grammar: prop parsed with whitespace skipping enabled.
auto grammar = x3::skip(x3::space)[prop];
} // namespace Parser
// Parses one sample line; create_member is invoked through the handlers
// instance registered in the parse context.
// Returns 0 when the parse succeeds and 1 when it fails.
int main() {
    handlers r;
    std::string const str = "foo: 123";
    auto first = str.begin(), last = str.end();
    bool res = parse(first, last, x3::with<handlers>(r)[Parser::grammar]);
    // Exit status follows the usual convention: zero means success.
    return res ? 0 : 1;
}
Prints
create_member "foo"
Interesting Links
Spirit X3, How to get attribute type to match rule type?
Combining rules at runtime and returning rules
spirit x3 cannot propagate attributes of type optional<vector>
etc.
I wrote a minimal example to demonstrate this problem. It parses a nested list of numbers like (1 2 3 (4 5) (6 (7 (8)))). I use spirit::lex to parse numbers and spirit::qi to parse lists, so my code looks like this:
// Token attribute types the lexer's tokens may carry.
using TokenTypes = boost::mpl::vector<Object*>;
using Iterator = std::string::iterator;
// Lexer definition: declares the token definitions used by the grammar.
class Lexer : public lex::lexer<actor_lexer<token<Iterator, TokenTypes>>>
{
public:
lex::token_def<> spaces; // used to skip spaces
lex::token_def<Object*> number; // create Number Object on heap and use the pointer as attribute
public:
Lexer();
};
// Shorthand for a qi::rule over the lexer's token iterator.
template<typename... Ts>
using Rule = qi::rule<Lexer::iterator_type, Ts...>;
// Grammar producing an Object* for one element (a number or a list).
// NOTE(review): the attribute is given here as `Object*`, whereas the answer
// further down declares it as the signature `Object*()` -- confirm that the
// bare pointer type is what triggers the compile error.
class Parser : public qi::grammar<Lexer::iterator_type, Object*>
{
public:
Lexer lexer;
Rule<Object*> list;
Rule<Object*> elem;
public:
Parser();
};
But in Parser::Parser(), I can't use Lexer::number in a grammar expression:
// Builds the grammar; `elem` is the start rule.
Parser::Parser()
: base_type(elem)
{
// list = ...
// elem: either a nested list or a number token from the lexer.
elem %= list | lexer.number; // fail to compile!
}
Clang error message (brief):
/usr/include/boost/spirit/home/qi/detail/assign_to.hpp:42:36: error: type 'Object *' cannot be used prior to '::' because it has no members
: is_iter_range<typename C::value_type> {};
^
...
...
...
I can't understand why this is wrong considering it used to work fine when I use other scalar types like int and double as token attribute.
So, how to use pointer type as token attribute?
complete example
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
#include <vector>
// Abstract base of the polymorphic AST.
struct Object
{
    virtual ~Object() = default;

    // Writes a textual form of the node to `out`.
    virtual void print(std::ostream& out) = 0;
};

// Leaf node holding an integer value.
struct Number : Object
{
    int64_t _val;

    void print(std::ostream& out) override
    {
        out << _val;
    }
};

// Interior node holding pointers to child nodes.
struct List : Object
{
    std::vector<Object*> _objs;

    // Prints "(" then every child followed by a single space, then ")".
    void print(std::ostream& out) override
    {
        out << '(';
        for (std::size_t idx = 0; idx < _objs.size(); ++idx) {
            _objs[idx]->print(out);
            out << ' ';
        }
        out << ')';
    }
};
namespace qi = boost::spirit::qi;
namespace fu = boost::fusion;
namespace lex = boost::spirit::lex;
using lex::lexertl::actor_lexer;
using lex::lexertl::token;
using TokenTypes = boost::mpl::vector<Object*>;
using Iterator = std::string::iterator;
// Lexer definition: declares the token definitions used by the grammar.
class Lexer : public lex::lexer<actor_lexer<token<Iterator, TokenTypes>>>
{
public:
// Whitespace token; carries no attribute.
lex::token_def<> spaces;
// Number token whose attribute is an Object*.
lex::token_def<Object*> number;
public:
Lexer();
};
// Shorthand for a qi::rule over the lexer's token iterator.
template<typename... Ts>
using Rule = qi::rule<Lexer::iterator_type, Ts...>;
// Grammar producing an Object* for one element (a number or a list).
class Parser : public qi::grammar<Lexer::iterator_type, Object*>
{
public:
Lexer lexer;
// `list` keeps the List under construction in a rule-local variable.
Rule<Object*, qi::locals<List*>> list;
Rule<Object*> elem;
public:
Parser();
};
// Registers the token definitions: the two parenthesis characters, whitespace
// and numbers.
Lexer::Lexer()
{
self += '(';
self += ')';
spaces = R"(\s+)";
// Whitespace is matched but flagged pass_ignore.
self +=
spaces[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
matched = lex::pass_flags::pass_ignore;
})];
number = R"(\d+)";
// For every number token a Number is allocated with new, its value is parsed
// from the matched range with Qi, and the pointer becomes the token value.
// The result of qi::parse is not checked.
self +=
number[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
auto val = new Number();
auto iter = start;
qi::parse(iter, end, qi::long_long, val->_val);
ctx.set_value(val);
})];
}
// Builds the grammar; `elem` is the start rule.
Parser::Parser()
: base_type(elem)
{
// list: '(' elem* ')'. Three semantic actions cooperate through the rule's
// local variable (slot 0 of ctx.locals).
list = ( //
qi::lit('(')[( //
// On '(': allocate a fresh List and remember it in the local.
[](auto& attr, auto& ctx, bool& pass) {
fu::at_c<0>(ctx.locals) = new List();
})] //
>> *(elem[( //
// On each element: append its Object* to the List in the local.
[](auto& attr, auto& ctx, bool& pass) {
List* list = fu::at_c<0>(ctx.locals);
list->_objs.push_back(attr);
})]) //
>> ')' //
)[( //
// After the whole sequence matched: publish the List as the rule's attribute.
[](auto& attr, auto& ctx, bool& pass) {
List* list = fu::at_c<0>(ctx.locals);
fu::at_c<0>(ctx.attributes) = list;
})];
// elem: either a nested list or a number token from the lexer.
elem %= list | lexer.number;
}
int
main(int argc, char* argv[])
{
Parser parser;
std::string line;
while (std::getline(std::cin, line)) {
auto begin = line.begin();
Object* obj;
lex::tokenize_and_parse(begin, line.end(), parser.lexer, parser, obj);
obj->print(std::cout);
std::cout << std::endl;
}
}
Okay. Don't take this badly. Reading your sample (kudos for including a self-contained example! This saves a ton of time) I can't help but feel that you've somehow stumbled on the worst possible cross-section of anti-patterns in Spirit Qi.
You're using a polymorphic AST:
How can I use polymorphic attributes with boost::spirit::qi parsers?
Semantic actions runs multiple times in boost::spirit parsing
Parsing inherited struct with boost spirit
You're using semantic actions. As a rule this already misses the sweet spot for embedded grammars, which is why I linked 126 answers to Boost Spirit: "Semantic actions are evil"?.
However, that's even just talking about semantic actions for Qi. You also use them for Lex:
self +=
spaces[([](auto& start, auto& end, auto& matched, auto& id,
auto& ctx) { matched = lex::pass_flags::pass_ignore; })];
Which is then further complicated by not using Phoenix, e.g.:
self += spaces[lex::_pass = lex::pass_flags::pass_ignore];
Which does exactly the same but with about 870% less noise and equal amounts of evil magic.
The other semantic action tops it all:
self += number[(
[](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
auto val = new Number();
auto iter = start;
qi::parse(iter, end, qi::long_long, val->_val);
ctx.set_value(val);
})];
Besides having all the problems already listed, it literally makes a fractal out of things by calling Qi from a Lex semantic action. Of course, this wants to be:
self += number[lex::_val = phx::new_<Number>(/*magic*/)];
But that magic doesn't exist. My gut feeling is that your issue is that the Lexer shouldn't be concerned with AST types at all. At this point I feel that the lexer could/should be something like
// Tokens carry plain integers only; no AST types are involved in lexing.
using TokenTypes = boost::mpl::vector<uint64_t>;
using Iterator = std::string::const_iterator; // NOTE const_
// Lexer with a whitespace token and a number token, plus the two parenthesis characters.
struct Lexer : lex::lexer<actor_lexer<token<Iterator, TokenTypes>>> {
lex::token_def<> spaces;
lex::token_def<uint64_t> number;
Lexer() : spaces{R"(\s+)"}, number{R"(\d+)"} {
self += '(';
self += ')';
// Whitespace is matched but flagged pass_ignore.
self += spaces[lex::_pass = lex::pass_flags::pass_ignore];
self += number;
}
};
That is, if it should exist at all.
That's the structural assessment. Let me apply simplifications to the Qi grammar along the same lines, just so we can reason about the code:
// Grammar producing an Object* for one element (a number or a nested list).
struct Parser : qi::grammar<Lexer::iterator_type, Object*()> {
Parser() : base_type(elem) {
using namespace qi::labels;
// _list names the rule's local List; _objs is a lazy reference to its _objs member.
static constexpr qi::_a_type _list{};
const auto _objs = phx::bind(&List::_objs, _list);
// list: '(' elem* ')'. Each element is pushed onto the local List; once the
// closing paren matched, a heap List is created from the local one.
list = ( //
'(' >> //
*(elem[phx::push_back(_objs, _1)]) //
>> ')' //
)[_val = phx::new_<List>(_list)];
// elem: a nested list, or a number token wrapped in a heap-allocated Number.
elem //
= list[_val = _1] //
| lexer.number[_val = phx::new_<Number>(_1)];
}
Lexer lexer; // TODO FIXME excess scope
private:
using It = Lexer::iterator_type;
qi::rule<It, Object*(), qi::locals<List>> list;
qi::rule<It, Object*()> elem;
};
Note how I made the local List instead of List*, to just slightly reduce the chance of memory leaks. I guess for efficiency you could try to make Phoenix do move-semantics for you:
[_val = phx::new_<List>(phx::static_cast_<List&&>(_list))];
But at that point I wouldn't trust all the expression templates to do what you want and go to the more elaborate (even assuming c++17):
phx::function move_new = [](List& l) { return new List(std::move(l)); };
list = ( //
'(' >> //
*(elem[phx::push_back(_objs, _1)]) //
>> ')' //
)[_val = move_new(_list)];
Now we arrive at a workable demo:
Live On Coliru
// Runs the lexer-based parser over a fixed set of sample inputs and prints
// each quoted input followed by its parsed form or FAILED.
int main() {
    Parser parser;

    static constexpr char const* samples[] = {
        "",
        "42",
        "()",
        "(1 2 3)",
        "(1 (44 55 () 66) 3)",
    };

    for (std::string const line : samples) {
        auto begin = line.begin();
        Object* obj = nullptr;
        bool const ok =
            lex::tokenize_and_parse(begin, line.end(), parser.lexer, parser, obj);

        std::cout << std::quoted(line) << " -> ";
        if (ok) {
            obj->print(std::cout);
            delete obj;
        } else {
            std::cout << "FAILED";
        }
        std::cout << std::endl;
    }
}
Printing
"" -> FAILED
"42" -> 42
"()" -> ()
"(1 2 3)" -> (1 2 3 )
"(1 (44 55 () 66) 3)" -> (1 (44 55 () 66 ) 3 )
Note that this simple test program ALREADY leaks 11 objects, for a total of 224 bytes. That's not even complicating things with error-handling or backtracking rules.
That's craziness. You could of course fix it with smart pointers, but that just further complicates everything while making sure performance will be very poor.
Further Simplifications
I would stop using Lex and dynamic polymorphism:
No More Lex:
The only "value" Lex is adding here is skipping spaces. Qi is very capable (see e.g. Boost spirit skipper issues for variations on that theme), so we'll use skip(space)[] instead:
Live On Coliru
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
// Polymorphic base for parsed values. Derived types implement print();
// streaming an Object with operator<< forwards to print().
struct Object {
    virtual ~Object() = default;

    // Writes a textual representation of this object to `out`.
    virtual void print(std::ostream& out) const = 0;

    // Stream insertion: delegates to print() and returns the stream.
    friend std::ostream& operator<<(std::ostream& os, Object const& o) {
        o.print(os);
        return os;
    }
};
struct Number : Object {
Number(uint64_t v = 0) : _val(v) {}
int64_t _val;
virtual void print(std::ostream& out) const override { out << _val; }
};
struct List : Object {
std::vector<Object*> _objs;
virtual void print(std::ostream& out) const override {
out << '(';
for (auto&& el : _objs)
out << ' ' << *el;
out << ')';
}
};
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Qi grammar (no lexer) producing a raw Object* for input such as
// "(1 (44 55 () 66) 3)". Whitespace is skipped with qi::space.
template <typename It>
struct Parser : qi::grammar<It, Object*()> {
Parser() : Parser::base_type(start) {
using namespace qi::labels;
// Readable name for the first rule-local placeholder (qi::_a); the `list`
// rule declares that local as a List.
static constexpr qi::_a_type _list{};
// Lazy accessor for the _objs member of the local List.
const auto _objs = phx::bind(&List::_objs, _list);
// Lazy function: heap-allocates a List move-constructed from its argument.
phx::function move_new = [](List& l) { return new List(std::move(l)); };
// list := '(' elem* ')' -- children are collected in the local List, which is
// moved into a new heap object once the closing ')' has matched.
list = ( //
'(' >> //
*(elem[phx::push_back(_objs, _1)]) //
>> ')' //
)[_val = move_new(_list)];
// elem := list | unsigned integer (wrapped in a heap-allocated Number)
elem //
= list[_val = _1] //
| qi::uint_[_val = phx::new_<Number>(_1)] //
;
// Entry point: installs the whitespace skipper, so callers can use plain
// qi::parse.
start = qi::skip(qi::space)[elem];
}
private:
// Rules that run under the qi::space skipper.
qi::rule<It, Object*(), qi::space_type, qi::locals<List>> list;
qi::rule<It, Object*(), qi::space_type> elem;
// Declared without a skipper.
qi::rule<It, Object*()> start;
};
int main() {
Parser<std::string::const_iterator> const parser;
for (std::string const line : {
"",
"42",
"()",
"(1 2 3)",
"(1 (44 55 () 66) 3)",
}) {
Object* obj = nullptr;
if (parse(line.begin(), line.end(), parser >> qi::eoi, obj)) {
std::cout << std::quoted(line) << " -> " << *obj;
} else {
std::cout << std::quoted(line) << " -> FAILED";
}
delete obj;
std::cout << std::endl;
}
}
Still leaking like C++ went out of fashion, but at least doing so in 20 fewer LoC and half the compile time.
Static Polymorphism
Hiding all the raw pointer stuff (or avoiding it completely, depending on the exact AST requirements):
// Leaf value: an unsigned 64-bit integer.
using Number = uint64_t;
// An Object is either a Number or a vector of Objects; the self-reference is
// spelled with boost::recursive_variant_.
using Object = boost::make_recursive_variant< //
Number, //
std::vector<boost::recursive_variant_>>::type;
// Name for the list alternative.
using List = std::vector<Object>;
For ease of supplying operator<< I moved them into an AST namespace below.
The parser goes down to:
// Qi grammar producing an AST::Object by value; it contains no semantic
// actions, so rule attributes are filled in by Spirit's attribute propagation.
template <typename It> struct Parser : qi::grammar<It, AST::Object()> {
Parser() : Parser::base_type(start) {
// list := '(' elem* ')'
list = '(' >> *elem >> ')';
// elem := list | unsigned integer
elem = list | qi::uint_;
// Entry point: installs the whitespace skipper for the nested rules.
start = qi::skip(qi::space)[elem];
}
private:
// Rules that run under the qi::space skipper.
qi::rule<It, AST::List(), qi::space_type> list;
qi::rule<It, AST::Object(), qi::space_type> elem;
// Declared without a skipper.
qi::rule<It, AST::Object()> start;
};
No more lex, no more phoenix, no more leaks, no more manual semantic actions. Just expressive code.
Live Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace AST {
struct Number {
uint64_t v;
Number(uint64_t v = 0) : v(v){};
};
using Object = boost::make_recursive_variant< //
Number, //
std::vector<boost::recursive_variant_>>::type;
using List = std::vector<Object>;
std::ostream& operator<<(std::ostream& os, Number const& n) {
return os << n.v;
}
std::ostream& operator<<(std::ostream& os, List const& l) {
os << '(';
for (auto& el : l)
os << ' ' << el;
return os << ')';
}
} // namespace AST
namespace qi = boost::spirit::qi;
// Qi grammar producing an AST::Object by value; it contains no semantic
// actions, so rule attributes are filled in by Spirit's attribute propagation.
template <typename It> struct Parser : qi::grammar<It, AST::Object()> {
Parser() : Parser::base_type(start) {
// list := '(' elem* ')'
list = '(' >> *elem >> ')';
// elem := list | unsigned integer
elem = list | qi::uint_;
// Entry point: installs the whitespace skipper for the nested rules.
start = qi::skip(qi::space)[elem];
}
private:
// Rules that run under the qi::space skipper.
qi::rule<It, AST::List(), qi::space_type> list;
qi::rule<It, AST::Object(), qi::space_type> elem;
// Declared without a skipper.
qi::rule<It, AST::Object()> start;
};
int main() {
Parser<std::string::const_iterator> const parser;
for (std::string const line : {
"",
"42",
"()",
"(1 2 3)",
"(1 (44 55 () 66) 3)",
}) {
AST::Object obj;
if (parse(line.begin(), line.end(), parser >> qi::eoi, obj))
std::cout << std::quoted(line) << " -> " << obj << "\n";
else
std::cout << std::quoted(line) << " -> FAILED\n";
}
}
Prints
"" -> FAILED
"42" -> 42
"()" -> ()
"(1 2 3)" -> ( 1 2 3)
"(1 (44 55 () 66) 3)" -> ( 1 ( 44 55 () 66) 3)
But this time, without leaking memory. And also, it now compiles fast enough that Compiler Explorer can also handle it.
Found a workaround: use std::size_t and reinterpret_cast to replace pointer types:
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
#include <vector>
// Polymorphic base for parsed values; derived classes implement print().
class Object
{
  public:
    // Writes a textual representation of this object to `out`.
    virtual void print(std::ostream& out) = 0;

    virtual ~Object() = default;
};
class Number : public Object
{
public:
int64_t _val;
public:
virtual void print(std::ostream& out) override { out << _val; }
};
class List : public Object
{
public:
std::vector<Object*> _objs;
public:
virtual void print(std::ostream& out) override
{
out << '(';
for (auto&& i : _objs) {
i->print(out);
out << ' ';
}
out << ')';
}
};
namespace qi = boost::spirit::qi;
namespace fu = boost::fusion;
namespace lex = boost::spirit::lex;
using lex::lexertl::actor_lexer;
using lex::lexertl::token;
using TokenTypes = boost::mpl::vector<std::size_t>;
using Iterator = std::string::iterator;
// Lexer definition built on lexertl's actor_lexer, so token definitions can
// carry semantic actions. Tokens iterate over std::string and may hold a
// std::size_t value (see TokenTypes).
class Lexer : public lex::lexer<actor_lexer<token<Iterator, TokenTypes>>>
{
public:
// Whitespace token; it has no attribute.
lex::token_def<> spaces;
// Number token. Its value is declared as std::size_t rather than a pointer
// type; the constructor stores a Number* in it via reinterpret_cast.
lex::token_def<std::size_t> number; // use std::size_t instead
public:
// Registers the token patterns and their actions.
Lexer();
};
// Shorthand for a qi::rule over the lexer's token iterator; Ts are forwarded
// as the rule's remaining template arguments.
template<typename... Ts>
using Rule = qi::rule<Lexer::iterator_type, Ts...>;
// Token-level grammar whose start rule is `elem` (see the constructor).
// NOTE(review): elsewhere in this file Qi attribute signatures are written as
// function types (e.g. `Object*()`); here the bare `Object*` is passed to
// qi::grammar and Rule. Confirm that Qi treats it as the attribute type.
class Parser : public qi::grammar<Lexer::iterator_type, Object*>
{
public:
// Owned by the grammar so that the rules can refer to its token definitions.
Lexer lexer;
// list: uses one rule-local List* while its elements are being collected.
Rule<Object*, qi::locals<List*>> list;
// elem: either a list or a number token.
Rule<Object*> elem;
public:
// Defines `list` and `elem`.
Parser();
};
// Registers the tokens: the two parentheses, whitespace (ignored) and
// numbers (converted to a heap-allocated Number in the lexer action).
Lexer::Lexer()
{
self += '(';
self += ')';
spaces = R"(\s+)";
self +=
spaces[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
// Drop whitespace matches so they are not passed on as tokens.
matched = lex::pass_flags::pass_ignore;
})];
number = R"(\d+)";
self +=
number[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
// Allocate the Number here and parse the matched digits into it.
// NOTE(review): the result of qi::parse is not checked, and nothing in this
// function releases `val`.
auto val = new Number();
auto iter = start;
qi::parse(iter, end, qi::long_long, val->_val);
// The token value type is std::size_t, so the pointer is stored as an integer.
ctx.set_value(reinterpret_cast<std::size_t>(val)); // cast here
})];
}
// Defines the grammar: list := '(' elem* ')' and elem := list | number.
// All attribute handling is done by hand in semantic actions.
Parser::Parser()
: base_type(elem)
{
list = ( //
qi::lit('(')[( //
[](auto& attr, auto& ctx, bool& pass) {
// On '(' : allocate the List and keep it in the rule-local slot.
fu::at_c<0>(ctx.locals) = new List();
})] //
>> *(elem[( //
[](auto& attr, auto& ctx, bool& pass) {
// For every element: append its pointer to the local List.
List* list = fu::at_c<0>(ctx.locals);
list->_objs.push_back(attr);
})]) //
>> ')' //
)[( //
[](auto& attr, auto& ctx, bool& pass) {
// After ')' : hand the local List over as the rule's attribute.
List* list = fu::at_c<0>(ctx.locals);
fu::at_c<0>(ctx.attributes) = list;
})];
// The number token carries a std::size_t; convert it back to the pointer that
// the lexer action stored in it.
elem %= list | qi::omit[lexer.number[([](auto& attr, auto& ctx, bool& pass) {
fu::at_c<0>(ctx.attributes) = reinterpret_cast<Object*>(attr); // cast here
})]];
}
int
main(int argc, char* argv[])
{
Parser parser;
std::string line;
while (std::getline(std::cin, line)) {
auto begin = line.begin();
Object* obj;
lex::tokenize_and_parse(begin, line.end(), parser.lexer, parser, obj);
obj->print(std::cout);
std::cout << std::endl;
}
}
I think this is really ugly. Does anyone have a better solution?
What is the algorithm for developing a string parser to create a geometry? The geometry is generated in 2 steps: at the first step, we create primitives; at the second, we combine primitives into objects.
The syntax is presented in the string below.
string str="[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//..."
// Sketch of the intended base class for geometric primitives (question code).
// NOTE(review): this does not compile as written:
//  - the class is named PRIMITIVE, but the operators refer to `Primitive`;
//  - each operator declares a reference return type yet has an empty body;
//  - check_in_point_inside_primitive has no parameter list, so it is not a
//    function declaration and cannot be marked pure virtual.
class PRIMITIVE{
int number;
public:
Primitive& operator+ (Primitive& primitive) {}; //overloading arithmetic operations
Primitive& operator* (Primitive& primitive) {};
Primitive& operator- (Primitive& primitive) {};
virtual bool check_in_point_inside_primitive = 0;
};
// Sphere primitive described by its radius (question code).
// NOTE(review): check_in_point_inside_sphere returns bool but its body is
// empty, and `Point` is not declared in this snippet.
class SPHERE:public PRIMITIVE{
double m_radius;
public:
SPHERE(double radius): m_radius(radius) {}; //In which part of the parser to create objects?
bool check_in_point_inside_sphere(Point& point){};
};
// Box primitive described by two parameters, A and B (question code).
// NOTE(review): check_in_point_inside_box returns bool but its body is empty,
// and `Point` is not declared in this snippet.
class BOX:public PRIMITIVE{
double m_A;
double m_B;
public:
BOX(double A, double B): m_A(A), m_B(B) {};
bool check_in_point_inside_box(Point& point){};
};
// Sketch of a composite object built from primitives (question code).
// NOTE(review): `primitive` is a reference member, but the default constructor
// does not initialize it; the check function returns bool with no return
// statement and uses the undeclared name `Primitive`.
class OBJECT{
int number;
PRIMITIVE& primitive;
public:
OBJECT(){};
bool check_in_point_inside_object(Primitive& PRIMITIVE1, Primitive& PRIMITIVE2, Point& point){
//>How to construct a function from an expression 'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
}
};
How do I analyze the string PRIMITIVE1=SPHERE(RADIUS=5.5) and pass a parameter to the constructor of SPHERE()? How do I identify this object by the name PRIMITIVE1 so that I can refer to it in OBJECT? Is it possible to create a pair<PRIMITIVE1,SPHERE(5.5)> and store all primitives in a map?
How to parse the string of the OBJECT1 and to construct a function from an expression PRIMITIVE2*(-PRIMITIVE1) inside an OBJECT1? This expression will be required multiple times when determining the position of each point relative to the object.
How to use boost::spirit for this task? Tokenize a string using boost::spirit::lex, and then develop rules using boost::spirit::qi?
As a finger exercise, and despite the serious problems I see with the chosen virtual type hierarchy, let's try to make a value-oriented container of Primitives that can be indexed by their id (ById):
Live On Coliru
#include <boost/intrusive/set.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <iostream>
namespace bi = boost::intrusive;
// Placeholder point type; it has no members in this example.
struct Point {
};
using IndexHook = bi::set_member_hook<bi::link_mode<bi::auto_unlink>>;
// Abstract base for geometric primitives. Each primitive has an integer id
// and a member hook so it can be linked into the intrusive Index set.
class Primitive {
int _id;
public:
// Ordering predicate on ids. access() maps either a plain int or a Primitive
// to an id, so the arguments may be Primitives, ints, or a mix of both.
struct ById {
bool operator()(auto const&... oper) const { return std::less<>{}(access(oper)...); }
private:
static int access(int id) { return id; }
static int access(Primitive const& p) { return p._id; }
};
// Hook used by the intrusive set (declared with auto_unlink link mode).
IndexHook _index;
Primitive(int id) : _id(id) {}
virtual ~Primitive() = default;
int id() const { return _id; }
// Placeholder compound operators: they currently return *this unchanged.
Primitive& operator+= (Primitive const& primitive) { return *this; } //overloading arithmetic operations
Primitive& operator*= (Primitive const& primitive) { return *this; }
Primitive& operator-= (Primitive const& primitive) { return *this; }
// Point-membership test, implemented by each concrete primitive.
virtual bool check_in_point_inside(Point const&) const = 0;
};
// Intrusive set of Primitives ordered by id (Primitive::ById), linked through
// the Primitive::_index member hook; constant-time size() is disabled.
using Index =
bi::set<Primitive, bi::constant_time_size<false>,
bi::compare<Primitive::ById>,
bi::member_hook<Primitive, IndexHook, &Primitive::_index>>;
// Sphere primitive: an id plus a radius.
class Sphere : public Primitive {
    double _radius;

  public:
    // In which part of the parser to create objects?
    Sphere(int id, double radius) : Primitive(id), _radius(radius) {}

    // Placeholder implementation: always reports "not inside".
    bool check_in_point_inside(Point const& /*point*/) const override {
        return false;
    }
};
// Box primitive: an id plus two parameters, A and B.
class Box : public Primitive {
    double _A;
    double _B;

  public:
    Box(int id, double A, double B)
        : Primitive(id)
        , _A(A)
        , _B(B) {}

    // Placeholder implementation: always reports "not inside".
    bool check_in_point_inside(Point const& /*point*/) const override {
        return false;
    }
};
// Composite object: an id plus a reference to a primitive it is built from.
class Object {
    int        _id;
    Primitive& _primitive;

  public:
    Object(int id, Primitive& p)
        : _id(id)
        , _primitive(p) {}

    // Placeholder implementation: always reports "not inside".
    bool check_in_point_inside_object(Primitive const& /*p1*/,
                                      Primitive const& /*p2*/,
                                      Point const& /*point*/) const
    {
        //>How to construct a function from an expression
        //'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
        return false;
    }
};
using Primitives = boost::poly_collection::base_collection<Primitive>;
// Demo: stores primitives by value in a poly_collection, indexes them by id,
// then lists the index contents followed by the collection contents.
int main() {
    // Prints the dynamic type name and id of every primitive in `range`.
    auto dump = [](auto& range) {
        for (auto& prim : range)
            std::cout << typeid(prim).name() << " " << prim.id() << "\n";
    };

    Primitives test;
    test.insert(Sphere{2, 4.0});
    test.insert(Sphere{4, 4.0});
    test.insert(Box{2, 5, 6});
    test.insert(Sphere{1, 4.0});
    test.insert(Box{3, 5, 6});

    Index idx;
    for (auto& prim : test) {
        bool const inserted = idx.insert(prim).second;
        if (!inserted)
            std::cout << "Duplicate id " << prim.id() << " not indexed\n";
    }

    dump(idx);
    std::cout << "---\n";
    dump(test);
}
Prints
Duplicate id 2 not indexed
6Sphere 1
3Box 2
3Box 3
6Sphere 4
---
3Box 2
3Box 3
6Sphere 2
6Sphere 4
6Sphere 1
So far so good. This is an important building block to prevent all manner of pain when dealing with virtual types in Spirit grammars¹
PS: I've since dropped the idea of the intrusive set. It doesn't work because the base_collection moves items around on reallocation, and that unlinks the items from their intrusive set.
Instead, see below for an approach that doesn't try to resolve ids during the parse.
Parsing primitives
We get the ID from the PRIMITIVE1. We could store it somewhere before naturally parsing the primitives themselves, then set the id on it on commit.
Let's start with defining a State object for the parser:
// Parser state: collects finished primitives and objects, and holds the id of
// the entity that is currently being parsed.
struct State {
Ast::Id next_id;
Ast::Primitives primitives;
Ast::Objects objects;
// Variant overload: dispatches to the overload below for the active type.
template <typename... T> void commit(boost::variant<T...>& val) {
boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
}
// Takes the pending id (resetting it to 0). Primitives receive the id and are
// moved into `primitives`; anything else is wrapped in an Ast::Object with
// that id and appended to `objects`.
template <typename T> void commit(T& primitiveOrExpr) {
auto id = std::exchange(next_id, 0);
if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
primitiveOrExpr.id = id;
primitives.insert(std::move(primitiveOrExpr));
} else {
objects.push_back(Ast::Object{id, std::move(primitiveOrExpr)});
}
}
};
As you can see, we just have a place to store the primitives, objects. And then there is the temporary storage for our next_id while we're still parsing the next entity.
The commit function helps sorting the products of the parser rules. As it happens, they can be variant, which is why we have the apply_visitor dispatch for commit on a variant.
Again, as the footnote¹ explains, Spirit's natural attribute synthesis favors static polymorphism.
The semantic actions we need are now:
// Fetches the State instance stored in the parser context.
static inline auto& state(auto& ctx) { return get<State>(ctx); }
// draft: remembers the parsed attribute as the id of the entity in progress.
auto draft = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
// commit: hands the parsed attribute to State::commit.
auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };
Now let's jump ahead to the primitives:
// sphere(radius=<number>)
auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
// box(a=<number>, b=<number>)
auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');
// primitive<N> = sphere|box ;  -- the id is drafted first and the primitive is
// committed once it has been parsed; the ';' is an expectation point ('>').
auto primitive =
("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';
That's still cheating a little, as I've used the param helper to reduce typing:
// A floating-point literal exposed as Ast::Number.
auto number = as<Ast::Number>(double_, "number");
// name '=' p  -- the name is matched but its attribute is omitted.
auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
// Convenience overload: the parameter value is a number.
auto param(auto name) { return param(name, number); }
As you can see I've already assumed most parameters will have numerical nature.
What Are Objects Really?
Looking at it for a while, I concluded that really an Object is defined as an id number (OBJECT1, OBJECT2...) which is tied to an expression. The expression can reference primitives and have some unary and binary operators.
Let's sketch an AST for that:
// Numeric literal.
using Number = double;
// Reference to a primitive by id.
struct RefPrimitive { Id id; };
struct Binary;
struct Unary;
// Expression node: a literal, a primitive reference, or a unary/binary
// operation (the recursive alternatives go through recursive_wrapper).
using Expr = boost::variant< //
Number, //
RefPrimitive, //
boost::recursive_wrapper<Unary>, //
boost::recursive_wrapper<Binary> //
>;
// Unary operation: operator character and operand.
struct Unary { char op; Expr oper; };
// Binary operation: left operand, operator character, right operand.
struct Binary { Expr lhs; char op; Expr rhs; };
// An object: its id and the expression that defines it.
struct Object { Id id; Expr expr; };
Now To Parse Into That Expression AST
It's really 1:1 rules for each Ast node type. E.g.:
// "primitive" immediately followed by its number (lexeme: no skipping in
// between), exposed as Ast::RefPrimitive.
auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_]);
Now many of the expression rules can recurse, so we need declared rules with definitions via BOOST_SPIRIT_DEFINE:
// object expression grammar
// Declared up front because the definitions refer to each other recursively.
rule<struct simple_tag, Ast::Expr> simple{"simple"};
rule<struct unary_tag, Ast::Unary> unary{"unary"};
rule<struct expr_tag, Ast::Expr> expr{"expr"};
rule<struct term_tag, Ast::Expr> term{"term"};
rule<struct factor_tag, Ast::Expr> factor{"factor"};
As you can tell, some of these are not 1:1 with the Ast nodes, mainly because of the recursion and the difference in operator precedence (term vs factor vs. simple). It's easier to see with the rule definition:
// unary  := ('-' | '+') simple
auto unary_def = char_("-+") >> simple;
// simple := primitive reference | unary | parenthesized expression
auto simple_def = ref_prim | unary | '(' >> expr >> ")";
auto factor_def = simple;
// term := factor (('*' | '/') factor)*
// expr := term   (('-' | '+') term)*
// The repetition consumes one operand of the next-higher precedence level per
// iteration and make_binary folds it onto the value built so far, so chains
// such as a-b-c group as (a-b)-c. Recursing into term/expr here instead would
// group them as a-(b-c).
auto term_def = factor[assign] >> *(char_("*/") >> factor)[make_binary];
auto expr_def = term[assign] >> *(char_("-+") >> term)[make_binary];
Because none of the rules actually expose a Binary, automatic attribute propagation is not convenient there². Instead, we use assign and make_binary semantic actions:
// assign: the rule's value becomes the attribute that was just parsed.
auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };
// make_binary: the attribute is an (operator, operand) sequence; the rule's
// current value becomes the left-hand side of a new Binary node.
auto make_binary = [](auto& ctx) {
using boost::fusion::at_c;
auto& attr = _attr(ctx);
auto op = at_c<0>(attr);
auto& rhs = at_c<1>(attr);
_val(ctx) = Ast::Binary { _val(ctx), op, rhs };
};
Finally, let's tie the definitions to the declared rules (using their tag types):
BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)
All we need is a similar line to primitive:
// object<N> = <expr> ;  -- the id is drafted first and the expression is
// committed once it has been parsed; the ';' is an expectation point ('>').
auto object =
("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';
And we can finish up by defining each line as a primitive|object:
// A line defines either a primitive or an object.
auto line = primitive | object;
// Whole input: optional leading line breaks, the "[geometry]" header, then
// optional lines separated by line breaks, up to the end of input. Matching is
// case-insensitive and ws_comment is used as the skipper.
auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
At the top level we expect the [GEOMETRY] header, specify that we want to be case insensitive and ... that ws_comment is to be skipped³:
auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> eol]);
This allows us to ignore the // comments as well.
Live Demo Time
Live On Compiler Explorer
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <list>
#include <map>
namespace x3 = boost::spirit::x3;
// AST for the geometry description: primitives (a polymorphic hierarchy),
// expressions over primitive references, and the objects they define.
namespace Ast {
using Id = uint32_t;
struct Point { }; // ?? where does this belong?
// Base of all primitives; `id` is assigned when the primitive is committed.
struct Primitive {
Id id;
virtual ~Primitive() = default;
};
struct Sphere : Primitive { double radius; };
struct Box : Primitive { double a, b; };
// Numeric literal.
using Number = double;
// Reference to a primitive by id.
struct RefPrimitive { Id id; };
struct Binary;
struct Unary;
// Expression node: a literal, a primitive reference, or a unary/binary
// operation (the recursive alternatives go through recursive_wrapper).
using Expr = boost::variant< //
Number, //
RefPrimitive, //
boost::recursive_wrapper<Unary>, //
boost::recursive_wrapper<Binary> //
>;
// Member order matters: these are initialized positionally elsewhere.
struct Unary { char op; Expr oper; };
struct Binary { Expr lhs; char op; Expr rhs; };
struct Object { Id id; Expr expr; };
// Primitives are stored by value in a polymorphic collection.
using Primitives = boost::poly_collection::base_collection<Primitive>;
using Objects = std::list<Object>;
// Lookup from id to a (non-owning) reference to a primitive.
using Index = std::map<Id, std::reference_wrapper<Primitive const>>;
// Prints the demangled dynamic type name followed by the id.
std::ostream& operator<<(std::ostream& os, Primitive const& p) {
return os << boost::core::demangle(typeid(p).name()) << " "
<< "(id: " << p.id << ")";
}
std::ostream& operator<<(std::ostream& os, Object const& o) {
return os << "object(id:" << o.id << ", expr:" << o.expr << ")";
}
std::ostream& operator<<(std::ostream& os, RefPrimitive ref) {
return os << "reference(prim:" << ref.id << ")";
}
// Binary and unary operations are printed fully parenthesized.
std::ostream& operator<<(std::ostream& os, Binary const& b) {
return os << '(' << b.lhs << b.op << b.rhs << ')';
}
std::ostream& operator<<(std::ostream& os, Unary const& u) {
return os << '(' << u.op << u.oper << ')';
}
} // namespace Ast
// Fusion adaptations listing which members each struct exposes as a sequence.
// Sphere and Box list only their own parameters, not the inherited id, and
// Object lists only its id.
BOOST_FUSION_ADAPT_STRUCT(Ast::Primitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Sphere, radius)
BOOST_FUSION_ADAPT_STRUCT(Ast::Box, a, b)
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::RefPrimitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Unary, op, oper)
// X3 grammar for the geometry description. Parsed entities are collected in a
// State instance that the caller injects with x3::with<State>.
namespace Parser {
    using namespace x3;

    // Parser state: collects finished primitives and objects, and holds the id
    // of the entity that is currently being parsed.
    struct State {
        Ast::Id         next_id = 0; // 0 until `draft` stores an id
        Ast::Primitives primitives;
        Ast::Objects    objects;

        // Variant overload: dispatches to the overload below for the active type.
        template <typename... T> void commit(boost::variant<T...>& val) {
            boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
        }

        // Takes the pending id (resetting it to 0). Primitives receive the id
        // and are moved into `primitives`; anything else is wrapped in an
        // Ast::Object with that id and appended to `objects`.
        template <typename T> void commit(T& val) {
            auto id = std::exchange(next_id, 0);
            if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
                val.id = id;
                primitives.insert(std::move(val));
            } else {
                objects.push_back(Ast::Object{id, std::move(val)});
            }
        }
    };

    // Fetches the State instance stored in the parser context.
    static inline auto& state(auto& ctx) { return get<State>(ctx); }
    // draft: remembers the parsed attribute as the id of the entity in progress.
    auto draft = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
    // commit: hands the parsed attribute to State::commit.
    auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };

    // as<T>(p, name): wraps parser `p` in an anonymous rule whose attribute is T.
    template <typename T>
    auto as = [](auto p, char const* name = "as") {
        return rule<struct _, T>{name} = p;
    };

    // Skipper: blanks and "//" comments (to end of line or end of input).
    auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> (eol | eoi)]);

    auto number = as<Ast::Number>(double_, "number");
    // name '=' p  -- the name is matched but its attribute is omitted.
    auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
    auto param(auto name) { return param(name, number); }

    auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
    auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');

    // primitive<N> = sphere|box ;
    auto primitive =
        ("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';

    auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_], "ref_prim");

    // object expression grammar
    rule<struct simple_tag, Ast::Expr> simple{"simple"};
    rule<struct unary_tag, Ast::Unary> unary{"unary"};
    rule<struct expr_tag, Ast::Expr>   expr{"expr"};
    rule<struct term_tag, Ast::Expr>   term{"term"};
    rule<struct factor_tag, Ast::Expr> factor{"factor"};

    // assign: the rule's value becomes the attribute that was just parsed.
    auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };
    // make_binary: the attribute is an (operator, operand) sequence; the rule's
    // current value becomes the left-hand side of a new Binary node.
    auto make_binary = [](auto& ctx) {
        using boost::fusion::at_c;
        auto& attr = _attr(ctx);
        auto op    = at_c<0>(attr);
        auto& rhs  = at_c<1>(attr);
        _val(ctx)  = Ast::Binary { _val(ctx), op, rhs };
    };

    auto unary_def  = char_("-+") >> simple;
    auto simple_def = ref_prim | unary | '(' >> expr >> ")";
    auto factor_def = simple;
    // term := factor (('*' | '/') factor)*
    // expr := term   (('-' | '+') term)*
    // Each iteration consumes one operand of the next-higher precedence level
    // and make_binary folds it onto the value built so far, so a-b-c groups as
    // (a-b)-c. Recursing into term/expr here would group it as a-(b-c).
    auto term_def   = factor[assign] >> *(char_("*/") >> factor)[make_binary];
    auto expr_def   = term[assign] >> *(char_("-+") >> term)[make_binary];

    BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)

    // object<N> = <expr> ;
    auto object =
        ("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';

    auto line = primitive | object;
    // Whole input: optional leading line breaks, the "[geometry]" header, then
    // optional lines separated by line breaks, up to the end of input.
    auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
} // namespace Parser
int main() {
for (std::string const input :
{
R"(
[geometry]
primitive1=sphere(radius=5.5);
primitive2=box(a=-5.2, b=7.3);
//...
object1=primitive2*(-primitive1);
//...)",
R"(
[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//...)",
}) //
{
Parser::State state;
bool ok = parse(begin(input), end(input),
x3::with<Parser::State>(state)[Parser::file]);
std::cout << "Parse success? " << std::boolalpha << ok << "\n";
Ast::Index index;
for (auto& p : state.primitives)
if (auto[it,ok] = index.emplace(p.id, p); not ok) {
std::cout << "Duplicate id " << p
<< " (conflicts with existing " << it->second.get()
<< ")\n";
}
std::cout << "Primitives by ID:\n";
for (auto& [id, prim] : index)
std::cout << " - " << prim << "\n";
std::cout << "Objects in definition order:\n";
for (auto& obj: state.objects)
std::cout << " - " << obj << "\n";
}
}
Prints
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
¹ How can I use polymorphic attributes with boost::spirit::qi parsers?
² and insisting on that leads to the classic inefficiency of rules that cause a lot of backtracking
³ outside of lexemes
I need to convert infix notations like the one below to n-ary prefix notation with Boost::Spirit, but I am failing at building on the answers from https://stackoverflow.com/a/8707598/1816477 et al.
This is what I am trying to parse:
not (xyz='a' or xyz='b' or xyz='c') and abc='s' xor (pqr ='v' and xyz='d')
and this LISP-styled format is what I am trying to provide as output (do not mind the indentation):
(xor (and (= pqr 'v') (= xyz 'd'))
(and (= abc 's')
(not (or (= xyz 'a')
(= xyz 'b')
(= xyz 'c')))))
So, the terms I try to parse consist of prefixed (not <expression>) and infix expressions (<expression> and <expression> and ... etc.), i.e.: assignments, negations and n-ary ands, ors, xors etc., implying operator precedence (or < xor < and < assignment < negation).
What I am failing at is getting the grammar right. Outputting to a suitable boost::variant representing the parsed boolean expression I think I am able to accomplish. I am thinking of an output structure like this one:
// Proposed output AST: an expression is a plain string, a prefix expression
// or an n-ary infix expression.
struct prefixExpr;
struct infixExpr;
typedef boost::variant<
std::string, // identifiers, values etc.
boost::recursive_wrapper<prefixExpr>, // e.g. negation
boost::recursive_wrapper<infixExpr> // assignment, and, or, xor etc.
> expression;
// Prefix operator applied to a single operand.
struct prefixExpr {
std::string op; // currently only "not"
expression expr;
};
BOOST_FUSION_ADAPT_STRUCT(prefixExpr, op, expr)
// Infix operator applied to a list of operands.
struct infixExpr {
std::string op; // "and", "or", "xor", "="
std::vector<expression> exprs;
};
BOOST_FUSION_ADAPT_STRUCT(infixExpr, op, exprs)
What do I need to do to be able to parse expressions like the one mentioned above and convert them to a prefix notation?
I am using Boost 1.67.0 (the latest at the time of writing) and Visual Studio 15.7.3 (also the latest at the time of writing).
The code is not perfect but should be simple to understand:
#include <boost/variant.hpp>
#include <boost/spirit/home/x3.hpp>
#include <vector>
#include <string>
#include <iostream>
// Distinct string types, so that the variant (and its visitors) can tell
// identifiers and quoted values apart.
struct id : std::string {};
struct value : std::string {};
struct nary_expr;
// Expression node: an identifier, a value, or an operator with operands.
using expr = boost::variant<
id, value,
boost::recursive_wrapper<nary_expr>
>;
// Operator name together with its operands, in prefix form.
struct nary_expr
{
std::string op;
std::vector<expr> exprs;
};
namespace x3 = boost::spirit::x3;
// Semantic action for `operand >> *(op > operand)`.
// The attribute is a pair (first operand, list of (op, operand) entries).
// With an empty list the first operand is passed through unchanged; otherwise
// consecutive operands that share an operator are collected into one nary_expr.
auto compose_nary_expr = [](auto& ctx)
{
//auto&& [left, tail] = x3::_attr(ctx);
auto&& left = boost::fusion::at_c<0>(x3::_attr(ctx));
auto&& tail = boost::fusion::at_c<1>(x3::_attr(ctx));
if (tail.size() == 0) {
x3::_val(ctx) = left;
return;
}
// left associativity
auto op = boost::fusion::at_c<0>(tail[0]);
std::vector<expr> exprs = { left, boost::fusion::at_c<1>(tail[0]) };
for (std::size_t i = 1; i < tail.size(); ++i) {
// same priority but different operator
// When the operator changes, everything collected so far becomes the first
// operand of a new group under the new operator.
auto&& next_op = boost::fusion::at_c<0>(tail[i]);
if (op != next_op) {
exprs = std::vector<expr>{ nary_expr{ op, std::move(exprs) } };
op = next_op;
}
exprs.push_back(boost::fusion::at_c<1>(tail[i]));
}
x3::_val(ctx) = nary_expr{ op, std::move(exprs) };
};
// One rule per precedence level; a higher number binds more loosely:
// prec4 = "or", prec3 = "xor", prec2 = "and", prec1 = "=", prec0 = primary.
x3::rule<class prec4_expr_rule, expr> const prec4_expr("prec4_expr");
x3::rule<class prec3_expr_rule, expr> const prec3_expr("prec3_expr");
x3::rule<class prec2_expr_rule, expr> const prec2_expr("prec2_expr");
x3::rule<class prec1_expr_rule, expr> const prec1_expr("prec1_expr");
x3::rule<class prec0_expr_rule, expr> const prec0_expr("prec0_expr");
// Each level is `next >> *(keyword > next)`; the '>' is an expectation point,
// so an operand must follow once the keyword has matched.
auto const prec4_expr_def = prec4_expr = (
prec3_expr
>> *( (x3::string("or") > prec3_expr)
)
)[compose_nary_expr];
auto const prec3_expr_def = prec3_expr = (
prec2_expr
>> *( (x3::string("xor") > prec2_expr)
)
)[compose_nary_expr];
auto const prec2_expr_def = prec2_expr = (
prec1_expr
>> *( (x3::string("and") > prec1_expr)
)
)[compose_nary_expr];
// Semantic action for `operand >> *(op > operand)` at the "=" level.
// With no trailing entry the first operand is passed through; otherwise the
// first operand and the operand of the first trailing entry are combined into
// a two-operand nary_expr. Trailing entries after the first are not used.
auto compose_binary_expr = [](auto& ctx)
{
    auto&& first = boost::fusion::at_c<0>(x3::_attr(ctx));
    auto&& rest  = boost::fusion::at_c<1>(x3::_attr(ctx));

    if (rest.size() == 0) {
        x3::_val(ctx) = first;
        return;
    }

    auto&& oper   = boost::fusion::at_c<0>(rest[0]);
    auto&& second = boost::fusion::at_c<1>(rest[0]);
    x3::_val(ctx) = nary_expr{ oper, { first, second } };
};
// should use optional, but something wrong with spirit
// Comparison level: operand, optionally followed by "=" and a second operand.
// It is written with '*' (see the remark above); compose_binary_expr only
// looks at the first repetition.
auto const prec1_expr_def = prec1_expr = (
prec0_expr >> *(x3::string("=") > prec0_expr)
)[compose_binary_expr];
// Negation: "not" followed by a primary expression.
x3::rule<class not_expr_rule, expr> const not_expr("not_expr");
// Semantic action: wraps the operand in a one-operand nary_expr named after
// the parsed operator.
auto compose_unary_expr = [](auto& ctx)
{
//auto&& [op, expr] = x3::_attr(ctx);
auto&& op = boost::fusion::at_c<0>(x3::_attr(ctx));
auto&& expr = boost::fusion::at_c<1>(x3::_attr(ctx));
x3::_val(ctx) = nary_expr{ op, { expr } };
};
// '>' is an expectation point: an operand must follow once "not" has matched.
auto const not_expr_def = not_expr = (x3::string("not") > prec0_expr)[compose_unary_expr];
// Identifier: a letter followed by letters or digits, with no skipping inside.
auto const id_term = x3::rule<class id_r, id>{} = x3::lexeme[x3::alpha >> *x3::alnum];
// Quoted value: one or more characters other than ' between single quotes.
auto const value_term = x3::rule<class value_r, value>{} = x3::lexeme["'" > +~x3::char_('\'') >> "'"];
// Primary expression. The alternatives are tried in this order, so "not" is
// attempted before a plain identifier.
// NOTE(review): x3::string("not") has no word-boundary check here; presumably
// an identifier that merely starts with "not" would enter not_expr -- verify.
auto const prec0_expr_def =
value_term
| ( '(' > prec4_expr >> ')' )
| not_expr
| id_term
;
// Binds each *_def above to its rule.
BOOST_SPIRIT_DEFINE(
prec0_expr
, prec1_expr
, prec2_expr
, prec3_expr
, prec4_expr
, not_expr
);
// Indentation level, measured in spaces.
struct indent
{
    std::size_t cur;
};

// Returns an indent that is `rhs` columns deeper than `lhs`.
indent operator+(indent lhs, std::size_t rhs)
{
    indent deeper = lhs;
    deeper.cur = lhs.cur + rhs;
    return deeper;
}

// Writes `v.cur` space characters to the stream.
std::ostream& operator<<(std::ostream& os, indent const& v)
{
    unsigned written = 0;
    while (written < v.cur) {
        os << ' ';
        ++written;
    }
    return os;
}
struct is_simple
{
template <typename T>
bool operator()(T const&) const
{
return std::is_same<T, id>::value || std::is_same<T, value>::value;
}
};
// Visitor that writes an expression to std::cout in LISP-style prefix form.
// Leaf operands stay on the current line; nested expressions start on a new
// line, aligned under the first operand.
struct printer
{
    indent indent_;

    // Identifiers are printed verbatim.
    void operator()(id const& v)
    {
        std::cout << v;
    }

    // Values are printed inside single quotes.
    void operator()(value const& v)
    {
        std::cout << '\'' << v << '\'';
    }

    // Prints "(op operand operand ...)".
    void operator()(nary_expr const& v)
    {
        std::cout << '(' << v.op << ' ';

        // Operands are aligned after "(", the operator name and one space.
        printer nested{ indent_ + 2 + v.op.size() };

        auto operand = v.exprs.begin();
        boost::apply_visitor(nested, *operand);

        for (++operand; operand != v.exprs.end(); ++operand) {
            bool const leaf = boost::apply_visitor(is_simple{}, *operand);
            if (leaf)
                std::cout << ' ';
            else
                std::cout << '\n' << nested.indent_;
            boost::apply_visitor(nested, *operand);
        }
        std::cout << ')';
    }
};
int main()
{
std::string s = "not (xyz='a' or xyz='b' or xyz='c') and abc='s' xor (pqr ='v' and xyz='d')";
expr expr;
auto iter = s.cbegin();
if (phrase_parse(iter, s.cend(), prec4_expr_def, x3::space, expr) && iter == s.cend()) {
boost::apply_visitor(printer{}, expr);
}
return 0;
}
It prints:
(xor (and (not (or (= xyz 'a')
(= xyz 'b')
(= xyz 'c')))
(= abc 's'))
(and (= pqr 'v')
(= xyz 'd')))