the BNF I implement has a funny rule where, depending on operator, the terms can be chained or event not at this production rule. Hence I use the same AST data structure since only the enumeration changes:
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <string>
#include <list>
namespace ast
{
struct identifer {
int name;
};
struct expression {
struct chunk {
char operator_;
ast::identifer identifer;
};
ast::identifer identifer;
std::list<chunk> chunk_list;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::identifer,
name
)
BOOST_FUSION_ADAPT_STRUCT(ast::expression::chunk,
operator_, identifer
)
BOOST_FUSION_ADAPT_STRUCT(ast::expression,
identifer, chunk_list
)
namespace boost { namespace spirit { namespace x3 { namespace traits {
void move_to(ast::expression::chunk&& chunk, std::list<ast::expression::chunk>& chunk_list,
mpl::identity<container_attribute>)
{
chunk_list.emplace(chunk_list.end(), std::move(chunk));
}
} } } }
namespace parser
{
namespace x3 = boost::spirit::x3;
auto const identifier = x3::rule<struct _, int> { "identifier" } =
x3::int_;
auto const operator_1 = x3::rule<struct _, char> { "operator" } =
x3::char_("ABC");
auto const operator_2 = x3::rule<struct _, char> { "operator" } =
x3::char_("XYZ");
auto const expression_chunk_1 = x3::rule<struct _, ast::expression::chunk> { "expression" } =
operator_1 > identifier
;
auto const expression_chunk_2 = x3::rule<struct _, ast::expression::chunk> { "expression" } =
operator_2 > identifier
;
auto const expression = x3::rule<struct _, ast::expression> { "expression" } =
identifier >> *expression_chunk_1 // foo { and foo }
// rule below fails to compile
| identifier >> expression_chunk_2 // foo [ nand foo ]
;
}
struct visitor {
visitor(std::ostream& os) : os{ os } { }
void operator()(ast::expression const& node) {
os << "(";
(*this)(node.identifer);
for(auto const& chunk : node.chunk_list) {
os << "(" << chunk.operator_ << " ";
(*this)(chunk.identifer);
os << ")";
}
os << ")\n";
}
void operator()(ast::identifer const& node) {
os << "(" << node.name << ")";
}
std::ostream& os;
};
int main()
{
namespace x3 = boost::spirit::x3;
for(std::string const str: {
"1 X 2",
"3 A 4 A 5"
}) {
auto iter = str.begin(), end = str.end();
ast::expression attr;
bool r = x3::phrase_parse(iter, end, parser::expression, x3::space, attr);
std::cout << "parse '" << str << "': ";
if (r && iter == end) {
std::cout << "succeeded:\n";
visitor(std::cout)(attr);
} else {
std::cout << "*** failed ***\n";
}
}
return 0;
}
This was the idea - the operator X,Y,Z adds only one chunk to list. Following the compiler errors, I have to specialize x3::traits::move_to, but I don't found any solution to get this to compile. What is the wayy to do? is the list::emplace() and std::move() safe here?
I'd do without the trait. Instead, make the grammar result in a vector<T> artificially using repeat:
auto const expression = x3::rule<struct _, ast::expression> { "expression" } =
identifier >> *expression_chunk_1 // foo { and foo }
| identifier >> x3::repeat(1) [ expression_chunk_2 ] // foo [ nand foo ]
;
Related
My ultimate goal is to write a hlsl shading language parser. My first experience with parsing has been by following bob nystrom's "crafting interpreters".
The issue I am currently facing is that I am trying to parse a 'chained member access' sequence (or multiple 'dot operators)....
first.Second.third
Obviously I could parse that into a list % sequence as a vector of strings, but I am trying to stick to the ast shown in the crafting interpreters book by having nested 'Get' nodes.
I am trying to parse this nested Get sequence so that I can eventually put that into a Set ast node. But I thought it would be best to at least get the 'Get' part first. before building on top of that.
https://craftinginterpreters.com/classes.html#set-expressions
Here's my minimal compiling program that tries to do that....
#include "boost/variant.hpp"
#include <boost/config/warning_disable.hpp>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/annotate_on_success.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iostream>
#include <string>
#include <tuple>
#include <variant>
namespace hlsl {
namespace ast {
struct Get;
struct ExprVoidType {};
struct Variable {
Variable(std::string name) : name(std::move(name)) {
}
Variable() = default;
std::string name;
};
using Expr =
boost::spirit::x3::variant<ExprVoidType,
boost::spirit::x3::forward_ast<Get>, Variable>;
struct Get {
Get(Expr& object, std::string name) : object_{object}, name_{name} {
}
Get() = default;
Expr object_;
std::string name_;
};
} // namespace ast
} // namespace hlsl
struct visitor {
using result_type = void;
void operator()(const std::string name) {
std::cout << name << "\n";
}
void operator()(const hlsl::ast::Get& get) {
std::cout << "get expr\n";
get.object_.apply_visitor(*this);
std::cout << get.name_ << "\n";
}
void operator()(const hlsl::ast::Variable& var) {
std::cout << var.name << "\n";
};
void operator()(const hlsl::ast::ExprVoidType& var){};
};
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, name_)
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
using ascii::char_;
using ascii::space;
using x3::alnum;
using x3::alpha;
using x3::double_;
using x3::int_;
using x3::lexeme;
using x3::lit;
struct error_handler {
template <typename Iterator, typename Exception, typename Context>
x3::error_handler_result on_error(Iterator& first, Iterator const& last,
Exception const& x,
Context const& context) {
auto& error_handler = x3::get<x3::error_handler_tag>(context).get();
std::string message = "Error! Expecting: " + x.which() + " here:";
error_handler(x.where(), message);
return x3::error_handler_result::fail;
}
};
/////////////////////////////////////////
// RULES
///////////////////////////////////////////
x3::rule<class identifier_class, std::string> const identifier = "identifier";
auto const identifier_def = +alnum;
BOOST_SPIRIT_DEFINE(identifier);
x3::rule<class expression_class, hlsl::ast::Expr> const expression =
"expression";
x3::rule<class variable_class, hlsl::ast::Variable> const variable = "variable";
x3::rule<class get_class, hlsl::ast::Get> const get = "get";
auto const variable_def = identifier;
BOOST_SPIRIT_DEFINE(variable);
auto const expression_def = get | variable;
BOOST_SPIRIT_DEFINE(expression);
///////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////
// get
auto const get_def = (variable | expression) >> '.' >> identifier;
BOOST_SPIRIT_DEFINE(get);
/////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////
struct program_class;
x3::rule<program_class, hlsl::ast::Expr> const program = "program";
auto const program_def = get;
BOOST_SPIRIT_DEFINE(program);
struct program_class : error_handler {};
// struct program_class;
/////////////////////////////////////////////////////////
// } // namespace parser
// } // namespace client
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
int main() {
using boost::spirit::x3::error_handler_tag;
using boost::spirit::x3::with;
using iterator_type = std::string::const_iterator;
using error_handler_type = boost::spirit::x3::error_handler<iterator_type>;
// input string
std::string input = "first.Second.third";
hlsl::ast::Expr fs;
auto iter = input.begin();
auto const end = input.end();
// Our error handler
error_handler_type error_handler(iter, end, std::cerr);
auto const parser =
// we pass our error handler to the parser so we can access
// it later in our on_error and on_sucess handlers
with<error_handler_tag>(std::ref(error_handler))[program];
bool r;
r = phrase_parse(iter, end, parser, space, fs);
visitor v;
if (r) {
std::cout << "Parse Suceeded\n\n";
fs.apply_visitor(v);
} else {
std::cout << "Sorry :(\n\n";
std::cout << *iter;
}
std::cout << "Bye... :-) \n\n";
return 0;
}
What I want is something like this
Get {
object_: Get {
object_: Variable {
name : "first"
},
name_: second
},
name_: third
}
Is this kind of thing even possible using x3 and the way it constructs parsers from grammar?
Sure. Your grammar parses left-to right, and that's also how you want to build your ast (outside-in, not inside out).
I'd rephrase the whole thing:
expression = variable >> *('.' >> identifier);
Now you'll have to massage the attribute propagation as each . member access wraps the previous expression in another Get{expression, name} instance:
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
x3::rule<struct program_, ast::Expr> const program{"program"};
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
auto variable_def = identifier;
Now let's use two semantic actions to propagate the expression parts:
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_get = [](auto& ctx) {
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
auto expression_def = variable[as_expr] >> *('.' >> identifier[as_get]);
Let's also bake the skipper into the grammar while we're at it:
auto program_def = x3::skip(x3::space)[expression];
Live Demo
With a lot of simplifications, e.g. for the AST & visitor:
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl {
namespace ast {
struct Void {};
struct Get;
struct Variable {
std::string name;
};
using Expr = x3::variant<Void, x3::forward_ast<Get>, Variable>;
struct Get {
Expr object_;
std::string property_;
};
} // namespace ast
struct printer {
std::ostream& _os;
using result_type = void;
void operator()(hlsl::ast::Get const& get) const {
_os << "get { object_:";
get.object_.apply_visitor(*this);
_os << ", property_:" << quoted(get.property_) << " }";
}
void operator()(hlsl::ast::Variable const& var) const {
_os << "var{" << quoted(var.name) << "}";
};
void operator()(hlsl::ast::Void const&) const { _os << "void{}"; };
};
} // namespace hlsl
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
namespace hlsl::parser {
struct eh_tag;
struct error_handler {
template <typename It, typename Exc, typename Ctx>
auto on_error(It&, It, Exc const& x, Ctx const& context) const {
x3::get<eh_tag>(context)( //
x.where(), "Error! Expecting: " + x.which() + " here:");
return x3::error_handler_result::fail;
}
};
struct program_ : error_handler {};
x3::rule<struct identifier_, std::string> const identifier{"identifier"};
x3::rule<struct variable_, ast::Variable> const variable{"variable"};
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
x3::rule<struct program_, ast::Expr> const program{"program"};
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_get = [](auto& ctx) {
_val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
auto variable_def = identifier;
auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
auto program_def = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(variable, expression, identifier, program);
} // namespace hlsl::parser
int main() {
using namespace hlsl;
for (std::string const input :
{
"first",
"first.second",
"first.Second.third",
}) //
{
std::cout << "===== " << quoted(input) << "\n";
auto f = input.begin(), l = input.end();
// Our error handler
auto const p = x3::with<parser::eh_tag>(
x3::error_handler{f, l, std::cerr})[hlsl::parser::program];
if (hlsl::ast::Expr fs; parse(f, l, p, fs)) {
fs.apply_visitor(hlsl::printer{std::cout << "Parsed: "});
std::cout << "\n";
} else {
std::cout << "Parse failed at " << quoted(std::string_view(f, l)) << "\n";
}
}
}
Prints
===== "first"
Parsed: var{"first"}
===== "first.second"
Parsed: get { object_:var{"first"}, property_:"second" }
===== "first.Second.third"
Parsed: get { object_:get { object_:var{"first"}, property_:"Second" }, property_:"third" }
More Simplifications
In the current scenario none of the rules are recursive, so don't need the _DEFINE magic. Assuming you need recursion in the expression later, you could at least remove some redundancy:
namespace hlsl::parser {
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_get = [](auto& ctx) { _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)}; };
auto identifier
= x3::rule<void, std::string>{"identifier"}
= x3::lexeme[x3::alpha >> *x3::alnum];
auto variable = x3::rule<void, ast::Variable>{"variable"} = identifier;
auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
auto program = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(expression)
} // namespace hlsl::parser
Note also that the lexeme is important to suppress skipping (Boost spirit skipper issues)
See it Live On Coliru as well.
Oh and for bonus, a version without x3::variant or visitation:
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace hlsl::ast {
struct Void {};
struct Get;
struct Variable {
std::string name;
};
using Expr = boost::variant<Void, boost::recursive_wrapper<Get>, Variable>;
struct Get {
Expr object_;
std::string property_;
};
static inline std::ostream& operator<<(std::ostream& os, Void) {
return os << "void()";
}
static inline std::ostream& operator<<(std::ostream& os, Variable const& v) {
return os << "var{" << std::quoted(v.name) << "}";
}
static inline std::ostream& operator<<(std::ostream& os, Get const& g) {
return os << "get{ object_:" << g.object_ << ", property_:" << quoted(g.property_)
<< " }";
}
} // namespace hlsl::ast
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)
namespace hlsl::parser {
x3::rule<struct expression_, ast::Expr> const expression{"expression"};
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
auto as_get = [](auto& ctx) { _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)}; };
auto identifier
= x3::rule<void, std::string>{"identifier"}
= x3::lexeme[x3::alpha >> *x3::alnum];
auto variable = x3::rule<void, ast::Variable>{"variable"} = identifier;
auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
auto program = x3::skip(x3::space)[expression];
BOOST_SPIRIT_DEFINE(expression)
} // namespace hlsl::parser
int main() {
using namespace hlsl;
for (std::string const input :
{
"first",
"first.second",
"first.Second.third",
}) //
{
std::cout << "===== " << quoted(input) << "\n";
auto f = input.begin(), l = input.end();
if (ast::Expr fs; parse(f, l, parser::program, fs)) {
std::cout << "Parsed: " << fs << "\n";
} else {
std::cout << "Parse failed at " << quoted(std::string_view(f, l)) << "\n";
}
}
}
Prints just the same:
===== "first"
Parsed: var{"first"}
===== "first.second"
Parsed: get{ object_:var{"first"}, property_:"second" }
===== "first.Second.third"
Parsed: get{ object_:get{ object_:var{"first"}, property_:"Second" }, property_:"third" }
That's >100 lines of code removed. With no functionality sacrificed.
I'd like to make a keyword parser that matches i.e. int, but does not match int in integer with eger left over. I use x3::symbols to get automatically get the parsed keyword represented as an enum value.
Minimal example:
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
namespace x3 = boost::spirit::x3;
enum class TypeKeyword { Int, Float, Bool };
struct TypeKeywordSymbolTable : x3::symbols<TypeKeyword> {
TypeKeywordSymbolTable()
{
add("float", TypeKeyword::Float)
("int", TypeKeyword::Int)
("bool", TypeKeyword::Bool);
}
};
const TypeKeywordSymbolTable type_keyword_symbol_table;
struct TypeKeywordRID {};
using TypeKeywordRule = x3::rule<TypeKeywordRID, TypeKeyword>;
const TypeKeywordRule type_keyword = "type_keyword";
const auto type_keyword_def = type_keyword_symbol_table;
BOOST_SPIRIT_DEFINE(type_keyword);
using Iterator = std::string_view::const_iterator;
/* Thrown when the parser has failed to parse the whole input stream. Contains
* the part of the input stream that has not been parsed. */
class LeftoverError : public std::runtime_error {
public:
LeftoverError(Iterator begin, Iterator end)
: std::runtime_error(std::string(begin, end))
{}
std::string_view get_leftover_data() const noexcept { return what(); }
};
template<typename Rule>
typename Rule::attribute_type parse(std::string_view input, const Rule& rule)
{
Iterator begin = input.begin();
Iterator end = input.end();
using ExpectationFailure = boost::spirit::x3::expectation_failure<Iterator>;
typename Rule::attribute_type result;
try {
bool r = x3::phrase_parse(begin, end, rule, x3::space, result);
if (r && begin == end) {
return result;
} else { // Occurs when the whole input stream has not been consumed.
throw LeftoverError(begin, end);
}
} catch (const ExpectationFailure& exc) {
throw LeftoverError(exc.where(), end);
}
}
int main()
{
// TypeKeyword::Bool is parsed and "ean" is leftover, but failed parse with
// "boolean" leftover is desired.
parse("boolean", type_keyword);
// TypeKeyword::Int is parsed and "eger" is leftover, but failed parse with
// "integer" leftover is desired.
parse("integer", type_keyword);
// TypeKeyword::Int is parsed successfully and this is the desired behavior.
parse("int", type_keyword);
}
Basicly, I want integer not to be recognized as a keyword with additional eger left to parse.
I morphed the test cases into self-describing expectations:
Live On Compiler Explorer
Prints:
FAIL "boolean" -> TypeKeyword::Bool (expected Leftover:"boolean")
FAIL "integer" -> TypeKeyword::Int (expected Leftover:"integer")
OK "int" -> TypeKeyword::Int
Now, the simplest, naive approach would be to make sure you parse till the eoi, by simply changing
auto actual = parse(input, Parser::type_keyword);
To
auto actual = parse(input, Parser::type_keyword >> x3::eoi);
And indeed the tests pass: Live
OK "boolean" -> Leftover:"boolean"
OK "integer" -> Leftover:"integer"
OK "int" -> TypeKeyword::Int
However, this fits the tests, but not the goal. Let's imagine a more involved grammar, where type identifier; is to be parsed:
auto identifier
= x3::rule<struct id_, Ast::Identifier> {"identifier"}
= x3::lexeme[x3::char_("a-zA-Z_") >> *x3::char_("a-zA-Z_0-9")];
auto type_keyword
= x3::rule<struct tk_, Ast::TypeKeyword> {"type_keyword"}
= type_;
auto declaration
= x3::rule<struct decl_, Ast::Declaration>{"declaration"}
= type_keyword >> identifier >> ';';
I'll leave the details for Compiler Explorer:
OK "boolean b;" -> Leftover:"boolean b;"
OK "integer i;" -> Leftover:"integer i;"
OK "int j;" -> (TypeKeyword::Int j)
Looks good. But what if we add some interesting tests:
{"flo at f;", LeftoverError("flo at f;")},
{"boolean;", LeftoverError("boolean;")},
It prints (Live)
OK "boolean b;" -> Leftover:"boolean b;"
OK "integer i;" -> Leftover:"integer i;"
OK "int j;" -> (TypeKeyword::Int j)
FAIL "boolean;" -> (TypeKeyword::Bool ean) (expected Leftover:"boolean;")
So, the test cases were lacking. Your prose description is actually closer:
I'd like to make a keyword parser that matches i.e. int, but does not match int in integer with eger left over
That correctly implies you want to check the lexeme inside the type_keyword rule. A naive try might be checking that no identifier character follows the type keyword:
auto type_keyword
= x3::rule<struct tk_, Ast::TypeKeyword> {"type_keyword"}
= type_ >> !identchar;
Where identchar was factored out of identifier like so:
auto identchar = x3::char_("a-zA-Z_0-9");
auto identifier
= x3::rule<struct id_, Ast::Identifier> {"identifier"}
= x3::lexeme[x3::char_("a-zA-Z_") >> *identchar];
However, this doesn't work. Can you see why (peeking allowed: https://godbolt.org/z/jb4zfhfWb)?
Our latest devious test case now passes (yay), but int j; is now rejected! If you think about it, it only makes sense, because you have spaced skipped.
The essential word I used a moment ago was lexeme: you want to treat some units as lexemes (and whitespace stops the lexeme. Or rather, whitespace isn't automatically skipped inside the lexeme¹). So, a fix would be:
auto type_keyword
// ...
= x3::lexeme[type_ >> !identchar];
(Note how I sneakily already included that on the identifier rule earlier)
Lo and behold (Live):
OK "boolean b;" -> Leftover:"boolean b;"
OK "integer i;" -> Leftover:"integer i;"
OK "int j;" -> (TypeKeyword::Int j)
OK "boolean;" -> Leftover:"boolean;"
Summarizing
This topic is a frequently recurring one, and it requires a solid understanding of skippers, lexemes first and foremost. Here are some other posts for inspiration:
Stop X3 symbols from matching substrings
parsing identifiers except keywords
Boost Spirit x3: parse delimited string Where I introduce a more general helper you might find useful:
auto kw = [](auto p) {
return x3::lexeme [ x3::as_parser(p) >> !x3::char_("a-zA-Z0-9_") ];
};
Stop X3 symbols from matching substrings
Dynamically switching symbol tables in x3
Good luck!
Complete Listing
Anti-Bitrot, the final listing:
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
namespace Ast {
enum class TypeKeyword { Int, Float, Bool };
static std::ostream& operator<<(std::ostream& os, TypeKeyword tk) {
switch (tk) {
case TypeKeyword::Int: return os << "TypeKeyword::Int";
case TypeKeyword::Float: return os << "TypeKeyword::Float";
case TypeKeyword::Bool: return os << "TypeKeyword::Bool";
};
return os << "?";
}
using Identifier = std::string;
struct Declaration {
TypeKeyword type;
Identifier id;
bool operator==(Declaration const&) const = default;
};
} // namespace Ast
BOOST_FUSION_ADAPT_STRUCT(Ast::Declaration, type, id)
namespace Ast{
static std::ostream& operator<<(std::ostream& os, Ast::Declaration const& d) {
return os << boost::lexical_cast<std::string>(boost::fusion::as_vector(d));
}
} // namespace Ast
namespace Parser {
struct Type : x3::symbols<Ast::TypeKeyword> {
Type() {
add("float", Ast::TypeKeyword::Float) //
("int", Ast::TypeKeyword::Int) //
("bool", Ast::TypeKeyword::Bool); //
}
} const static type_;
auto identchar = x3::char_("a-zA-Z_0-9");
auto identifier
= x3::rule<struct id_, Ast::Identifier> {"identifier"}
= x3::lexeme[x3::char_("a-zA-Z_") >> *identchar];
auto type_keyword
= x3::rule<struct tk_, Ast::TypeKeyword> {"type_keyword"}
= x3::lexeme[type_ >> !identchar];
auto declaration
= x3::rule<struct decl_, Ast::Declaration>{"declaration"}
= type_keyword >> identifier >> ';';
} // namespace Parser
struct LeftoverError : std::runtime_error {
using std::runtime_error::runtime_error;
friend std::ostream& operator<<(std::ostream& os, LeftoverError const& e) {
return os << (std::string("Leftover:\"") + e.what() + "\"");
}
bool operator==(LeftoverError const& other) const {
return std::string_view(what()) == other.what();
}
};
template<typename T>
using Maybe = boost::variant<T, LeftoverError>;
template <typename Rule,
typename Attr = typename x3::traits::attribute_of<Rule, x3::unused_type>::type,
typename R = Maybe<Attr>>
R parse(std::string_view input, Rule const& rule) {
Attr result;
auto f = input.begin(), l = input.end();
return x3::phrase_parse(f, l, rule, x3::space, result)
? R(std::move(result))
: LeftoverError({f, l});
}
int main()
{
using namespace Ast;
struct {
std::string_view input;
Maybe<Declaration> expected;
} cases[] = {
{"boolean b;", LeftoverError("boolean b;")},
{"integer i;", LeftoverError("integer i;")},
{"int j;", Declaration{TypeKeyword::Int, "j"}},
{"boolean;", LeftoverError("boolean;")},
};
for (auto [input, expected] : cases) {
auto actual = parse(input, Parser::declaration >> x3::eoi);
bool ok = expected == actual;
std::cout << std::left << std::setw(6) << (ok ? "OK" : "FAIL")
<< std::setw(12) << std::quoted(input) << " -> "
<< std::setw(20) << actual;
if (not ok)
std::cout << " (expected " << expected << ")";
std::cout << "\n";
}
}
¹ see Boost spirit skipper issues
I have a parser in which I want to capture certain types of whitespace as enum values and preserve the spaces for the "text" values.
My whitespace parser is pretty basic (Note: I've only added the pipe character here for test/dev purposes):
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
And I want to capture everything either into my enum or into std::strings:
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
And to parse my input I use something like this:
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
The problem though is that the parser stops at the first tab or newline it hits.
Code: http://coliru.stacked-crooked.com/a/d2cda4ce721279a4
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
enum Whitespace
{
NEWLINE,
TAB,
PIPE
};
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
struct print_visitor
: public boost::static_visitor<std::string>
{
std::string operator()(const Whitespace& ws) const
{
if (ws == Whitespace::NEWLINE)
{
return "newline";
}
else if (ws == Whitespace::PIPE)
{
return "pipe";
}
else
{
return "tab";
}
}
std::string operator()(const std::string& str) const
{
return str;
}
};
int main()
{
const std::string text = "Hello \n World";
std::string::const_iterator start = std::begin(text);
const std::string::const_iterator stop = std::end(text);
Elements elements{};
bool result =
phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);
if (!result)
{
std::cout << "failed to parse!\n";
}
else if (start != stop)
{
std::cout << "unparsed: " << std::string{start, stop} << '\n';
}
else
{
for (const auto& e : elements)
{
std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
}
}
}
If I parse the text Hello | World then I get the results I'm expecting. But if I instead use Hello \n World the whitespace after the \n is swallowed and the World is never parsed. Ideally I'd like to see this output:
element: [Hello ]
element: [newline]
element: [ World]
How can I accomplish this? Thank you!
My goto reference on skipper issues: Boost spirit skipper issues
In this case you made it work with no_skip[]. That's correct.
no_skip is like lexeme except it doesn't pre-skip, from the source (boost/spirit/home/x3/directive/no_skip.hpp):
// same as lexeme[], but does not pre-skip
Alternative Take
In your case I would flip the logic: just adjust the skipper itself.
Also, don't supply the skipper with phrase_parse, because your grammar is highly sensitive to the correct value of the skipper.
Your whole grammar could be:
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Here's a Live Demo On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
enum Whitespace { NEWLINE, TAB, PIPE };
struct whitespace_p : x3::symbols<Whitespace> {
whitespace_p() {
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} static const whitespace;
struct Element : x3::variant<Whitespace, std::string> {
using base_type::base_type;
using base_type::operator=;
};
using Elements = std::vector<Element>;
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(Whitespace ws) const {
switch(ws) {
case Whitespace::NEWLINE: return os << "[newline]";
case Whitespace::PIPE: return os << "[pipe]";
case Whitespace::TAB: return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text), stop = end(text);
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Elements elements;
if (!parse(start, stop, p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
Prints
[tab]
"Hello"
[newline]
"World"
Even Simpler?
It doesn't seem like you'd need any skipper here at all. Why not:
const auto p = *(+~x3::char_("\n\t|") | whitespace);
While we're at it, there's no need for symbols to map enums:
struct Element : x3::variant<char, std::string> {
// ...
};
using Elements = std::vector<Element>;
And then
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
struct Element : x3::variant<char, std::string> {
using variant = x3::variant<char, std::string>;
using variant::variant;
using variant::operator=;
friend std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(char ws) const {
switch(ws) {
case '\n': return os << "[newline]";
case '\t': return os << "[pipe]";
case '|': return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
};
using Elements = std::vector<Element>;
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text);
auto const stop = end(text);
Elements elements;
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
if (!parse(start, stop, *p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
Prints
[pipe]
"Hello "
[newline]
" World"
The problems are that you are using a phrase_parser instead of a parser at line 76.
Try to use something like
bool result =
parse(start, stop, elementsParser, elements);
Your phrase_parser was instructed to skip spaces, what you really don't want.
Look the first answer of How to use boost::spirit to parse a sequence of words into a vector?
I tried to run some simple parser that will parse [ 1, 11, 3, 6-4]. Basically, integer list with range notation.
I want to put everything into AST without semantic action. So I use x3::variant. My code 'seems' very similar to the expression example. However, it can't compile under g++ 6.2. It indeed compile ok with clang++ 6.0 but yield wrong result.
The boost version is 1.63.
It seems that I have some 'move' or initialization issue.
#include <iostream>
#include <list>
#include <vector>
#include <utility>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
namespace ns
{
namespace ast
{
namespace x3 = boost::spirit::x3;
// forward definition
class uintObj;
struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
class uintObj : public x3::variant <
unsigned int,
uintPair_t
>
{
public:
using base_type::base_type;
using base_type::operator=;
};
struct varVec
{
uintVec_t valVector;
};
}
}
BOOST_FUSION_ADAPT_STRUCT(
ns::ast::varVec,
valVector
)
namespace ns
{
namespace parser
{
// namespace x3 = boost::spirit::x3;
// using namespace x3;
using namespace boost::spirit::x3;
// definition of the range pair:
rule<class uintPair, ast::uintPair_t> const uintPair = "uintPair";
auto const uintPair_def =
uint_
>> '-'
>> uint_
;
rule<class uintObj, ast::uintObj> const uintObj = "uintObj";
auto const uintObj_def =
uint_
| uintPair
;
// define rule definition : rule<ID, attrib>
// more terse definition :
// struct varVec_class;
// using varVec_rule_t = x3::rule<varVec_class, ast::varVec>;
// varVec_rule_t const varVec = "varVec";
// varVec is the rule, "varVec" is the string name of the rule.
rule<class varVec, ast::varVec> const varVec = "varVec";
auto const varVec_def =
'['
>> uintObj % ','
>> ']'
;
BOOST_SPIRIT_DEFINE(
varVec,
uintObj,
uintPair
);
}
}
int main()
{
std::string input ("[1, 11, 3, 6-4]\n");
std::string::const_iterator begin = input.begin();
std::string::const_iterator end = input.end();
ns::ast::varVec result; // ast tree
using ns::parser::varVec; // grammar
using boost::spirit::x3::ascii::space;
bool success = phrase_parse(begin, end, varVec, space, result);
if (success && begin == end)
std::cout << "good" << std::endl;
else
std::cout << "bad" << std::endl;
return 0;
}
Swap the alternative order for the uintObj_def
auto const uintObj_def =
uintPair
| uint_
;
The formulation you have now will always match on a uint_ because the uintPair begins with a valid uint_.
mjcaisse's answer calls out the main problem I think you had. There were a few missing pieces, so I decided to make a simplified version that shows parsing results:
Live On Wandbox
#include <iostream>
#include <iomanip>
//#include <boost/fusion/adapted.hpp>
//#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ns { namespace ast {
// forward definition
struct uintObj;
//struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
struct uintObj : x3::variant<unsigned int, uintPair_t> {
using base_type::base_type;
using base_type::operator=;
friend std::ostream& operator<<(std::ostream& os, uintObj const& This) {
struct {
std::ostream& os;
void operator()(unsigned int v) const { os << v; }
void operator()(uintPair_t v) const { os << v.first << "-" << v.second; }
} vis { os };
boost::apply_visitor(vis, This);
return os;
}
};
using varVec = uintVec_t;
} }
namespace ns { namespace parser {
using namespace boost::spirit::x3;
template <typename T> auto as = [](auto p) { return rule<struct _, T> {} = p; };
auto const uintPair = as<ast::uintPair_t> ( uint_ >> '-' >> uint_ );
auto const uintObj = as<ast::uintObj> ( uintPair | uint_ );
auto const varVec = as<ast::varVec> ( '[' >> uintObj % ',' >> ']' );
} }
int main() {
using namespace ns;
std::string const input("[1, 11, 3, 6-4]\n");
auto begin = input.begin(), end = input.end();
ast::varVec result; // ast tree
bool success = phrase_parse(begin, end, parser::varVec, x3::ascii::space, result);
if (success) {
std::cout << "good\n";
for (auto& r : result)
std::cout << r << "\n";
}
else
std::cout << "bad\n";
if (begin != end)
std::cout << "Remaining unparsed: " << std::quoted(std::string(begin, end)) << std::endl;
}
Prints
good
1
11
3
6-4
In the grammar below, when I add the alternative (| property) to the start rule, I get this error
'boost::spirit::x3::traits::detail::move_to': none of the 3 overloads
could convert all the argument types
e:\data\boost\boost_1_65_1\boost\spirit\home\x3\support\traits\move_to.hpp 180
I suspect that the problem is that the property attribute is a struct, and property_list is a vector (shouldn't x3 create a vector of one entry?). What is the recommended way to design the AST structures to support alternatives? A Boost variant?
#include <string>
#include <vector>
#include <iostream>
#include <iomanip>
#include <map>
#pragma warning(push)
#pragma warning(disable : 4348)
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/variant.hpp>
#include <boost/fusion/adapted/struct.hpp>
#pragma warning(pop)
namespace x3 = boost::spirit::x3;
namespace scl_ast
{
struct KEYWORD : std::string
{
using std::string::string;
using std::string::operator=;
};
struct NIL
{
};
using VALUE = boost::variant <NIL, std::string, int, double, KEYWORD>;
struct PROPERTY
{
KEYWORD name;
VALUE value;
};
static inline std::ostream& operator<< (std::ostream& os, VALUE const& v)
{
struct
{
std::ostream& _os;
void operator () (std::string const& s) const { _os << std::quoted (s); }
void operator () (int i) const { _os << i; }
void operator () (double d) const { _os << d; }
void operator () (KEYWORD const& k) const { _os << k; }
void operator () (NIL) const { }
} vis { os };
boost::apply_visitor (vis, v);
return os;
}
static inline std::ostream& operator<< (std::ostream& os, PROPERTY const& prop)
{
os << prop.name;
if (prop.value.which ())
{
os << "=" << prop.value;
}
return os;
}
static inline std::ostream& operator<< (std::ostream& os, std::vector <PROPERTY> const& props)
{
for (auto const& prop : props)
{
os << prop << " ";
}
return os;
}
}; // End namespace scl_ast
BOOST_FUSION_ADAPT_STRUCT (scl_ast::PROPERTY, name, value)
//
// Keyword-value grammar for simple command language
//
namespace scl
{
using namespace x3;
auto const keyword = rule <struct _keyword, std::string> { "keyword" }
= lexeme [+char_ ("a-zA-Z0-9$_")];
auto const quoted_string
= lexeme ['"' >> *('\\' > char_ | ~char_ ('"')) >> '"'];
auto const value
= quoted_string
| x3::real_parser<double, x3::strict_real_policies<double>>{}
| x3::int_
| keyword;
auto const property = rule <struct _property, scl_ast::PROPERTY> { "property" }
= keyword >> -(("=" >> value));
auto const property_list = rule <struct _property_list, std::vector <scl_ast::PROPERTY>> { "property_list" }
= lit ('(') >> property % ',' >> lit (')');
auto const start = skip (blank) [property_list | property];
}; // End namespace scl
int
main ()
{
std::vector <std::string> input =
{
"(abc=1.,def=.5,ghi=2.0)",
"(ghi = 1, jkl = 3)",
"(abc,def=1,ghi=2.4,jkl=\"mno 123\", pqr = stu)",
"(abc = test, def, ghi=2)",
"abc=1",
"def = 2.7",
"ghi"
};
for (auto const& str : input)
{
std::vector <scl_ast::PROPERTY> result;
auto b = str.begin (), e = str.end ();
bool ok = x3::parse (b, e, scl::start, result);
std::cout << (ok ? "OK" : "FAIL") << '\t' << std::quoted (str) << std::endl;
if (ok)
{
std::cout << " -- Parsed: " << result << std::endl;
if (b != e)
{
std::cout << " -- Unparsed: " << std::quoted (std::string (b, e)) << std::endl;
}
}
std::cout << std::endl;
} // End for
return 0;
} // End main
I suspect that the problem is that the property attribute is a struct, and property_list is a vector (shouldn't x3 create a vector of one entry?)
Yes, yes, and yes, depending.
I see roughly 3 approaches to tackle this. Let's start with the simples:
Force Container-like synthesis: Live On Coliru
= skip(blank)[property_list | repeat(1)[property] ];
Coercing the attribute type. Turns out I was wrong here: It used to work for Qi, but apparently that has been dropped. Here it is, not-working and all:
auto coerce = [](auto p) { return rule<struct _, std::vector<scl_ast::PROPERTY> > {} = p; };
auto const start
= skip(blank)[property_list | coerce(property)];
Third is actually moot because the same problem. So I guess I owe you a contrived workaround, using semantic actions: Live On Coliru
auto push_back = [](auto& ctx) {
_val(ctx).push_back(_attr(ctx));
};
auto const start
= rule<struct _start, std::vector<scl_ast::PROPERTY>, true>{ "start" }
= skip(blank)[property_list | omit[property[push_back]]];