I write a minimum example to demonstrate this problem. It parses nested list of numbers like (1 2 3 (4 5) (6 (7 (8)))). I use spirit::lex to parse number and spirit::qi to parse list, so I code like this:
using TokenTypes = boost::mpl::vector<Object*>;
using Iterator = std::string::iterator;
class Lexer : public lex::lexer<actor_lexer<token<Iterator, TokenTypes>>>
{
public:
lex::token_def<> spaces; // used to skip spaces
lex::token_def<Object*> number; // create Number Object on heap and use the pointer as attribute
public:
Lexer();
};
template<typename... Ts>
using Rule = qi::rule<Lexer::iterator_type, Ts...>;
class Parser : public qi::grammar<Lexer::iterator_type, Object*>
{
public:
Lexer lexer;
Rule<Object*> list;
Rule<Object*> elem;
public:
Parser();
};
But in Parser::Parser(), I can't use Lexer::number in gramma expression:
Parser::Parser()
: base_type(elem)
{
// list = ...
elem %= list | lexer.number; // fail to compile!
}
Clang error message (brief):
/usr/include/boost/spirit/home/qi/detail/assign_to.hpp:42:36: error: type 'Object *' cannot be used prior to '::' because it has no members
: is_iter_range<typename C::value_type> {};
^
...
...
...
I can't understand why this is wrong considering it used to work fine when I use other scalar types like int and double as token attribute.
So, how to use pointer type as token attribute?
complete example
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
#include <vector>
class Object
{
public:
virtual ~Object() = default;
public:
virtual void print(std::ostream& out) = 0;
};
class Number : public Object
{
public:
int64_t _val;
public:
virtual void print(std::ostream& out) override { out << _val; }
};
class List : public Object
{
public:
std::vector<Object*> _objs;
public:
virtual void print(std::ostream& out) override
{
out << '(';
for (auto&& i : _objs) {
i->print(out);
out << ' ';
}
out << ')';
}
};
namespace qi = boost::spirit::qi;
namespace fu = boost::fusion;
namespace lex = boost::spirit::lex;
using lex::lexertl::actor_lexer;
using lex::lexertl::token;
using TokenTypes = boost::mpl::vector<Object*>;
using Iterator = std::string::iterator;
class Lexer : public lex::lexer<actor_lexer<token<Iterator, TokenTypes>>>
{
public:
lex::token_def<> spaces;
lex::token_def<Object*> number;
public:
Lexer();
};
template<typename... Ts>
using Rule = qi::rule<Lexer::iterator_type, Ts...>;
class Parser : public qi::grammar<Lexer::iterator_type, Object*>
{
public:
Lexer lexer;
Rule<Object*, qi::locals<List*>> list;
Rule<Object*> elem;
public:
Parser();
};
Lexer::Lexer()
{
self += '(';
self += ')';
spaces = R"(\s+)";
self +=
spaces[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
matched = lex::pass_flags::pass_ignore;
})];
number = R"(\d+)";
self +=
number[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
auto val = new Number();
auto iter = start;
qi::parse(iter, end, qi::long_long, val->_val);
ctx.set_value(val);
})];
}
Parser::Parser()
: base_type(elem)
{
list = ( //
qi::lit('(')[( //
[](auto& attr, auto& ctx, bool& pass) {
fu::at_c<0>(ctx.locals) = new List();
})] //
>> *(elem[( //
[](auto& attr, auto& ctx, bool& pass) {
List* list = fu::at_c<0>(ctx.locals);
list->_objs.push_back(attr);
})]) //
>> ')' //
)[( //
[](auto& attr, auto& ctx, bool& pass) {
List* list = fu::at_c<0>(ctx.locals);
fu::at_c<0>(ctx.attributes) = list;
})];
elem %= list | lexer.number;
}
int
main(int argc, char* argv[])
{
Parser parser;
std::string line;
while (std::getline(std::cin, line)) {
auto begin = line.begin();
Object* obj;
lex::tokenize_and_parse(begin, line.end(), parser.lexer, parser, obj);
obj->print(std::cout);
std::cout << std::endl;
}
}
Okay. Don't take this badly. Reading your sample (kudos for including a self-contained example! This saves a ton of time) I can't help but feeling that you've somehow stumbled on the worst possible cross-section of anti-patterns in Spirit Qi.
You're using a polymorphic AST:
How can I use polymorphic attributes with boost::spirit::qi parsers?
Semantic actions runs multiple times in boost::spirit parsing
Parsing inherited struct with boost spirit
You're using semantic actions. As a rule this already misses the sweet spot for embedded grammars, which is why I linked 126 answers to Boost Spirit: "Semantic actions are evil"?.
However, that's even just talking about semantic actions for Qi. You also use them for Lex:
self +=
spaces[([](auto& start, auto& end, auto& matched, auto& id,
auto& ctx) { matched = lex::pass_flags::pass_ignore; })];
Which is then further complicated by not using Phoenix, e.g.:
self += spaces[lex::_pass = lex::pass_flags::pass_ignore];
Which does exactly the same but with about 870% less noise and equal amounts of evil magic.
The other semantic action tops it all:
self += number[(
[](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
auto val = new Number();
auto iter = start;
qi::parse(iter, end, qi::long_long, val->_val);
ctx.set_value(val);
})];
Besides having all the problems already listed, it literally makes a fractal out of things by calling Qi from a Lex semantic action. Of course, this wants to be:
self += number[lex::_val = phx::new_<Number>(/*magic*/)];
But that magic doesn't exist. My gut feeling is that your issue that the Lexer shouldn't be concerned with AST types at all. At this point I feel that the lexer could/should be something like
using TokenTypes = boost::mpl::vector<uint64_t>;
using Iterator = std::string::const_iterator; // NOTE const_
struct Lexer : lex::lexer<actor_lexer<token<Iterator, TokenTypes>>> {
lex::token_def<> spaces;
lex::token_def<uint64_t> number;
Lexer() : spaces{R"(\s+)"}, number{R"(\d+)"} {
self += '(';
self += ')';
self += spaces[lex::_pass = lex::pass_flags::pass_ignore];
self += number;
}
};
That is, if it should exist at all.
That's the structural assessment. Let me apply simplifications to the Qi grammar along the same lines, just so we can reason about the code:
struct Parser : qi::grammar<Lexer::iterator_type, Object*()> {
Parser() : base_type(elem) {
using namespace qi::labels;
static constexpr qi::_a_type _list{};
const auto _objs = phx::bind(&List::_objs, _list);
list = ( //
'(' >> //
*(elem[phx::push_back(_objs, _1)]) //
>> ')' //
)[_val = phx::new_<List>(_list)];
elem //
= list[_val = _1] //
| lexer.number[_val = phx::new_<Number>(_1)];
}
Lexer lexer; // TODO FIXME excess scope
private:
using It = Lexer::iterator_type;
qi::rule<It, Object*(), qi::locals<List>> list;
qi::rule<It, Object*()> elem;
};
Note how I made the local List instead of List*, to just slightly reduce the chance of memory leaks. I guess for efficiency you could try to make Phoenix do move-semantics for you:
[_val = phx::new_<List>(phx::static_cast_<List&&>(_list))];
But at that point I wouldn't trust all the expression templates to do what you want and go to the more elaborate (even assuming c++17):
phx::function move_new = [](List& l) { return new List(std::move(l)); };
list = ( //
'(' >> //
*(elem[phx::push_back(_objs, _1)]) //
>> ')' //
)[_val = move_new(_list)];
Now we arrive at a workable demo:
Live On Coliru
int main() {
Parser parser;
for (std::string const line : {
"",
"42",
"()",
"(1 2 3)",
"(1 (44 55 () 66) 3)",
}) {
auto begin = line.begin();
Object* obj = nullptr;
if (lex::tokenize_and_parse(begin, line.end(), parser.lexer, parser,
obj)) {
obj->print(std::cout << std::quoted(line) << " -> ");
delete obj;
} else {
std::cout << std::quoted(line) << " -> FAILED";
}
std::cout << std::endl;
}
}
Printing
"" -> FAILED
"42" -> 42
"()" -> ()
"(1 2 3)" -> (1 2 3 )
"(1 (44 55 () 66) 3)" -> (1 (44 55 () 66 ) 3 )
Note that this simple test program ALREADY leaks 11 objects, for a total of 224 bytes. That's not even complicating things with error-handling or backtracking rules.
That's craziness. You could of course fix it with smart pointers, but that just further complicates everything while making sure performance will be very poor.
Further Simplifications
I would stop using Lex and dynamic polymorphism:
No More Lex:
The only "value" Lex is adding here is skipping spaces. Qi is very capable (see e.g. Boost spirit skipper issues for variations on that theme), so we'll use skip(space)[] instead:
Live On Coliru
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
struct Object {
virtual ~Object() = default;
virtual void print(std::ostream& out) const = 0;
friend std::ostream& operator<<(std::ostream& os, Object const& o) { return o.print(os), os; }
};
struct Number : Object {
Number(uint64_t v = 0) : _val(v) {}
int64_t _val;
virtual void print(std::ostream& out) const override { out << _val; }
};
struct List : Object {
std::vector<Object*> _objs;
virtual void print(std::ostream& out) const override {
out << '(';
for (auto&& el : _objs)
out << ' ' << *el;
out << ')';
}
};
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
template <typename It>
struct Parser : qi::grammar<It, Object*()> {
Parser() : Parser::base_type(start) {
using namespace qi::labels;
static constexpr qi::_a_type _list{};
const auto _objs = phx::bind(&List::_objs, _list);
phx::function move_new = [](List& l) { return new List(std::move(l)); };
list = ( //
'(' >> //
*(elem[phx::push_back(_objs, _1)]) //
>> ')' //
)[_val = move_new(_list)];
elem //
= list[_val = _1] //
| qi::uint_[_val = phx::new_<Number>(_1)] //
;
start = qi::skip(qi::space)[elem];
}
private:
qi::rule<It, Object*(), qi::space_type, qi::locals<List>> list;
qi::rule<It, Object*(), qi::space_type> elem;
// lexemes
qi::rule<It, Object*()> start;
};
int main() {
Parser<std::string::const_iterator> const parser;
for (std::string const line : {
"",
"42",
"()",
"(1 2 3)",
"(1 (44 55 () 66) 3)",
}) {
Object* obj = nullptr;
if (parse(line.begin(), line.end(), parser >> qi::eoi, obj)) {
std::cout << std::quoted(line) << " -> " << *obj;
} else {
std::cout << std::quoted(line) << " -> FAILED";
}
delete obj;
std::cout << std::endl;
}
}
Still leaking like C++ went out of fashion, but at least doing so in 20 fewer LoC and half the compile time.
Static Polymorphism
Hiding all the raw pointer stuff (or avoiding it completely, depending on the exact AST requirements):
using Number = uint64_t;
using Object = boost::make_recursive_variant< //
Number, //
std::vector<boost::recursive_variant_>>::type;
using List = std::vector<Object>;
For ease of supplying operator<< I moved them into an AST namespace below.
The parser goes down to:
template <typename It> struct Parser : qi::grammar<It, AST::Object()> {
Parser() : Parser::base_type(start) {
list = '(' >> *elem >> ')';
elem = list | qi::uint_;
start = qi::skip(qi::space)[elem];
}
private:
qi::rule<It, AST::List(), qi::space_type> list;
qi::rule<It, AST::Object(), qi::space_type> elem;
qi::rule<It, AST::Object()> start;
};
No more lex, no more phoenix, no more leaks, no more manual semantic actions. Just, expressive code.
Live Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace AST {
struct Number {
uint64_t v;
Number(uint64_t v = 0) : v(v){};
};
using Object = boost::make_recursive_variant< //
Number, //
std::vector<boost::recursive_variant_>>::type;
using List = std::vector<Object>;
std::ostream& operator<<(std::ostream& os, Number const& n) {
return os << n.v;
}
std::ostream& operator<<(std::ostream& os, List const& l) {
os << '(';
for (auto& el : l)
os << ' ' << el;
return os << ')';
}
} // namespace AST
namespace qi = boost::spirit::qi;
template <typename It> struct Parser : qi::grammar<It, AST::Object()> {
Parser() : Parser::base_type(start) {
list = '(' >> *elem >> ')';
elem = list | qi::uint_;
start = qi::skip(qi::space)[elem];
}
private:
qi::rule<It, AST::List(), qi::space_type> list;
qi::rule<It, AST::Object(), qi::space_type> elem;
qi::rule<It, AST::Object()> start;
};
int main() {
Parser<std::string::const_iterator> const parser;
for (std::string const line : {
"",
"42",
"()",
"(1 2 3)",
"(1 (44 55 () 66) 3)",
}) {
AST::Object obj;
if (parse(line.begin(), line.end(), parser >> qi::eoi, obj))
std::cout << std::quoted(line) << " -> " << obj << "\n";
else
std::cout << std::quoted(line) << " -> FAILED\n";
}
}
Prints
"" -> FAILED
"42" -> 42
"()" -> ()
"(1 2 3)" -> ( 1 2 3)
"(1 (44 55 () 66) 3)" -> ( 1 ( 44 55 () 66) 3)
But this time, without leaking memory. And also, it now compiles fast enough that Compiler Explorer can also handle it.
Found a walkaround: use std::size_t and reinterpret_cast to replace pointer types:
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
#include <vector>
class Object
{
public:
virtual ~Object() = default;
public:
virtual void print(std::ostream& out) = 0;
};
class Number : public Object
{
public:
int64_t _val;
public:
virtual void print(std::ostream& out) override { out << _val; }
};
class List : public Object
{
public:
std::vector<Object*> _objs;
public:
virtual void print(std::ostream& out) override
{
out << '(';
for (auto&& i : _objs) {
i->print(out);
out << ' ';
}
out << ')';
}
};
namespace qi = boost::spirit::qi;
namespace fu = boost::fusion;
namespace lex = boost::spirit::lex;
using lex::lexertl::actor_lexer;
using lex::lexertl::token;
using TokenTypes = boost::mpl::vector<std::size_t>;
using Iterator = std::string::iterator;
class Lexer : public lex::lexer<actor_lexer<token<Iterator, TokenTypes>>>
{
public:
lex::token_def<> spaces;
lex::token_def<std::size_t> number; // use std::size_t instead
public:
Lexer();
};
template<typename... Ts>
using Rule = qi::rule<Lexer::iterator_type, Ts...>;
class Parser : public qi::grammar<Lexer::iterator_type, Object*>
{
public:
Lexer lexer;
Rule<Object*, qi::locals<List*>> list;
Rule<Object*> elem;
public:
Parser();
};
Lexer::Lexer()
{
self += '(';
self += ')';
spaces = R"(\s+)";
self +=
spaces[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
matched = lex::pass_flags::pass_ignore;
})];
number = R"(\d+)";
self +=
number[([](auto& start, auto& end, auto& matched, auto& id, auto& ctx) {
auto val = new Number();
auto iter = start;
qi::parse(iter, end, qi::long_long, val->_val);
ctx.set_value(reinterpret_cast<std::size_t>(val)); // cast here
})];
}
Parser::Parser()
: base_type(elem)
{
list = ( //
qi::lit('(')[( //
[](auto& attr, auto& ctx, bool& pass) {
fu::at_c<0>(ctx.locals) = new List();
})] //
>> *(elem[( //
[](auto& attr, auto& ctx, bool& pass) {
List* list = fu::at_c<0>(ctx.locals);
list->_objs.push_back(attr);
})]) //
>> ')' //
)[( //
[](auto& attr, auto& ctx, bool& pass) {
List* list = fu::at_c<0>(ctx.locals);
fu::at_c<0>(ctx.attributes) = list;
})];
elem %= list | qi::omit[lexer.number[([](auto& attr, auto& ctx, bool& pass) {
fu::at_c<0>(ctx.attributes) = reinterpret_cast<Object*>(attr); // cast here
})]];
}
int
main(int argc, char* argv[])
{
Parser parser;
std::string line;
while (std::getline(std::cin, line)) {
auto begin = line.begin();
Object* obj;
lex::tokenize_and_parse(begin, line.end(), parser.lexer, parser, obj);
obj->print(std::cout);
std::cout << std::endl;
}
}
I think this is really ugly. Anyone has a better solution???
Related
What is the algorithm for developing a string parser to create a geometry? The geometry is generated in 2 steps: at the first step, we create primitives; at the second, we combine primitives into objects.
The syntax is presented in the string below.
string str="[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//..."
class PRIMITIVE{
int number;
public:
Primitive& operator+ (Primitive& primitive) {}; //overloading arithmetic operations
Primitive& operator* (Primitive& primitive) {};
Primitive& operator- (Primitive& primitive) {};
virtual bool check_in_point_inside_primitive = 0;
};
class SPHERE:public PRIMITIVE{
double m_radius;
public:
SPHERE(double radius): m_radius(radius) {}; //In which part of the parser to create objects?
bool check_in_point_inside_sphere(Point& point){};
};
class BOX:public PRIMITIVE{
double m_A;
double m_B;
public:
BOX(double A, double B): m_A(A), m_B(B) {};
bool check_in_point_inside_box(Point& point){};
};
class OBJECT{
int number;
PRIMITIVE& primitive;
public:
OBJECT(){};
bool check_in_point_inside_object(Primitive& PRIMITIVE1, Primitive& PRIMITIVE2, Point& point){
//>How to construct a function from an expression 'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
}
};
How to analyze the string PRIMITIVE1=SPHERE(RADIUS=5.5) and pass a parameter to the constructor of SPHERE()? How to identify this object with the name PRIMITIVE 1 to call to it in OBJECT? Is it possible to create a pair<PRIMITIVE1,SPHERE(5.5)> and store all primitives in map?
How to parse the string of the OBJECT1 and to construct a function from an expression PRIMITIVE2*(-PRIMITIVE1) inside an OBJECT1? This expression will be required multiple times when determining the position of each point relative to the object.
How to use boost::spirit for this task? Tokenize a string using boost::spirit::lex, and then develop rules using boost::spirit::qi?
As a finger exercise, and despite the serious problems I see with the chosen virtual type hierarchy, let's try to make a value-oriented container of Primitives that can be indexed by their id (ById):
Live On Coliru
#include <boost/intrusive/set.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <iostream>
namespace bi = boost::intrusive;
struct Point {
};
using IndexHook = bi::set_member_hook<bi::link_mode<bi::auto_unlink>>;
class Primitive {
int _id;
public:
struct ById {
bool operator()(auto const&... oper) const { return std::less<>{}(access(oper)...); }
private:
static int access(int id) { return id; }
static int access(Primitive const& p) { return p._id; }
};
IndexHook _index;
Primitive(int id) : _id(id) {}
virtual ~Primitive() = default;
int id() const { return _id; }
Primitive& operator+= (Primitive const& primitive) { return *this; } //overloading arithmetic operations
Primitive& operator*= (Primitive const& primitive) { return *this; }
Primitive& operator-= (Primitive const& primitive) { return *this; }
virtual bool check_in_point_inside(Point const&) const = 0;
};
using Index =
bi::set<Primitive, bi::constant_time_size<false>,
bi::compare<Primitive::ById>,
bi::member_hook<Primitive, IndexHook, &Primitive::_index>>;
class Sphere : public Primitive {
double _radius;
public:
Sphere(int id, double radius)
: Primitive(id)
, _radius(radius) {} // In which part of the parser to create objects?
bool check_in_point_inside(Point const& point) const override { return false; }
};
class Box : public Primitive {
double _A;
double _B;
public:
Box(int id, double A, double B) : Primitive(id), _A(A), _B(B) {}
bool check_in_point_inside(Point const& point) const override { return false; }
};
class Object{
int _id;
Primitive& _primitive;
public:
Object(int id, Primitive& p) : _id(id), _primitive(p) {}
bool check_in_point_inside_object(Primitive const& p1, Primitive const& p2,
Point const& point) const
{
//>How to construct a function from an expression
//'PRIMITIVE2*(-PRIMITIVE1)' when parsing?
return false;
}
};
using Primitives = boost::poly_collection::base_collection<Primitive>;
int main() {
Primitives test;
test.insert(Sphere{2, 4.0});
test.insert(Sphere{4, 4.0});
test.insert(Box{2, 5, 6});
test.insert(Sphere{1, 4.0});
test.insert(Box{3, 5, 6});
Index idx;
for (auto& p : test)
if (not idx.insert(p).second)
std::cout << "Duplicate id " << p.id() << " not indexed\n";
for (auto& p : idx)
std::cout << typeid(p).name() << " " << p.id() << "\n";
std::cout << "---\n";
for (auto& p : test)
std::cout << typeid(p).name() << " " << p.id() << "\n";
}
Prints
Duplicate id 2 not indexed
6Sphere 1
3Box 2
3Box 3
6Sphere 4
---
3Box 2
3Box 3
6Sphere 2
6Sphere 4
6Sphere 1
So far so good. This is an important building block to prevent all manner of pain when dealing with virtual types in Spirit grammars¹
PS: I've since dropped the idea of intrusive_set. It doesn't work because the base_container moves items around on reallocation, and that unlinks the items from their intrusive set.
Instead, see below for an approach that doesn't try to resolve ids during the parse.
Parsing primitives
We get the ID from the PRIMITIVE1. We could store it somewhere before naturally parsing the primitives themselves, then set the id on it on commit.
Let's start with defining a State object for the parser:
struct State {
Ast::Id next_id;
Ast::Primitives primitives;
Ast::Objects objects;
template <typename... T> void commit(boost::variant<T...>& val) {
boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
}
template <typename T> void commit(T& primitiveOrExpr) {
auto id = std::exchange(next_id, 0);
if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
primitiveOrExpr.id = id;
primitives.insert(std::move(primitiveOrExpr));
} else {
objects.push_back(Ast::Object{id, std::move(primitiveOrExpr)});
}
}
};
As you can see, we just have a place to store the primitives, objects. And then there is the temporary storage for our next_id while we're still parsing the next entity.
The commit function helps sorting the products of the parser rules. As it happens, they can be variant, which is why we have the apply_visitor dispatch for commit on a variant.
Again, as the footnote¹ explains, Spirit's natural attribute synthesis favors static polymorphism.
The semantic actions we need are now:
static inline auto& state(auto& ctx) { return get<State>(ctx); }
auto draft = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };
Now let's jump ahead to the primitives:
auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');
auto primitive =
("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';
That's still cheating a little, as I've used the param helper to reduce typing:
auto number = as<Ast::Number>(double_, "number");
auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
auto param(auto name) { return param(name, number); }
As you can see I've already assumed most parameters will have numerical nature.
What Are Objects Really?
Looking at it for a while, I concluded that really an Object is defined as an id number (OBJECT1, OBJECT2...) which is tied to an expression. The expression can reference primitives and have some unary and binary operators.
Let's sketch an AST for that:
using Number = double;
struct RefPrimitive { Id id; };
struct Binary;
struct Unary;
using Expr = boost::variant< //
Number, //
RefPrimitive, //
boost::recursive_wrapper<Unary>, //
boost::recursive_wrapper<Binary> //
>;
struct Unary { char op; Expr oper; };
struct Binary { Expr lhs; char op; Expr rhs; };
struct Object { Id id; Expr expr; };
Now To Parse Into That Expression AST
It's really 1:1 rules for each Ast node type. E.g.:
auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_]);
Now many of the expression rules can recurse, so we need declared rules with definitions via BOOST_SPIRIT_DEFINE:
// object expression grammar
rule<struct simple_tag, Ast::Expr> simple{"simple"};
rule<struct unary_tag, Ast::Unary> unary{"unary"};
rule<struct expr_tag, Ast::Expr> expr{"expr"};
rule<struct term_tag, Ast::Expr> term{"term"};
rule<struct factor_tag, Ast::Expr> factor{"factor"};
As you can tell, some of these are not 1:1 with the Ast nodes, mainly because of the recursion and the difference in operator precedence (term vs factor vs. simple). It's easier to see with the rule definition:
auto unary_def = char_("-+") >> simple;
auto simple_def = ref_prim | unary | '(' >> expr >> ")";
auto factor_def = simple;
auto term_def = factor[assign] >> *(char_("*/") >> term)[make_binary];
auto expr_def = term[assign] >> *(char_("-+") >> expr)[make_binary];
Because none of the rules actually expose a Binary, automatic attribute propagation is not convenient there². Instead, we use assign and make_binary semantic actions:
auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };
auto make_binary = [](auto& ctx) {
using boost::fusion::at_c;
auto& attr = _attr(ctx);
auto op = at_c<0>(attr);
auto& rhs = at_c<1>(attr);
_val(ctx) = Ast::Binary { _val(ctx), op, rhs };
};
Finally, let's tie the defintions to the declared rules (using their tag types):
BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)
All we need is a similar line to primitive:
auto object =
("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';
And we can finish up by defining each line as a primitive|object:
auto line = primitive | object;
auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
At the top level we expect the [GEOMETRY] header, specify that we want to be case insensitive and ... that ws_comment is to be skipped³:
auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> eol]);
This allows us to ignore the // comments as well.
Live Demo Time
Live On Compiler Explorer
//#define BOOST_SPIRIT_X3_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/poly_collection/base_collection.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <list>
#include <map>
namespace x3 = boost::spirit::x3;
namespace Ast {
using Id = uint32_t;
struct Point { }; // ?? where does this belong?
struct Primitive {
Id id;
virtual ~Primitive() = default;
};
struct Sphere : Primitive { double radius; };
struct Box : Primitive { double a, b; };
using Number = double;
struct RefPrimitive { Id id; };
struct Binary;
struct Unary;
using Expr = boost::variant< //
Number, //
RefPrimitive, //
boost::recursive_wrapper<Unary>, //
boost::recursive_wrapper<Binary> //
>;
struct Unary { char op; Expr oper; };
struct Binary { Expr lhs; char op; Expr rhs; };
struct Object { Id id; Expr expr; };
using Primitives = boost::poly_collection::base_collection<Primitive>;
using Objects = std::list<Object>;
using Index = std::map<Id, std::reference_wrapper<Primitive const>>;
std::ostream& operator<<(std::ostream& os, Primitive const& p) {
return os << boost::core::demangle(typeid(p).name()) << " "
<< "(id: " << p.id << ")";
}
std::ostream& operator<<(std::ostream& os, Object const& o) {
return os << "object(id:" << o.id << ", expr:" << o.expr << ")";
}
std::ostream& operator<<(std::ostream& os, RefPrimitive ref) {
return os << "reference(prim:" << ref.id << ")";
}
std::ostream& operator<<(std::ostream& os, Binary const& b) {
return os << '(' << b.lhs << b.op << b.rhs << ')';
}
std::ostream& operator<<(std::ostream& os, Unary const& u) {
return os << '(' << u.op << u.oper << ')';
}
} // namespace Ast
BOOST_FUSION_ADAPT_STRUCT(Ast::Primitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Sphere, radius)
BOOST_FUSION_ADAPT_STRUCT(Ast::Box, a, b)
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::RefPrimitive, id)
BOOST_FUSION_ADAPT_STRUCT(Ast::Unary, op, oper)
namespace Parser {
using namespace x3;
struct State {
Ast::Id next_id;
Ast::Primitives primitives;
Ast::Objects objects;
template <typename... T> void commit(boost::variant<T...>& val) {
boost::apply_visitor([this](auto& obj) { commit(obj); }, val);
}
template <typename T> void commit(T& val) {
auto id = std::exchange(next_id, 0);
if constexpr (std::is_base_of_v<Ast::Primitive, T>) {
val.id = id;
primitives.insert(std::move(val));
} else {
objects.push_back(Ast::Object{id, std::move(val)});
}
}
};
static inline auto& state(auto& ctx) { return get<State>(ctx); }
auto draft = [](auto& ctx) { state(ctx).next_id = _attr(ctx); };
auto commit = [](auto& ctx) { state(ctx).commit(_attr(ctx)); };
template <typename T>
auto as = [](auto p, char const* name = "as") {
return rule<struct _, T>{name} = p;
};
auto ws_comment = +(blank | lexeme["//" >> *(char_ - eol) >> (eol | eoi)]);
auto number = as<Ast::Number>(double_, "number");
auto param(auto name, auto p) { return eps >> omit[name] >> '=' >> p; }
auto param(auto name) { return param(name, number); }
auto sphere = as<Ast::Sphere>(eps >> "sphere" >>'(' >> param("radius") >> ')');
auto box = as<Ast::Box>(eps >> "box" >> '(' >> param('a') >> ',' >> param('b') >> ')');
auto primitive =
("primitive" >> uint_[draft] >> '=' >> (sphere | box)[commit]) > ';';
auto ref_prim = as<Ast::RefPrimitive>(lexeme["primitive" >> uint_], "ref_prim");
// object expression grammar
rule<struct simple_tag, Ast::Expr> simple{"simple"};
rule<struct unary_tag, Ast::Unary> unary{"unary"};
rule<struct expr_tag, Ast::Expr> expr{"expr"};
rule<struct term_tag, Ast::Expr> term{"term"};
rule<struct factor_tag, Ast::Expr> factor{"factor"};
auto assign = [](auto& ctx) { _val(ctx) = _attr(ctx); };
auto make_binary = [](auto& ctx) {
using boost::fusion::at_c;
auto& attr = _attr(ctx);
auto op = at_c<0>(attr);
auto& rhs = at_c<1>(attr);
_val(ctx) = Ast::Binary { _val(ctx), op, rhs };
};
auto unary_def = char_("-+") >> simple;
auto simple_def = ref_prim | unary | '(' >> expr >> ")";
auto factor_def = simple;
auto term_def = factor[assign] >> *(char_("*/") >> term)[make_binary];
auto expr_def = term[assign] >> *(char_("-+") >> expr)[make_binary];
BOOST_SPIRIT_DEFINE(simple, unary, expr, term, factor)
auto object =
("object" >> uint_[draft] >> '=' >> (expr)[commit]) > ';';
auto line = primitive | object;
auto file = no_case[skip(ws_comment)[*eol >> "[geometry]" >> (-line % eol) >> eoi]];
} // namespace Parser
int main() {
for (std::string const input :
{
R"(
[geometry]
primitive1=sphere(radius=5.5);
primitive2=box(a=-5.2, b=7.3);
//...
object1=primitive2*(-primitive1);
//...)",
R"(
[GEOMETRY]
PRIMITIVE1=SPHERE(RADIUS=5.5);
PRIMITIVE2=BOX(A=-5.2, B=7.3);
//...
OBJECT1=PRIMITIVE2*(-PRIMITIVE1);
//...)",
}) //
{
Parser::State state;
bool ok = parse(begin(input), end(input),
x3::with<Parser::State>(state)[Parser::file]);
std::cout << "Parse success? " << std::boolalpha << ok << "\n";
Ast::Index index;
for (auto& p : state.primitives)
if (auto[it,ok] = index.emplace(p.id, p); not ok) {
std::cout << "Duplicate id " << p
<< " (conflicts with existing " << it->second.get()
<< ")\n";
}
std::cout << "Primitives by ID:\n";
for (auto& [id, prim] : index)
std::cout << " - " << prim << "\n";
std::cout << "Objects in definition order:\n";
for (auto& obj: state.objects)
std::cout << " - " << obj << "\n";
}
}
Prints
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
Parse success? true
Primitives by ID:
- Ast::Sphere (id: 1)
- Ast::Box (id: 2)
Objects in definition order:
- object(id:1, expr:(reference(prim:2)*(-reference(prim:1))))
¹ How can I use polymorphic attributes with boost::spirit::qi parsers?
² and insisting on that leads to classical in-efficiency with rules that cause a lot of backtracking
³ outside of lexemes
I would like to use boost::spirit in order to extract the stoichiometry of compounds made of several elements from a brute formula. Within a given compound, my parser should be able to distinguish three kind of chemical element patterns:
natural element made of a mixture of isotopes in natural abundance
pure isotope
mixture of isotopes in non-natural abundance
Those patterns are then used to parse such following compounds:
"C" --> natural carbon made of C[12] and C[13] in natural abundance
"CH4" --> methane made of natural carbon and hydrogen
"C2H{H[1](0.8)H[2](0.2)}6" --> ethane made of natural C and non-natural H made of 80% of hydrogen and 20% of deuterium
"U[235]" --> pure uranium 235
Obviously, the chemical element patterns can be in any order (e.g. CH[1]4 and H[1]4C ...) and frequencies.
I wrote my parser which is quite close to do the job but I still face one problem.
Here is my code:
template <typename Iterator>
struct ChemicalFormulaParser : qi::grammar<Iterator,isotopesMixture(),qi::locals<isotopesMixture,double>>
{
ChemicalFormulaParser(): ChemicalFormulaParser::base_type(_start)
{
namespace phx = boost::phoenix;
// Semantic action for handling the case of pure isotope
phx::function<PureIsotopeBuilder> const build_pure_isotope = PureIsotopeBuilder();
// Semantic action for handling the case of pure isotope mixture
phx::function<IsotopesMixtureBuilder> const build_isotopes_mixture = IsotopesMixtureBuilder();
// Semantic action for handling the case of natural element
phx::function<NaturalElementBuilder> const build_natural_element = NaturalElementBuilder();
phx::function<UpdateElement> const update_element = UpdateElement();
// XML database that store all the isotopes of the periodical table
ChemicalDatabaseManager<Isotope>* imgr=ChemicalDatabaseManager<Isotope>::Instance();
const auto& isotopeDatabase=imgr->getDatabase();
// Loop over the database to the spirit symbols for the isotopes names (e.g. H[1],C[14]) and the elements (e.g. H,C)
for (const auto& isotope : isotopeDatabase) {
_isotopeNames.add(isotope.second.getName(),isotope.second.getName());
_elementSymbols.add(isotope.second.getProperty<std::string>("symbol"),isotope.second.getProperty<std::string>("symbol"));
}
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> qi::double_ >> ")") >> "}";
_isotopesMixtureToken = (_elementSymbols[qi::_a=qi::_1] >> _mixtureToken[qi::_b=qi::_1])[qi::_pass=build_isotopes_mixture(qi::_val,qi::_a,qi::_b)];
_pureIsotopeToken = (_isotopeNames[qi::_a=qi::_1])[qi::_pass=build_pure_isotope(qi::_val,qi::_a)];
_naturalElementToken = (_elementSymbols[qi::_a=qi::_1])[qi::_pass=build_natural_element(qi::_val,qi::_a)];
_start = +( ( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken)[qi::_a=qi::_1] >>
(qi::double_|qi::attr(1.0))[qi::_b=qi::_1])[qi::_pass=update_element(qi::_val,qi::_a,qi::_b)] );
}
//! Defines the rule for matching a prefix
qi::symbols<char,std::string> _isotopeNames;
qi::symbols<char,std::string> _elementSymbols;
qi::rule<Iterator,isotopesMixture()> _mixtureToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<std::string,isotopesMixture>> _isotopesMixtureToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<std::string>> _pureIsotopeToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<std::string>> _naturalElementToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<isotopesMixture,double>> _start;
};
Basically each separate element pattern can be parsed properly with their respective semantic action which produces as ouput a map between the isotopes that builds the compound and their corresponding stoichiometry. The problem starts when parsing the following compound:
CH{H[1](0.9)H[2](0.4)}
In such case the semantic action build_isotopes_mixture return false because 0.9+0.4 is non sense for a sum of ratio. Hence I would have expected and wanted my parser to fail for this compound. However, because of the _start rule which uses alternative operator for the three kind of chemical element pattern, the parser manages to parse it by 1) throwing away the {H[1](0.9)H[2](0.4)} part 2) keeping the preceding H 3) parsing it using the _naturalElementToken. Is my grammar not clear enough for being expressed as a parser ? How to use the alternative operator in such a way that, when an occurrence has been found but gave a false when running the semantic action, the parser stops ?
How to use the alternative operator in such a way that, when an occurrence has been found but gave a false when running the semantic action, the parser stops ?
In general, you achieve this by adding an expectation point to prevent backtracking.
In this case you are actually "conflating" several tasks:
matching input
interpreting matched input
validating matched input
Spirit excels at matching input, has
great facilities when it comes to interpreting (mostly in the sense of AST creation). However, things get "nasty" with validating on the fly.
An advice I often repeat is to consider separating the concerns whenever possible. I'd consider
building a direct AST representation of the input first,
transforming/normalizing/expanding/canonicalizing to a more convenient or meaningful domain representation
do final validations on the result
This gives you the most expressive code while keeping it highly maintainable.
Because I don't understand the problem domain well enough and the code sample is not nearly complete enough to induce it, I will not try to give a full sample of what I have in mind. Instead I'll try my best at sketching the expectation point approach I mentioned at the outset.
Mock Up Sample To Compile
This took the most time. (Consider doing the leg work for the people who are going to help you)
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <map>
namespace qi = boost::spirit::qi;
struct DummyBuilder {
using result_type = bool;
template <typename... Ts>
bool operator()(Ts&&...) const { return true; }
};
struct PureIsotopeBuilder : DummyBuilder { };
struct IsotopesMixtureBuilder : DummyBuilder { };
struct NaturalElementBuilder : DummyBuilder { };
struct UpdateElement : DummyBuilder { };
struct Isotope {
std::string getName() const { return _name; }
Isotope(std::string const& name = "unnamed", std::string const& symbol = "?") : _name(name), _symbol(symbol) { }
template <typename T> std::string getProperty(std::string const& name) const {
if (name == "symbol")
return _symbol;
throw std::domain_error("no such property (" + name + ")");
}
private:
std::string _name, _symbol;
};
using MixComponent = std::pair<Isotope, double>;
using isotopesMixture = std::list<MixComponent>;
template <typename Isotope>
struct ChemicalDatabaseManager {
static ChemicalDatabaseManager* Instance() {
static ChemicalDatabaseManager s_instance;
return &s_instance;
}
auto& getDatabase() { return _db; }
private:
std::map<int, Isotope> _db {
{ 1, { "H[1]", "H" } },
{ 2, { "H[2]", "H" } },
{ 3, { "Carbon", "C" } },
{ 4, { "U[235]", "U" } },
};
};
template <typename Iterator>
struct ChemicalFormulaParser : qi::grammar<Iterator, isotopesMixture(), qi::locals<isotopesMixture, double> >
{
ChemicalFormulaParser(): ChemicalFormulaParser::base_type(_start)
{
using namespace qi;
namespace phx = boost::phoenix;
phx::function<PureIsotopeBuilder> build_pure_isotope; // Semantic action for handling the case of pure isotope
phx::function<IsotopesMixtureBuilder> build_isotopes_mixture; // Semantic action for handling the case of pure isotope mixture
phx::function<NaturalElementBuilder> build_natural_element; // Semantic action for handling the case of natural element
phx::function<UpdateElement> update_element;
// XML database that store all the isotopes of the periodical table
ChemicalDatabaseManager<Isotope>* imgr = ChemicalDatabaseManager<Isotope>::Instance();
const auto& isotopeDatabase=imgr->getDatabase();
// Loop over the database to the spirit symbols for the isotopes names (e.g. H[1],C[14]) and the elements (e.g. H,C)
for (const auto& isotope : isotopeDatabase) {
_isotopeNames.add(isotope.second.getName(),isotope.second.getName());
_elementSymbols.add(isotope.second.template getProperty<std::string>("symbol"),isotope.second.template getProperty<std::string>("symbol"));
}
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}";
_isotopesMixtureToken = (_elementSymbols[_a=_1] >> _mixtureToken[_b=_1])[_pass=build_isotopes_mixture(_val,_a,_b)];
_pureIsotopeToken = (_isotopeNames[_a=_1])[_pass=build_pure_isotope(_val,_a)];
_naturalElementToken = (_elementSymbols[_a=_1])[_pass=build_natural_element(_val,_a)];
_start = +( ( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken)[_a=_1] >>
(double_|attr(1.0))[_b=_1]) [_pass=update_element(_val,_a,_b)] );
}
private:
//! Defines the rule for matching a prefix
qi::symbols<char, std::string> _isotopeNames;
qi::symbols<char, std::string> _elementSymbols;
qi::rule<Iterator, isotopesMixture()> _mixtureToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<std::string, isotopesMixture> > _isotopesMixtureToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<std::string> > _pureIsotopeToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<std::string> > _naturalElementToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<isotopesMixture, double> > _start;
};
int main() {
using It = std::string::const_iterator;
ChemicalFormulaParser<It> parser;
for (std::string const input : {
"C", // --> natural carbon made of C[12] and C[13] in natural abundance
"CH4", // --> methane made of natural carbon and hydrogen
"C2H{H[1](0.8)H[2](0.2)}6", // --> ethane made of natural C and non-natural H made of 80% of hydrogen and 20% of deuterium
"C2H{H[1](0.9)H[2](0.2)}6", // --> invalid mixture (total is 110%?)
"U[235]", // --> pure uranium 235
})
{
std::cout << " ============= '" << input << "' ===========\n";
It f = input.begin(), l = input.end();
isotopesMixture mixture;
bool ok = qi::parse(f, l, parser, mixture);
if (ok)
std::cout << "Parsed successfully\n";
else
std::cout << "Parse failure\n";
if (f != l)
std::cout << "Remaining input unparsed: '" << std::string(f, l) << "'\n";
}
}
Which, as given, just prints
============= 'C' ===========
Parsed successfully
============= 'CH4' ===========
Parsed successfully
============= 'C2H{H[1](0.8)H[2](0.2)}6' ===========
Parsed successfully
============= 'C2H{H[1](0.9)H[2](0.2)}6' ===========
Parsed successfully
============= 'U[235]' ===========
Parsed successfully
General remarks:
no need for the locals, just use the regular placeholders:
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}";
_isotopesMixtureToken = (_elementSymbols >> _mixtureToken) [ _pass=build_isotopes_mixture(_val, _1, _2) ];
_pureIsotopeToken = _isotopeNames [ _pass=build_pure_isotope(_val, _1) ];
_naturalElementToken = _elementSymbols [ _pass=build_natural_element(_val, _1) ];
_start = +(
( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken) >>
(double_|attr(1.0)) ) [ _pass=update_element(_val, _1, _2) ]
);
// ....
qi::rule<Iterator, isotopesMixture()> _mixtureToken;
qi::rule<Iterator, isotopesMixture()> _isotopesMixtureToken;
qi::rule<Iterator, isotopesMixture()> _pureIsotopeToken;
qi::rule<Iterator, isotopesMixture()> _naturalElementToken;
qi::rule<Iterator, isotopesMixture()> _start;
you will want to handle conflicts between names/symbols (possibly just by prioritizing one or the other)
conforming compilers will require the template qualifier (unless I totally mis-guessed your datastructure, in which case I don't know what the template argument to ChemicalDatabaseManager was supposed to mean).
Hint, MSVC is not a standards-conforming compiler
Live On Coliru
Expectation Point Sketch
Assuming that the "weights" need to add up to 100% inside the _mixtureToken rule, we can either make build_isotopes_micture "not dummy" and add the validation:
struct IsotopesMixtureBuilder {
bool operator()(isotopesMixture&/* output*/, std::string const&/* elementSymbol*/, isotopesMixture const& mixture) const {
using namespace boost::adaptors;
// validate weights total only
return std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
}
};
However, as you note, it will thwart things by backtracking. Instead you might /assert/ that any complete mixture add up to 100%:
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}" > eps(validate_weight_total(_val));
With something like
struct ValidateWeightTotal {
bool operator()(isotopesMixture const& mixture) const {
using namespace boost::adaptors;
bool ok = std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
return ok;
// or perhaps just :
return ok? ok : throw InconsistentsWeights {};
}
struct InconsistentsWeights : virtual std::runtime_error {
InconsistentsWeights() : std::runtime_error("InconsistentsWeights") {}
};
};
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/range/numeric.hpp>
#include <map>
namespace qi = boost::spirit::qi;
struct DummyBuilder {
using result_type = bool;
template <typename... Ts>
bool operator()(Ts&&...) const { return true; }
};
struct PureIsotopeBuilder : DummyBuilder { };
struct NaturalElementBuilder : DummyBuilder { };
struct UpdateElement : DummyBuilder { };
struct Isotope {
std::string getName() const { return _name; }
Isotope(std::string const& name = "unnamed", std::string const& symbol = "?") : _name(name), _symbol(symbol) { }
template <typename T> std::string getProperty(std::string const& name) const {
if (name == "symbol")
return _symbol;
throw std::domain_error("no such property (" + name + ")");
}
private:
std::string _name, _symbol;
};
using MixComponent = std::pair<Isotope, double>;
using isotopesMixture = std::list<MixComponent>;
struct IsotopesMixtureBuilder {
bool operator()(isotopesMixture&/* output*/, std::string const&/* elementSymbol*/, isotopesMixture const& mixture) const {
using namespace boost::adaptors;
// validate weights total only
return std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
}
};
struct ValidateWeightTotal {
bool operator()(isotopesMixture const& mixture) const {
using namespace boost::adaptors;
bool ok = std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
return ok;
// or perhaps just :
return ok? ok : throw InconsistentsWeights {};
}
struct InconsistentsWeights : virtual std::runtime_error {
InconsistentsWeights() : std::runtime_error("InconsistentsWeights") {}
};
};
template <typename Isotope>
struct ChemicalDatabaseManager {
static ChemicalDatabaseManager* Instance() {
static ChemicalDatabaseManager s_instance;
return &s_instance;
}
auto& getDatabase() { return _db; }
private:
std::map<int, Isotope> _db {
{ 1, { "H[1]", "H" } },
{ 2, { "H[2]", "H" } },
{ 3, { "Carbon", "C" } },
{ 4, { "U[235]", "U" } },
};
};
template <typename Iterator>
struct ChemicalFormulaParser : qi::grammar<Iterator, isotopesMixture()>
{
ChemicalFormulaParser(): ChemicalFormulaParser::base_type(_start)
{
using namespace qi;
namespace phx = boost::phoenix;
phx::function<PureIsotopeBuilder> build_pure_isotope; // Semantic action for handling the case of pure isotope
phx::function<IsotopesMixtureBuilder> build_isotopes_mixture; // Semantic action for handling the case of pure isotope mixture
phx::function<NaturalElementBuilder> build_natural_element; // Semantic action for handling the case of natural element
phx::function<UpdateElement> update_element;
phx::function<ValidateWeightTotal> validate_weight_total;
// XML database that store all the isotopes of the periodical table
ChemicalDatabaseManager<Isotope>* imgr = ChemicalDatabaseManager<Isotope>::Instance();
const auto& isotopeDatabase=imgr->getDatabase();
// Loop over the database to the spirit symbols for the isotopes names (e.g. H[1],C[14]) and the elements (e.g. H,C)
for (const auto& isotope : isotopeDatabase) {
_isotopeNames.add(isotope.second.getName(),isotope.second.getName());
_elementSymbols.add(isotope.second.template getProperty<std::string>("symbol"), isotope.second.template getProperty<std::string>("symbol"));
}
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}" > eps(validate_weight_total(_val));
_isotopesMixtureToken = (_elementSymbols >> _mixtureToken) [ _pass=build_isotopes_mixture(_val, _1, _2) ];
_pureIsotopeToken = _isotopeNames [ _pass=build_pure_isotope(_val, _1) ];
_naturalElementToken = _elementSymbols [ _pass=build_natural_element(_val, _1) ];
_start = +(
( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken) >>
(double_|attr(1.0)) ) [ _pass=update_element(_val, _1, _2) ]
);
}
private:
//! Defines the rule for matching a prefix
qi::symbols<char, std::string> _isotopeNames;
qi::symbols<char, std::string> _elementSymbols;
qi::rule<Iterator, isotopesMixture()> _mixtureToken;
qi::rule<Iterator, isotopesMixture()> _isotopesMixtureToken;
qi::rule<Iterator, isotopesMixture()> _pureIsotopeToken;
qi::rule<Iterator, isotopesMixture()> _naturalElementToken;
qi::rule<Iterator, isotopesMixture()> _start;
};
int main() {
using It = std::string::const_iterator;
ChemicalFormulaParser<It> parser;
for (std::string const input : {
"C", // --> natural carbon made of C[12] and C[13] in natural abundance
"CH4", // --> methane made of natural carbon and hydrogen
"C2H{H[1](0.8)H[2](0.2)}6", // --> ethane made of natural C and non-natural H made of 80% of hydrogen and 20% of deuterium
"C2H{H[1](0.9)H[2](0.2)}6", // --> invalid mixture (total is 110%?)
"U[235]", // --> pure uranium 235
}) try
{
std::cout << " ============= '" << input << "' ===========\n";
It f = input.begin(), l = input.end();
isotopesMixture mixture;
bool ok = qi::parse(f, l, parser, mixture);
if (ok)
std::cout << "Parsed successfully\n";
else
std::cout << "Parse failure\n";
if (f != l)
std::cout << "Remaining input unparsed: '" << std::string(f, l) << "'\n";
} catch(std::exception const& e) {
std::cout << "Caught exception '" << e.what() << "'\n";
}
}
Prints
============= 'C' ===========
Parsed successfully
============= 'CH4' ===========
Parsed successfully
============= 'C2H{H[1](0.8)H[2](0.2)}6' ===========
Parsed successfully
============= 'C2H{H[1](0.9)H[2](0.2)}6' ===========
Caught exception 'boost::spirit::qi::expectation_failure'
============= 'U[235]' ===========
Parsed successfully
Cannot figure out why this rule unary_msg doesnt work, it says the attribute type is qi::unused_type but this makes no sense to me. Why does boost torment me like this?
template<class It, class Skip= boost::spirit::ascii::space_type>
struct g3: qi::grammar<It, ast::expr(), Skip>
{
template<typename...Args>
using R = qi::rule<It, Args...>;
R<ast::expr(), Skip> start, expr_, term_, unary_term;
R<ast::intlit()> int_;
R<std::string()> selector_;
R<boost::fusion::vector<ast::expr, std::vector<std::string>>, Skip> unary_msg;
g3(): g3::base_type(start)
{
namespace ph = boost::phoenix;
using namespace boost::spirit::qi;
int_ = qi::int_;
selector_ = lexeme[+qi::alnum];
term_ = int_;
unary_msg = term_ >> *selector_;
unary_term = unary_msg[ qi::_val = ph::bind(&collect_unary, qi::_1) ];
expr_ = unary_term;
start = expr_;
}
};
full code: http://coliru.stacked-crooked.com/a/e9afef4585ce76c3
Like cv_and_he mentions, add the parens.
Working example with many cleanup suggestions:
Live On Coliru
Notes
don't use using namespace at toplevel
don't use conflicting namespaces (using std and boost are very likely to lead to surprises or conflicts)
don't use internal attribute types like fusion::vector
use modern style BOOST_FUSION_ADAPT_STRUCT
some minor style issues
For example the following function
ast::expr collect_unary (const boost::fusion::vector<ast::expr, std::vector<std::string>>& parts)
//ast::expr collect_unary (const ast::expr& a, const std::vector<std::string>& msgs)
{
ast::expr res = boost::fusion::at_c<0>(parts);//a;
const auto& msgs = boost::fusion::at_c<1>(parts);
for(const auto& m: msgs)
{
ast::message msg;
msg.name = m;
msg.args.push_back(res);
res = msg;
}
return res;
}
was changed into:
ast::expr collect_unary(ast::expr accum, const std::vector<std::string>& msgs) {
for (const auto &m : msgs)
accum = ast::message { m, { accum } };
return accum;
}
Full Listing And Output
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ast {
struct intlit {
int value;
intlit(int i = 0) : value(i) { }
intlit(intlit const&other) = default;
};
struct nil {};
struct message;
using expr = boost::make_recursive_variant<nil, intlit, message>::type;
struct message {
std::string name;
std::vector<ast::expr> args;
};
}
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(ast::intlit, value)
BOOST_FUSION_ADAPT_STRUCT(ast::message, name, args)
struct ast_print {
void operator()(ast::nil &) const { std::cout << "nil"; }
void operator()(ast::intlit &i) const { std::cout << i.value; }
void operator()(ast::message &m) const {
std::cout << "(" << m.name;
for (auto &it : m.args) {
std::cout << " ";
boost::apply_visitor(ast_print(), it);
}
std::cout << ")" << std::endl;
}
};
ast::expr collect_unary(ast::expr accum, const std::vector<std::string>& msgs)
{
for (const auto &m : msgs)
accum = ast::message { m, { accum } };
return accum;
}
template <class It, class Skip = boost::spirit::ascii::space_type> struct g3 : qi::grammar<It, ast::expr(), Skip> {
g3() : g3::base_type(start) {
using namespace boost::spirit::qi;
namespace ph = boost::phoenix;
int_ = qi::int_;
selector_ = +qi::alnum;
term_ = int_;
unary_msg = (term_ >> *selector_) [ _val = ph::bind(collect_unary, _1, _2) ];
unary_term = unary_msg;
expr_ = unary_term;
start = expr_;
}
private:
template <typename Attr, typename... Args> using R = qi::rule<It, Attr(), Args...>;
R<ast::expr, Skip> start, expr_, term_, unary_term, unary_msg;
R<ast::intlit> int_;
R<std::string> selector_;
};
template <class Parser, typename Result> bool test(const std::string &input, const Parser &parser, Result &result) {
auto first = input.begin(), last = input.end();
return qi::phrase_parse(first, last, parser, boost::spirit::ascii::space, result);
}
int main() {
std::string const input = "42 x y";
g3<std::string::const_iterator> p;
ast::expr res;
if (test(input, p, res)) {
std::cout << "parse ok " << std::endl;
boost::apply_visitor(ast_print(), res);
}
}
Prints
parse ok
(y (x 42)
)
I'm trying to write a parser to create an AST using boost::spirit. As a first step I'm trying to wrap numerical values in an AST node. This is the code I'm using:
AST_NodePtr make_AST_NodePtr(const int& i) {
return std::make_shared<AST_Node>(i);
}
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace l = qi::labels;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr(), ascii::space_type> {
test_grammar() : test_grammar::base_type(test) {
test = qi::int_ [qi::_val = make_AST_NodePtr(qi::_1)];
}
qi::rule<Iterator, AST_NodePtr(), ascii::space_type> test;
};
As far as I understood it from the documentation q::_1 should contain the value parsed by qi::int_, but the above code always gives me an error along the lines
invalid initialization of reference of type ‘const int&’ from expression of type ‘const _1_type {aka const boost::phoenix::actor<boost::spirit::argument<0> >}
Why does this not work even though qi::_1 is supposed to hold the parsed valued? How else would I parse the input into a custom AST?
You're using a regular function inside the semantic action.
This means that in the contructor the compiler will try to invoke that make_AST_NodePtr function with the argument supplied: qi::_1.
Q. Why does this not work even though qi::_1 is supposed to hold the parsed valued?
A. qi::_1 does not hold the parsed value. It represents (is-a-placeholder-for) the first unbound argument in the function call
This can /obviously/ never work. The function expects an integer.
So what gives?
You need to make a "lazy" or "deferred" function for use in the semantic action. Using only pre-supplied Boost Phoenix functors, you could spell it out:
test = qi::int_ [ qi::_val = px::construct<AST_NodePtr>(px::new_<AST_Node>(qi::_1)) ];
You don't need the helper function this way. But the result is both ugly and suboptimal. So, let's do better!
Using a Phoenix Function wrapper
struct make_shared_f {
std::shared_ptr<AST_Node> operator()(int v) const {
return std::make_shared<AST_Node>(v);
}
};
px::function<make_shared_f> make_shared_;
With this defined, you can simplify the semantic action to:
test = qi::int_ [ qi::_val = make_shared_(qi::_1) ];
Actually, if you make it generic you can reuse it for many types:
template <typename T>
struct make_shared_f {
template <typename... Args>
std::shared_ptr<T> operator()(Args&&... args) const {
return std::make_shared<T>(std::forward<Args>(args)...);
}
};
px::function<make_shared_f<AST_Node> > make_shared_;
DEMO
Here's a self-contained example showing some style fixes in the process:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <memory>
struct AST_Node {
AST_Node(int v) : _value(v) {}
int value() const { return _value; }
private:
int _value;
};
using AST_NodePtr = std::shared_ptr<AST_Node>;
AST_NodePtr make_AST_NodePtr(const int& i) {
return std::make_shared<AST_Node>(i);
}
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr()> {
test_grammar() : test_grammar::base_type(start) {
using boost::spirit::ascii::space;
start = qi::skip(space) [ test ];
test = qi::int_ [ qi::_val = make_shared_(qi::_1) ];
}
private:
struct make_shared_f {
std::shared_ptr<AST_Node> operator()(int v) const {
return std::make_shared<AST_Node>(v);
}
};
px::function<make_shared_f> make_shared_;
//
qi::rule<Iterator, AST_NodePtr()> start;
qi::rule<Iterator, AST_NodePtr(), boost::spirit::ascii::space_type> test;
};
int main() {
AST_NodePtr parsed;
std::string const input ("42");
auto f = input.begin(), l = input.end();
test_grammar<std::string::const_iterator> g;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parsed: " << (parsed? std::to_string(parsed->value()) : "nullptr") << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l)
{
std::cout << "Remaining input: '" << std::string(f, l) << "'\n";
}
}
Prints
Parsed: 42
BONUS: Alternative using BOOST_PHOENIX_ADAPT_FUNCTION
You can actually use your free function if you wish, and use it as follows:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <memory>
struct AST_Node {
AST_Node(int v) : _value(v) {}
int value() const { return _value; }
private:
int _value;
};
using AST_NodePtr = std::shared_ptr<AST_Node>;
AST_NodePtr make_AST_NodePtr(int i) {
return std::make_shared<AST_Node>(i);
}
BOOST_PHOENIX_ADAPT_FUNCTION(AST_NodePtr, make_AST_NodePtr_, make_AST_NodePtr, 1)
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr()> {
test_grammar() : test_grammar::base_type(start) {
using boost::spirit::ascii::space;
start = qi::skip(space) [ test ] ;
test = qi::int_ [ qi::_val = make_AST_NodePtr_(qi::_1) ] ;
}
private:
qi::rule<Iterator, AST_NodePtr()> start;
qi::rule<Iterator, AST_NodePtr(), boost::spirit::ascii::space_type> test;
};
int main() {
AST_NodePtr parsed;
std::string const input ("42");
auto f = input.begin(), l = input.end();
test_grammar<std::string::const_iterator> g;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parsed: " << (parsed? std::to_string(parsed->value()) : "nullptr") << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l)
{
std::cout << "Remaining input: '" << std::string(f, l) << "'\n";
}
}
I would like to split a string into parts:
input = "part1/part2/part3/also3"
and fill the structure that consist of three std::string with these parts.
struct strings
{
std::string a; // <- part1
std::string b; // <- part2
std::string c; // <- part3/also3
};
However my parser seems to merge the parts together and store it into the first std::string.
Here is the code on coliru
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
struct strings
{
std::string a;
std::string b;
std::string c;
};
BOOST_FUSION_ADAPT_STRUCT(strings,
(std::string, a) (std::string, b) (std::string, c))
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/'] > last_part;
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
}
The output is:
Parsed: 1
a:onetwo
b:three/four
c:
while I expected:
Parsed: 1
a:one
b:two
c:three/four
I'd like not to modify the grammar heavily and leave "repeat" statement in it, because the "real" grammar is much more complex of course and I will need to have it there. Just need to find the way to disable the concatenations. I tried
repeat (parts-1) [as_string[part] > '/']
but that does not compile.
The trouble here is specifically that qi::repeat is documented to expose a container of element-types.
Now, because the exposed attribute type of the rule (strings) is not a container-type, Spirit "knows" how to flatten the values.
Of course it's not what you wanted in this case, but usually this heuristic makes for really convenient accumulation of string values.
Fix 1: use a container attribute
You could witness the reverse fix by getting rid of the non-container (sequence) target attribute:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
using strings = std::vector<std::string>;
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/']
> last_part
;
part = +(~char_ ("/"))
;
last_part = +char_
;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
for(auto i = 0ul; i<ss.size(); ++i)
std::cout << static_cast<char>('a'+i) << ":" << ss[i] << "\n";
}
}
What you really wanted:
Of course you want to keep the struct/sequence adaptation (?); In this case that's really tricky because as soon as you use any kind of Kleene operator (*,%) or qi::repeat you'll have the attribute transformation rules as outlined above, ruining your mood.
Luckily, I just remembered I have a hacky solution based on the auto_ parser. Note the caveat in this older answer though:
Read empty values with boost::spirit
CAVEAT Specializing for std::string directly like this might not be the best idea (it might not always be appropriate and might interact badly with other parsers).
By default create_parser<std::string> is not defined, so you might decide this usage is good enough for your case:
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
struct strings {
std::string a;
std::string b;
std::string c;
};
namespace boost { namespace spirit { namespace traits {
template <> struct create_parser<std::string> {
typedef proto::result_of::deep_copy<
BOOST_TYPEOF(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
)
>::type type;
static type call() {
return proto::deep_copy(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
);
}
};
}}}
BOOST_FUSION_ADAPT_STRUCT(strings, (std::string, a)(std::string, b)(std::string, c))
template <typename Iterator>
struct google_parser : qi::grammar<Iterator, strings()> {
google_parser() : google_parser::base_type(entry, "contacts") {
using namespace qi;
entry =
skip('/') [auto_]
;
}
private:
qi::rule<Iterator, strings()> entry;
};
int main() {
using It = std::string::const_iterator;
google_parser<It> p;
std::string const input = "part1/part2/part3/also3";
It f = input.begin(), l = input.end();
strings ss;
bool ok = qi::parse(f, l, p >> *qi::char_, ss, ss.c);
if (ok)
{
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
Prints
a:part1
b:part2
c:part3/also3
Update/Bonus
In reponse to the OP's own answer I wanted to challenge myself to write it more generically indeed.
The main thing is to to write set_field_ in such a way that it doesn't know/assume more than required about the destination sequence type.
With a bit of Boost Fusion magic that became:
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
It has the added flexibility of applying Spirit's attribute compatibility rules¹. So, you can use the same grammar with both the following types:
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
To drive the point home, I made the adaptation of the second struct reverse the field order:
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
Without further ado, the demo program:
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/algorithm/iteration.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace fus = boost::fusion;
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
// output helpers for demo:
namespace {
inline std::ostream& operator<<(std::ostream& os, strings const& data) {
return os
<< "a:\"" << data.a << "\" "
<< "b:\"" << data.b << "\" "
<< "c:\"" << data.c << "\" ";
}
inline std::ostream& operator<<(std::ostream& os, alternative const& data) {
os << "first: vector<char> { \""; os.write(&data.first[0], data.first.size()); os << "\" } ";
os << "second: \"" << data.second << "\" ";
os << "third: \"" << data.third << "\" ";
return os;
}
}
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
boost::phoenix::function<set_field_> const set_field = {};
template <typename It, typename Target>
struct split_string_grammar: qi::grammar<It, Target(), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
_a_type _current; // custom placeholder
split_string =
eps [ _current = 0u ]
> repeat (parts-1)
[part [ set_field(_val, _1, _current++) ] > '/']
> last_part [ set_field(_val, _1, _current++) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, Target(), qi::locals<unsigned> > split_string;
qi::rule<It, std::string()> part, last_part;
};
template <size_t N = 3, typename Target>
void run_test(Target target) {
using It = std::string::const_iterator;
std::string const input { "one/two/three/four" };
It first = input.begin(), last = input.end();
split_string_grammar<It, Target> split_string(N);
bool ok = qi::parse (first, last, split_string, target);
if (ok) {
std::cout << target << '\n';
} else {
std::cout << "Parse failed\n";
}
if (first != last)
std::cout << "Remaining input left unparsed: '" << std::string(first, last) << "'\n";
}
int main ()
{
run_test(strings {});
run_test(alternative {});
}
Output:
a:"one" b:"two" c:"three/four"
first: vector<char> { "three/four" } second: "two" third: "one"
¹ as with BOOST_SPIRIT_ACTIONS_ALLOW_ATTR_COMPAT
Besides sehe's suggestions one more possible way is to use semantic actions (coliru):
struct set_field_
{
void operator() (strings& dst, std::string const& src, unsigned& idx) const
{
assert (idx < 3);
switch (idx++) {
case 0: dst.a = src; break;
case 1: dst.b = src; break;
case 2: dst.c = src; break;
}
}
};
boost::phoenix::function<set_field_> const set_field { set_field_ {} };
template <typename It>
struct split_string_grammar: qi::grammar<It, strings (), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
split_string = eps [ _a = val (0) ]
> repeat (parts-1) [part [ set_field (_val, _1, _a) ] > '/']
> last_part [ set_field (_val, _1, _a) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings (), qi::locals<unsigned> > split_string;
qi::rule<It, std::string ()> part, last_part;
};