parse enum using boost spirit qi parser - c++

I am trying to parse char to fill in a C++11 strongly typed enum. I need help with writing a parser for the enums.. it needs to be high performance as well.
I have a string with the following format
Category | Type | Attributes
Example:
std::string str1 = "A|D|name=tim, address=3 infinite loop"
std::string str2 = "A|C|name=poc, address=5 overflow street"
I am representing Category and Type as follows:
enum class CATEGORY : char
{
Animal:'A', Bird:'B'
}
enum class TYPE : char
{
Dog:'D', Bird:'B'
}
struct Zoo
{
Category category;
Type type;
std::string name;
std::string address;
};
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
using qi::char_;
using qi::_1;
using qi::lit
using boost::phoenix::ref;
//need help here
start_=char_[/*how to assign enum */ ]>>'|'
>>char_[ /*how to assign enum */ ]>>'|'
>>lit;
}
qi::rule<Iterator, ascii::space_type> start_;
};
I have problem around creating a parser type like the built in ex: qi::char_ to "parse enums CATEGORY and TYPE".
thanks for the help in advance..

As usual there's several approaches:
The semantic action way (ad-hoc)
The customization points way
The qi::symbols way
Which is the most appropriate depends. All three approaches should be equally efficient. The symbols<> apprach seems to be most safe (not involving casts) and flexible: you can e.g. use it with variable-length enum members, use it inside no_case[] etc.
Case by case:
The semantic action way (ad-hoc):
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
using namespace qi;
category_ = char_("AB") [ _val = phx::static_cast_<Category>(_1) ];
type_ = char_("DB") [ _val = phx::static_cast_<Type>(_1) ];
start_ = category_ >> '|' > type_;
}
private:
qi::rule<Iterator, Category(), ascii::space_type> category_;
qi::rule<Iterator, Type(), ascii::space_type> type_;
qi::rule<Iterator, ascii::space_type> start_;
};
You can see it Live On Coliru printing:
Parse success: [A, D]
Remaining unparsed input '|name=tim, address=3 infinite loop'
---------------------------
expected: tag: char-set
got: "C|name=poc, address=5 overflow street"
Expectation failure: boost::spirit::qi::expectation_failure at 'C|name=poc, address=5 overflow street'
---------------------------
The customization points way:
namespace boost { namespace spirit { namespace traits {
template <typename Enum, typename RawValue>
struct assign_to_attribute_from_value<Enum, RawValue, typename enable_if<is_enum<Enum>>::type> {
static void call(RawValue const& raw, Enum& cat) {
cat = static_cast<Enum>(raw);
}
};
}}}
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = qi::char_("AB") > '|' > qi::char_("DB");
}
private:
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
See it Live On Coliru too, with the same output (obviously)
The qi::symbols way:
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = category_ > '|' > type_;
}
private:
struct Category_ : qi::symbols<char,Category> {
Category_() {
this->add("A", Category::Animal)("B", Category::Bird);
}
} category_;
struct Type_ : qi::symbols<char,Type> {
Type_() {
this->add("D", Type::Dog)("B", Type::Bird);
}
} type_;
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
See it Live On Coliru
Full Demo
This happens to be the traits approach, but you can reuse the skeleton with both other grammars:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/struct.hpp>
enum class Category : char { Animal='A', Bird='B' };
enum class Type : char { Dog='D', Bird='B' };
struct Zoo {
Category category;
Type type;
};
BOOST_FUSION_ADAPT_STRUCT(Zoo, (Category,category)(Type,type))
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phx = boost::phoenix;
namespace boost { namespace spirit { namespace traits {
template <typename Enum, typename RawValue>
struct assign_to_attribute_from_value<Enum, RawValue, typename enable_if<is_enum<Enum>>::type> {
static void call(RawValue const& raw, Enum& cat) {
cat = static_cast<Enum>(raw);
}
};
}}}
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = qi::char_("AB") > '|' > qi::char_("DB");
}
private:
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
/////////////////////////////////////////////////
// For exception output
struct printer {
typedef boost::spirit::utf8_string string;
void element(string const& tag, string const& value, int depth) const {
for (int i = 0; i < (depth*4); ++i) std::cout << ' '; // indent to depth
std::cout << "tag: " << tag;
if (value != "") std::cout << ", value: " << value;
std::cout << std::endl;
}
};
void print_info(boost::spirit::info const& what) {
using boost::spirit::basic_info_walker;
printer pr;
basic_info_walker<printer> walker(pr, what.tag, 0);
boost::apply_visitor(walker, what.value);
}
//
/////////////////////////////////////////////////
int main()
{
typedef std::string::const_iterator It;
static const ZooBuilderGrammar<It> p;
for (std::string const str1 : {
"A|D|name=tim, address=3 infinite loop",
"A|C|name=poc, address=5 overflow street" })
{
It f(str1.begin()), l(str1.end());
try {
Zoo zoo;
bool ok = qi::phrase_parse(f,l,p,ascii::space,zoo);
if (ok)
std::cout << "Parse success: [" << static_cast<char>(zoo.category) << ", " << static_cast<char>(zoo.type) << "]\n";
else
std::cout << "Failed to parse '" << str1 << "'\n";
if (f!=l)
std::cout << "Remaining unparsed input '" << std::string(f,l) << "'\n";
} catch(qi::expectation_failure<It> const& x)
{
std::cout << "expected: "; print_info(x.what_);
std::cout << "got: \"" << std::string(x.first, x.last) << '"' << std::endl;
}
std::cout << "---------------------------\n";
}
}

I'd use the qi::symbols way as sugested by sehe, but in this way to improve code readability:
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
category_.add
("A", Category::Animal)
("B", Category::Bird)
;
type_.add
("D", Type::Dog)
("B", Type::Bird)
;
start_ = category_ > '|' > type_;
}
private:
qi::symbols<char,Type> category_;
qi::symbols<char,Category> type_;
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};

Related

How to refer to value from symbol table?

I can't figure out how to pass value from symbol table into the function.
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
single_attribute_rule = qi::lexeme[words[attr_word = grammar_word] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
qi::eps(phx::bind(verify_range, qi::_r1, qi::_val)); // <-- HERE is the problem
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, AttributeData(), qi::blank_type> single_attribute_rule;
};
I would think I can refer to the value of the found key using qi::_r1 but the code doesn't compile:
main.cpp:72:31: required from ‘single_attribute_grammar<Iterator>::single_attribute_grammar(const word_symbols&) [with Iterator = boost::spirit::classic::position_iterator2<boost::spirit::multi_pass<std::istreambuf_iterator<char, std::char_traits<char> > > >; word_symbols = boost::spirit::qi::symbols<char, WordGrammar>]’
main.cpp:87:16: required from ‘all_attributes_grammar<Iterator>::all_attributes_grammar(const word_symbols&) [with Iterator = boost::spirit::classic::position_iterator2<boost::spirit::multi_pass<std::istreambuf_iterator<char, std::char_traits<char> > > >; word_symbols = boost::spirit::qi::symbols<char, WordGrammar>]’
main.cpp:130:62: required from here
/usr/include/boost/spirit/home/support/context.hpp:180:13: error: static assertion failed: index_is_out_of_bounds
BOOST_SPIRIT_ASSERT_MSG(
^
In file included from /usr/include/boost/spirit/home/qi/domain.hpp:18:0,
from /usr/include/boost/spirit/home/qi/meta_compiler.hpp:15,
from /usr/include/boost/spirit/home/qi/action/action.hpp:14,
from /usr/include/boost/spirit/home/qi/action.hpp:14,
from /usr/include/boost/spirit/home/qi.hpp:14,
from /usr/include/boost/spirit/include/qi.hpp:16,
from main.cpp:11:
/usr/include/boost/spirit/home/support/context.hpp:186:13: error: no type named ‘type’ in ‘struct boost::fusion::result_of::at_c<boost::fusion::cons<AttributeData&, boost::fusion::nil_>, 1>’
type;
^~~~
In case it's helpful here is the MVCE.
#include <iomanip>
#include <string>
#include <vector>
#include <boost/variant.hpp>
#include <boost/optional/optional.hpp>
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp> // construct
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/phoenix/bind.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace classic = boost::spirit::classic;
namespace phx = boost::phoenix;
namespace fusion = boost::fusion;
struct AttributeData
{
std::string word;
int value;
};
using AttributeVariant = boost::variant<
AttributeData
>;
struct WordGrammar
{
std::string word;
int range_from;
int range_to;
};
BOOST_FUSION_ADAPT_STRUCT(
AttributeData,
(std::string, word)
(int, value)
)
using word_symbols = qi::symbols<char, WordGrammar>;
bool verify_range(const WordGrammar &grammar, const AttributeData &data)
{
if(data.value < grammar.range_from || data.value > grammar.range_to)
{
return false;
}
return true;
}
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
single_attribute_rule = qi::lexeme[words[attr_word = grammar_word] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
qi::eps(phx::bind(verify_range, qi::_r1, qi::_val)); // <-- HERE is the problem
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, AttributeData(), qi::blank_type> single_attribute_rule;
};
template <typename Iterator>
class all_attributes_grammar : public qi::grammar<Iterator, std::vector<AttributeVariant>(), qi::blank_type>
{
public:
all_attributes_grammar(const word_symbols &words) : all_attributes_grammar::base_type(line_attribute_vec_rule)
, sag(words)
{
line_attribute_rule = (
sag
);
BOOST_SPIRIT_DEBUG_NODE(line_attribute_rule);
line_attribute_vec_rule = (line_attribute_rule % *qi::blank) > qi::eoi;
BOOST_SPIRIT_DEBUG_NODE(line_attribute_vec_rule);
}
private:
single_attribute_grammar<Iterator> sag;
qi::rule<Iterator, AttributeVariant(), qi::blank_type> line_attribute_rule;
qi::rule<Iterator, std::vector<AttributeVariant>(), qi::blank_type> line_attribute_vec_rule;
};
int main()
{
std::vector<AttributeVariant> value;
std::string data{"N100 X-100 AC5"};
std::istringstream input(data);
// iterate over stream input
typedef std::istreambuf_iterator<char> base_iterator_type;
base_iterator_type in_begin(input);
// convert input iterator to forward iterator, usable by spirit parser
typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
forward_iterator_type fwd_end;
// wrap forward iterator with position iterator, to record the position
typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
pos_iterator_type position_begin(fwd_begin, fwd_end);
pos_iterator_type position_end;
word_symbols sym;
sym.add
("N", {"N", 1, 9999})
("X", {"X", -999, 999})
("AC", {"AC", -99, 999})
;
all_attributes_grammar<pos_iterator_type> all_attr_gr(sym);
try
{
qi::phrase_parse(position_begin, position_end, all_attr_gr, qi::blank, value);
}
catch (const qi::expectation_failure<pos_iterator_type>& e)
{
const classic::file_position_base<std::string>& pos = e.first.get_position();
std::cout <<
"Parse error at line " << pos.line << " column " << pos.column << ":" << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here" << std::endl;
}
return 0;
}
Any ideas?
Lots of things to be simplified.
why is AttributeData adapted, when you use semantic actions instead? (see Boost Spirit: "Semantic actions are evil"?)
skipping whitespace is much more elegant when using a skipper. It's also logically contradictory to skip whitespace inside a lexeme (see Boost spirit skipper issues)
In particular, the skipping of whitespace here:
line_attribute_vec_rule = (line_attribute_rule % *qi::blank) > qi::eoi;
is completely impotent (because blanks are already skipped, the *qi::blank will never match any characters). The whole thing reduces to +line_attribute_rule.
line_attribute_vec_rule = +line_attribute_rule > qi::eoi;
Re: your answer
Indeed, you can use more state in the rule. Instead, I'd simplify the AST to support your case. Say:
struct AttrDef {
std::string word;
std::pair<int,int> range;
};
struct AttributeData {
AttrDef def;
int value;
bool is_valid() const {
return std::minmax({value, def.range.first, def.range.second}) == def.range;
}
};
BOOST_FUSION_ADAPT_STRUCT(AttributeData, def, value)
Now, single_attribute_grammar would not care about skipping, and be like:
using attr_defs = qi::symbols<char, AttrDef>;
template <typename Iterator>
struct single_attribute_grammar : public qi::grammar<Iterator, AttributeData()> {
single_attribute_grammar(const attr_defs &defs)
: single_attribute_grammar::base_type(start)
{
using namespace qi;
attribute_data = defs >> int_;
start %= attribute_data [ _pass = is_valid_(_1) ];
BOOST_SPIRIT_DEBUG_NODES((attribute_data));
}
private:
phx::function<std::function<bool(AttributeData const&)> > is_valid_ {&AttributeData::is_valid};
qi::rule<Iterator, AttributeData()> attribute_data;
qi::rule<Iterator, AttributeData()> start;
};
As you can see, there's no state because I didn't use eps. That's right, I flew right in the face of my own guideline ("avoid semantic actions") for the simple reason that it avoids explicit state (instead using the exsting qi::_pass).
Doing a lot of simplifications in main (specifically, using boost::spirit::istream_iterator rather than roll-your-own multi-pass adapting), I would arrive at this:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct AttrDef {
std::string word;
std::pair<int,int> range;
};
struct AttributeData {
AttrDef def;
int value;
bool is_valid() const {
return std::minmax({value, def.range.first, def.range.second}) == def.range;
}
};
BOOST_FUSION_ADAPT_STRUCT(AttributeData, def, value)
static inline std::ostream& operator<<(std::ostream& os, const AttrDef& def) {
return os << "AttrDef(" << def.word << ", " << def.range.first << ", " << def.range.second << ")";
}
using attr_defs = qi::symbols<char, AttrDef>;
template <typename Iterator>
struct single_attribute_grammar : public qi::grammar<Iterator, AttributeData()> {
single_attribute_grammar(const attr_defs &defs)
: single_attribute_grammar::base_type(start)
{
using namespace qi;
attribute_data = defs >> int_;
start %= attribute_data [ _pass = is_valid_(_1) ];
BOOST_SPIRIT_DEBUG_NODES((attribute_data));
}
private:
phx::function<std::function<bool(AttributeData const&)> > is_valid_ {&AttributeData::is_valid};
qi::rule<Iterator, AttributeData()> attribute_data;
qi::rule<Iterator, AttributeData()> start;
};
using AttributeVariant = boost::variant<AttributeData>;
template <typename Iterator>
class all_attributes_grammar : public qi::grammar<Iterator, std::vector<AttributeVariant>(), qi::blank_type>
{
public:
all_attributes_grammar(const attr_defs &defs)
: all_attributes_grammar::base_type(line_attribute_vec_rule),
sag(defs)
{
line_attribute_rule = (
sag
);
line_attribute_vec_rule = +line_attribute_rule > qi::eoi;
BOOST_SPIRIT_DEBUG_NODES((line_attribute_rule)(line_attribute_vec_rule));
}
private:
single_attribute_grammar<Iterator> sag;
qi::rule<Iterator, AttributeVariant()> line_attribute_rule;
qi::rule<Iterator, std::vector<AttributeVariant>(), qi::blank_type> line_attribute_vec_rule;
};
int main() {
constexpr bool fail = false, succeed = true;
struct _ { bool expect; std::string data; } const tests[] = {
{ succeed, "N100 X-100 AC5" },
{ fail, "" },
{ fail, " " },
{ succeed, "N1" },
{ succeed, " N1" },
{ succeed, "N1 " },
{ succeed, " N1 " },
{ fail, "N 1" },
{ fail, "N0" },
{ succeed, "N9999" },
{ fail, "N10000" },
};
for (auto test : tests) {
std::istringstream input(test.data);
typedef boost::spirit::classic::position_iterator2<boost::spirit::istream_iterator> pos_iterator_type;
pos_iterator_type position_begin(boost::spirit::istream_iterator{input >> std::noskipws}, {}), position_end;
attr_defs sym;
sym.add
("N", {"N", {1, 9999}})
("X", {"X", {-999, 999}})
("AC", {"AC", {-99, 999}})
;
all_attributes_grammar<pos_iterator_type> all_attr_gr(sym);
try {
std::vector<AttributeVariant> value;
std::cout << " --------- '" << test.data << "'\n";
bool actual = qi::phrase_parse(position_begin, position_end, all_attr_gr, qi::blank, value);
std::cout << ((test.expect == actual)?"PASS":"FAIL");
if (actual) {
std::cout << "\t";
for (auto& attr : value)
std::cout << boost::fusion::as_vector(boost::get<AttributeData>(attr)) << " ";
std::cout << "\n";
} else {
std::cout << "\t(no valid parse)\n";
}
}
catch (const qi::expectation_failure<pos_iterator_type>& e) {
auto& pos = e.first.get_position();
std::cout <<
"Parse error at line " << pos.line << " column " << pos.column << ":" << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here" << std::endl;
}
if (position_begin != position_end)
std::cout << " -> Remaining '" << std::string(position_begin, position_end) << "'\n";
}
}
Prints:
--------- 'N100 X-100 AC5'
PASS (AttrDef(N, 1, 9999) 100) (AttrDef(X, -999, 999) -100) (AttrDef(AC, -99, 999) 5)
--------- ''
PASS (no valid parse)
--------- ' '
PASS (no valid parse)
-> Remaining ' '
--------- 'N1'
PASS (AttrDef(N, 1, 9999) 1)
--------- ' N1'
PASS (AttrDef(N, 1, 9999) 1)
--------- 'N1 '
PASS (AttrDef(N, 1, 9999) 1)
--------- ' N1 '
PASS (AttrDef(N, 1, 9999) 1)
--------- 'N 1'
PASS (no valid parse)
-> Remaining 'N 1'
--------- 'N0'
PASS (no valid parse)
-> Remaining 'N0'
--------- 'N9999'
PASS (AttrDef(N, 1, 9999) 9999)
--------- 'N10000'
PASS (no valid parse)
-> Remaining 'N10000'
After a bit more studying of boost spirit I've found the solution is to use locals and inherited attributes:
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::locals<WordGrammar>, qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
word_rule = words;
BOOST_SPIRIT_DEBUG_NODE(word_rule);
range_check_rule = qi::eps(phx::bind(verify_range, qi::_r1, qi::_r2));
BOOST_SPIRIT_DEBUG_NODE(range_check_rule);
single_attribute_rule = qi::lexeme[word_rule[attr_word = grammar_word,qi::_a=qi::_1] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
range_check_rule(qi::_a, qi::_val);
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, WordGrammar()> word_rule;
qi::rule<Iterator, void(const WordGrammar&, const AttributeData&), qi::blank_type> range_check_rule;
qi::rule<Iterator, AttributeData(), qi::locals<WordGrammar>, qi::blank_type> single_attribute_rule;
};
Also because I use BOOST_SPIRIT_DEBUG_NODE overloading of stream operator for WordGrammar is necessary:
std::ostream& operator<<(std::ostream& ostr, const WordGrammar& grammar)
{
ostr << "WordGrammar(" << grammar.word << ", " << grammar.range_from << ", " << grammar.range_to << ")";
return ostr;
}

Why can I not access the value in a semantic action?

I'm trying to write a parser to create an AST using boost::spirit. As a first step I'm trying to wrap numerical values in an AST node. This is the code I'm using:
AST_NodePtr make_AST_NodePtr(const int& i) {
return std::make_shared<AST_Node>(i);
}
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace l = qi::labels;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr(), ascii::space_type> {
test_grammar() : test_grammar::base_type(test) {
test = qi::int_ [qi::_val = make_AST_NodePtr(qi::_1)];
}
qi::rule<Iterator, AST_NodePtr(), ascii::space_type> test;
};
As far as I understood it from the documentation q::_1 should contain the value parsed by qi::int_, but the above code always gives me an error along the lines
invalid initialization of reference of type ‘const int&’ from expression of type ‘const _1_type {aka const boost::phoenix::actor<boost::spirit::argument<0> >}
Why does this not work even though qi::_1 is supposed to hold the parsed valued? How else would I parse the input into a custom AST?
You're using a regular function inside the semantic action.
This means that in the contructor the compiler will try to invoke that make_AST_NodePtr function with the argument supplied: qi::_1.
Q. Why does this not work even though qi::_1 is supposed to hold the parsed valued?
A. qi::_1 does not hold the parsed value. It represents (is-a-placeholder-for) the first unbound argument in the function call
This can /obviously/ never work. The function expects an integer.
So what gives?
You need to make a "lazy" or "deferred" function for use in the semantic action. Using only pre-supplied Boost Phoenix functors, you could spell it out:
test = qi::int_ [ qi::_val = px::construct<AST_NodePtr>(px::new_<AST_Node>(qi::_1)) ];
You don't need the helper function this way. But the result is both ugly and suboptimal. So, let's do better!
Using a Phoenix Function wrapper
struct make_shared_f {
std::shared_ptr<AST_Node> operator()(int v) const {
return std::make_shared<AST_Node>(v);
}
};
px::function<make_shared_f> make_shared_;
With this defined, you can simplify the semantic action to:
test = qi::int_ [ qi::_val = make_shared_(qi::_1) ];
Actually, if you make it generic you can reuse it for many types:
template <typename T>
struct make_shared_f {
template <typename... Args>
std::shared_ptr<T> operator()(Args&&... args) const {
return std::make_shared<T>(std::forward<Args>(args)...);
}
};
px::function<make_shared_f<AST_Node> > make_shared_;
DEMO
Here's a self-contained example showing some style fixes in the process:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <memory>
struct AST_Node {
AST_Node(int v) : _value(v) {}
int value() const { return _value; }
private:
int _value;
};
using AST_NodePtr = std::shared_ptr<AST_Node>;
AST_NodePtr make_AST_NodePtr(const int& i) {
return std::make_shared<AST_Node>(i);
}
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr()> {
test_grammar() : test_grammar::base_type(start) {
using boost::spirit::ascii::space;
start = qi::skip(space) [ test ];
test = qi::int_ [ qi::_val = make_shared_(qi::_1) ];
}
private:
struct make_shared_f {
std::shared_ptr<AST_Node> operator()(int v) const {
return std::make_shared<AST_Node>(v);
}
};
px::function<make_shared_f> make_shared_;
//
qi::rule<Iterator, AST_NodePtr()> start;
qi::rule<Iterator, AST_NodePtr(), boost::spirit::ascii::space_type> test;
};
int main() {
AST_NodePtr parsed;
std::string const input ("42");
auto f = input.begin(), l = input.end();
test_grammar<std::string::const_iterator> g;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parsed: " << (parsed? std::to_string(parsed->value()) : "nullptr") << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l)
{
std::cout << "Remaining input: '" << std::string(f, l) << "'\n";
}
}
Prints
Parsed: 42
BONUS: Alternative using BOOST_PHOENIX_ADAPT_FUNCTION
You can actually use your free function if you wish, and use it as follows:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <memory>
struct AST_Node {
AST_Node(int v) : _value(v) {}
int value() const { return _value; }
private:
int _value;
};
using AST_NodePtr = std::shared_ptr<AST_Node>;
AST_NodePtr make_AST_NodePtr(int i) {
return std::make_shared<AST_Node>(i);
}
BOOST_PHOENIX_ADAPT_FUNCTION(AST_NodePtr, make_AST_NodePtr_, make_AST_NodePtr, 1)
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr()> {
test_grammar() : test_grammar::base_type(start) {
using boost::spirit::ascii::space;
start = qi::skip(space) [ test ] ;
test = qi::int_ [ qi::_val = make_AST_NodePtr_(qi::_1) ] ;
}
private:
qi::rule<Iterator, AST_NodePtr()> start;
qi::rule<Iterator, AST_NodePtr(), boost::spirit::ascii::space_type> test;
};
int main() {
AST_NodePtr parsed;
std::string const input ("42");
auto f = input.begin(), l = input.end();
test_grammar<std::string::const_iterator> g;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parsed: " << (parsed? std::to_string(parsed->value()) : "nullptr") << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l)
{
std::cout << "Remaining input: '" << std::string(f, l) << "'\n";
}
}

Declaration of cross-recursive rules

I declared rules of my grammar as static const. That worked fine till I tried to use cross-recursive rules (rule1 is defined using rule2 which is defined using rule1). The source code still can be built, but segfaults on parsing source containing such cross-recursive case.
Here's a simplified code of the grammar:
template < typename Iterator >
class Skipper : public qi::grammar<Iterator> {
public:
Skipper ( ) : Skipper::base_type(_skip_rule) { }
private:
static qi::rule<Iterator> const
_comment,
_skip_rule;
};
template < typename Iterator >
typename qi::rule<Iterator> const
Skipper<Iterator>::_comment(
boost::spirit::repository::confix("/*", "*/")[*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol)[*(qi::char_ - qi::eol)] // Single-line
);
template < typename Iterator >
typename qi::rule<Iterator> const
Skipper<Iterator>::_skip_rule(qi::ascii::space | _comment);
template < typename Iterator, typename Skipper >
class Grammar : public qi::grammar<Iterator, Skipper > {
public:
Grammar ( ) : Grammar::base_type(expression) { }
private:
static qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal,
identifier,
// Rules
operand,
expression;
};
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::scalar_literal(qi::uint_ | qi::int_);
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::identifier(qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')]);
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::operand((scalar_literal | identifier | ('(' >> expression >> ')')));
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::expression(operand);
(expression rule is made identical to operand to make the code easier to understand; of course it should be more complicated yet based on operand). operand declaration uses expression one and vice versa. That segfaults when trying to parse_phrase for example (123). I suppose that it's because of "forward" using of expression; same happens if I put expression declaration before the operand one. So in what way should these rules be declared to avoid runtime error?
First off, the static has nothing to do with it:
Live On Coliru fails just as badly:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
template <typename Iterator>
struct Skipper : qi::grammar<Iterator> {
Skipper() : Skipper::base_type(_skip_rule) { }
private:
qi::rule<Iterator> const
_comment {
boost::spirit::repository::confix("/*", "*/") [*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)] // Single-line
},
_skip_rule {
qi::ascii::space | _comment
};
};
template <typename Iterator, typename Skipper>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) { }
private:
qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal { qi::uint_ | qi::int_ },
identifier { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
// Rules
operand { (scalar_literal | identifier | ('(' >> expression >> ')')) },
expression { operand };
};
int main() {
using It = std::string::const_iterator;
Skipper<It> s;
Grammar<It, Skipper<It> > p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,s);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Secondly, the skipper has nothing to with things:
Live On Coliru fails just as badly:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) { }
private:
qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal { qi::uint_ | qi::int_ },
identifier { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
// Rules
operand { (scalar_literal | identifier | ('(' >> expression >> ')')) },
expression { operand };
};
int main() {
using It = std::string::const_iterator;
Grammar<It> p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Thirdly, the timing of initialization has to do with it:
Live On Coliru succeeds:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) {
scalar_literal = qi::uint_ | qi::int_;
identifier = (qi::alpha | '_') >> *(qi::alnum | '_');
// Rules
operand = (scalar_literal | identifier | ('(' >> expression >> ')'));
expression = operand;
}
private:
qi::rule<Iterator> scalar_literal, identifier; // Tokens
qi::rule<Iterator, Skipper> operand, expression; // Rules
};
int main() {
using It = std::string::const_iterator;
Grammar<It> p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints
Parse success
Finally, you can have all the cake and eat it too:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace parsing {
namespace detail {
template <typename Iterator>
struct Skipper : qi::grammar<Iterator> {
Skipper() : Skipper::base_type(_skip_rule) {
_comment = boost::spirit::repository::confix("/*", "*/") [*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)] // Single-line
;
_skip_rule = qi::ascii::space | _comment;
}
private:
qi::rule<Iterator> _comment, _skip_rule;
};
template <typename Iterator, typename Skipper = Skipper<Iterator> >
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) {
scalar_literal = qi::uint_ | qi::int_;
identifier = (qi::alpha | '_') >> *(qi::alnum | '_');
// Rules
operand = (scalar_literal | identifier | ('(' >> expression >> ')'));
expression = operand;
}
private:
qi::rule<Iterator> scalar_literal, identifier; // Tokens
qi::rule<Iterator, Skipper> operand, expression; // Rules
};
}
template <typename Iterator, typename Skipper = detail::Skipper<Iterator> >
struct facade {
template <typename Range> static bool parse(Range const& input) {
Iterator f = boost::begin(input), l = boost::end(input);
bool ok = qi::phrase_parse(f, l, _parser, _skipper);
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
return ok;
}
private:
static const detail::Skipper<Iterator> _skipper;
static const detail::Grammar<Iterator, Skipper> _parser;
};
template <class I, class S> const detail::Skipper<I> facade<I,S>::_skipper = {};
template <class I, class S> const detail::Grammar<I, S> facade<I,S>::_parser = {};
}
int main() {
using It = std::string::const_iterator;
std::string const input = "(123)";
bool ok = parsing::facade<It>::parse(input);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
}
Note that the result is the same, the parser/skipper are every bit as static and const as in the original code, the code is a lot easier to maintain (and has a bit more structure to it at the same time).
This is basically where the Singletons-are-bad theme meets the inner-const-is-problematic theme. You don't need to make the fields const. You don't need to make the instances static.
Just, create only one instance if you prefer. Also, it's not a problem that the parser is now copyable (you don't have to copy it; but now you can).

boost spirit on_error not triggered

^ No it is not. This was part of the problem, but if review the code as is right now, it already does what the pointed out question/answer shows ... and the errors are still not triggered.
I have this boost spirit parser for string literal. It works. Now I would like to start handle errors when it fail. I copied the on_error handle 1-1 from the mini xml example and it compiles, but it is never triggered (no errors are outputted).
This is the parser:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct my_handler_f
{
template <typename...> struct result { typedef void type; };
template <typename... T>
void operator()(T&&...) const {
std::cout << "\nmy_handler_f() invoked with " << sizeof...(T) << " arguments\n";
}
};
struct append_utf8_f
{
template <typename, typename>
struct result { typedef void type; };
template <typename INT>
void operator()(INT in, std::string& to) const
{
auto out = std::back_inserter(to);
boost::utf8_output_iterator<decltype(out)> convert(out);
*convert++ = in;
}
};
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
struct RangePosition { size_t beginLine, endLine; };
struct String : public RangePosition
{
String()
: RangePosition()
, value()
, source()
{
}
std::string value;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(String,
(std::string, value)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
struct escape_symbols : qi::symbols<char, char>
{
escape_symbols()
{
add
("\'" , '\'')
("\"" , '\"')
("\?" , '\?')
("\\" , '\\')
("0" , '\0')
("a" , '\a')
("b" , '\b')
("f" , '\f')
("n" , '\n')
("r" , '\r')
("t" , '\t')
("v" , '\v')
;
}
} escape_symbol;
source_string() : source_string::base_type(start)
{
using qi::raw;
using qi::_val;
using qi::_1;
using qi::_2;
using qi::_3;
using qi::_4;
using qi::space;
using qi::omit;
using qi::no_case;
using qi::print;
using qi::eps;
using qi::on_error;
using qi::fail;
using qi::lit;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
using phx::ref;
using phx::val;
escape %= escape_symbol;
character %= (no_case["\\x"] > hex12)
| ("\\" > (oct123 | escape))
| (print - (lit('"') | '\\'));
unicode = ("\\u" > hex4[append_utf8(_1, _val)])
| ("\\U" > hex8[append_utf8(_1, _val)]);
string_section %= '"' > *(unicode | character) > '"';
string %= string_section % omit[*space];
main = raw [
string[at_c<0>(_val) = _1]
]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
];
start %= eps > main;
on_error<fail>(start, my_handler);
}
boost::phoenix::function<my_handler_f> my_handler;
qi::rule<Iterator, std::string()> escape;
qi::uint_parser<char, 16, 1, 2> hex12;
qi::uint_parser<char, 8, 1, 3> oct123;
qi::rule<Iterator, std::string()> character;
qi::uint_parser<uint16_t, 16, 4, 4> hex4;
qi::uint_parser<uint32_t, 16, 8, 8> hex8;
boost::phoenix::function<append_utf8_f> append_utf8;
qi::rule<Iterator, std::string()> unicode;
qi::rule<Iterator, std::string()> string_section;
qi::rule<Iterator, std::string()> string;
boost::phoenix::function<get_line_f> get_line_;
qi::rule<Iterator, String(), qi::space_type> main;
qi::rule<Iterator, String(), qi::space_type> start;
};
and this is the test code
int main()
{
std::string str[] =
{
"\"\\u1234\\U0002345\"",
//"\"te\"\"st\"",
//"\"te\" \"st\"",
//"\"te\" \n \"st\"",
//"\"\"",
//"\"\\\"\"",
//"\"test\"",
//"\"test\" something",
//"\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
//"\"\\x61cd\\X3012\\x7z\"",
//"\"\\141cd\\06012\\78\\778\"",
"\"te",
//"\"te\nst\"",
//"\"test\\\"",
//"\"te\\st\"",
//
};
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> Iterator;
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
source_string<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
String string;
bool r = phrase_parse(iter, end, g, qi::space, string);
if (r)
std::cout << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
else
std::cout << "Parsing failed\n";
}
}

How do I capture the original input into the synthesized output from a spirit grammar?

I'm working on a boost::spirit::qi::grammar and would like to copy a portion of the original text into the synthesized output structure of the grammar (more specifically, the portion that matched one of the components of the rule). The grammar would ultimately be used as a sub-grammar for a more complicated grammar, so I don't really have access to the original input.
I'm guessing that this can be done through semantic actions or the grammar context, but I can't find an example that does this without access to the original parse().
Here's what I have so far:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct A
{
std::string header;
std::vector<int> ints;
std::string inttext;
};
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(std::vector<int>, ints)
//(std::string, inttext)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ qi::int_ % qi::char_(",_") ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.inttext << " -> [ ";
for( auto & i: output.ints )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Something similar to what you asked without using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string::const_iterator begin;
std::string::const_iterator end;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::string::const_iterator, begin)
(std::vector<int>, data)
(std::string::const_iterator, end)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << std::string(output.ints.begin,output.ints.end) << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[
(iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos)
[phx::at_c<0>(qi::_val)=qi::_2,
phx::at_c<1>(qi::_val)=phx::construct<std::string>(qi::_1,qi::_3)]
];
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Another alternative using a custom directive dont_eat that returns the subject attribute but does not consume any input. This is possibly slower since the rule ints is parsed twice, but I believe that the syntax is nicer (and it's a good excuse to try creating your own directive)(It's a slightly modified version of "boost/spirit/home/qi/directive/lexeme.hpp").
dont_eat.hpp
#if !defined(DONT_EAT_HPP)
#define DONT_EAT_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(dont_eat);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::dont_eat> // enables dont_eat
: mpl::true_ {};
}}
namespace custom
{
template <typename Subject>
struct dont_eat_directive : boost::spirit::qi::unary_parser<dont_eat_directive<Subject> >
{
typedef Subject subject_type;
dont_eat_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef typename
boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
type;
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
Iterator temp = first;
boost::spirit::qi::skip_over(temp, last, skipper);
return subject.parse(temp, last, context, skipper, attr);
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("dont_eat", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::dont_eat, Subject, Modifiers>
{
typedef custom::dont_eat_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::dont_eat_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::dont_eat_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "dont_eat.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[qi::int_ % qi::char_(",_")];
ints_string = custom::dont_eat[ints] >> qi::as_string[qi::raw[ints]];
start %= header >> ' ' >> ints_string;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, ints_type() > ints_string;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
This directive returns a fusion::vector2<> with the subject's attribute as its first member and the string corresponding to the synthesized attribute as its second. I think this is the easiest method to reuse as long as you adapt your structs adequately. I'm not sure that this fusion::vector2<> is the best way to handle the attributes but in the limited testing I've done it has worked fine. With this directive the ints_string rule would simply be:
ints_string=custom::annotate[ints];
//or ints_string=custom::annotate[qi::lexeme[qi::int_ % qi::char_(",_")]];
Example on LWS.
annotate.hpp
#if !defined(ANNOTATE_HPP)
#define ANNOTATE_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(annotate);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::annotate> // enables annotate
: mpl::true_ {};
}}
namespace custom
{
template <typename Subject>
struct annotate_directive : boost::spirit::qi::unary_parser<annotate_directive<Subject> >
{
typedef Subject subject_type;
annotate_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef
boost::fusion::vector2<
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
,std::string
>
type;
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
boost::spirit::qi::skip_over(first, last, skipper);
Iterator save = first;
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type attr_;
if(subject.parse(first, last, context, skipper, attr_))
{
boost::spirit::traits::assign_to(attr_,boost::fusion::at_c<0>(attr));
boost::spirit::traits::assign_to(std::string(save,first),boost::fusion::at_c<1>(attr));
return true;
}
first = save;
return false;
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("annotate", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::annotate, Subject, Modifiers>
{
typedef custom::annotate_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::annotate_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::annotate_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "annotate.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[qi::int_ % qi::char_(",_")];
ints_string = custom::annotate[ints];
start %= header >> ' ' >> ints_string;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, ints_type() > ints_string;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
std::string annotation;
bool r = qi::parse(iter, input.end(), custom::annotate[p], output, annotation);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << "annotation: " << annotation << std::endl;
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}