boost spirit on_error not triggered

boost spirit on_error not triggered - c++

^ No it is not. This was part of the problem, but if review the code as is right now, it already does what the pointed out question/answer shows ... and the errors are still not triggered.
I have this boost spirit parser for string literal. It works. Now I would like to start handle errors when it fail. I copied the on_error handle 1-1 from the mini xml example and it compiles, but it is never triggered (no errors are outputted).
This is the parser:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct my_handler_f
{
template <typename...> struct result { typedef void type; };
template <typename... T>
void operator()(T&&...) const {
std::cout << "\nmy_handler_f() invoked with " << sizeof...(T) << " arguments\n";
}
};
struct append_utf8_f
{
template <typename, typename>
struct result { typedef void type; };
template <typename INT>
void operator()(INT in, std::string& to) const
{
auto out = std::back_inserter(to);
boost::utf8_output_iterator<decltype(out)> convert(out);
*convert++ = in;
}
};
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
struct RangePosition { size_t beginLine, endLine; };
struct String : public RangePosition
{
String()
: RangePosition()
, value()
, source()
{
}
std::string value;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(String,
(std::string, value)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
struct escape_symbols : qi::symbols<char, char>
{
escape_symbols()
{
add
("\'" , '\'')
("\"" , '\"')
("\?" , '\?')
("\\" , '\\')
("0" , '\0')
("a" , '\a')
("b" , '\b')
("f" , '\f')
("n" , '\n')
("r" , '\r')
("t" , '\t')
("v" , '\v')
;
}
} escape_symbol;
source_string() : source_string::base_type(start)
{
using qi::raw;
using qi::_val;
using qi::_1;
using qi::_2;
using qi::_3;
using qi::_4;
using qi::space;
using qi::omit;
using qi::no_case;
using qi::print;
using qi::eps;
using qi::on_error;
using qi::fail;
using qi::lit;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
using phx::ref;
using phx::val;
escape %= escape_symbol;
character %= (no_case["\\x"] > hex12)
| ("\\" > (oct123 | escape))
| (print - (lit('"') | '\\'));
unicode = ("\\u" > hex4[append_utf8(_1, _val)])
| ("\\U" > hex8[append_utf8(_1, _val)]);
string_section %= '"' > *(unicode | character) > '"';
string %= string_section % omit[*space];
main = raw [
string[at_c<0>(_val) = _1]
]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
];
start %= eps > main;
on_error<fail>(start, my_handler);
}
boost::phoenix::function<my_handler_f> my_handler;
qi::rule<Iterator, std::string()> escape;
qi::uint_parser<char, 16, 1, 2> hex12;
qi::uint_parser<char, 8, 1, 3> oct123;
qi::rule<Iterator, std::string()> character;
qi::uint_parser<uint16_t, 16, 4, 4> hex4;
qi::uint_parser<uint32_t, 16, 8, 8> hex8;
boost::phoenix::function<append_utf8_f> append_utf8;
qi::rule<Iterator, std::string()> unicode;
qi::rule<Iterator, std::string()> string_section;
qi::rule<Iterator, std::string()> string;
boost::phoenix::function<get_line_f> get_line_;
qi::rule<Iterator, String(), qi::space_type> main;
qi::rule<Iterator, String(), qi::space_type> start;
};
and this is the test code
int main()
{
std::string str[] =
{
"\"\\u1234\\U0002345\"",
//"\"te\"\"st\"",
//"\"te\" \"st\"",
//"\"te\" \n \"st\"",
//"\"\"",
//"\"\\\"\"",
//"\"test\"",
//"\"test\" something",
//"\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
//"\"\\x61cd\\X3012\\x7z\"",
//"\"\\141cd\\06012\\78\\778\"",
"\"te",
//"\"te\nst\"",
//"\"test\\\"",
//"\"te\\st\"",
//
};
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> Iterator;
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
source_string<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
String string;
bool r = phrase_parse(iter, end, g, qi::space, string);
if (r)
std::cout << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
else
std::cout << "Parsing failed\n";
}
}

Related

How to refer to value from symbol table?

I can't figure out how to pass value from symbol table into the function.
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
single_attribute_rule = qi::lexeme[words[attr_word = grammar_word] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
qi::eps(phx::bind(verify_range, qi::_r1, qi::_val)); // <-- HERE is the problem
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, AttributeData(), qi::blank_type> single_attribute_rule;
};
I would think I can refer to the value of the found key using qi::_r1 but the code doesn't compile:
main.cpp:72:31: required from ‘single_attribute_grammar<Iterator>::single_attribute_grammar(const word_symbols&) [with Iterator = boost::spirit::classic::position_iterator2<boost::spirit::multi_pass<std::istreambuf_iterator<char, std::char_traits<char> > > >; word_symbols = boost::spirit::qi::symbols<char, WordGrammar>]’
main.cpp:87:16: required from ‘all_attributes_grammar<Iterator>::all_attributes_grammar(const word_symbols&) [with Iterator = boost::spirit::classic::position_iterator2<boost::spirit::multi_pass<std::istreambuf_iterator<char, std::char_traits<char> > > >; word_symbols = boost::spirit::qi::symbols<char, WordGrammar>]’
main.cpp:130:62: required from here
/usr/include/boost/spirit/home/support/context.hpp:180:13: error: static assertion failed: index_is_out_of_bounds
BOOST_SPIRIT_ASSERT_MSG(
^
In file included from /usr/include/boost/spirit/home/qi/domain.hpp:18:0,
from /usr/include/boost/spirit/home/qi/meta_compiler.hpp:15,
from /usr/include/boost/spirit/home/qi/action/action.hpp:14,
from /usr/include/boost/spirit/home/qi/action.hpp:14,
from /usr/include/boost/spirit/home/qi.hpp:14,
from /usr/include/boost/spirit/include/qi.hpp:16,
from main.cpp:11:
/usr/include/boost/spirit/home/support/context.hpp:186:13: error: no type named ‘type’ in ‘struct boost::fusion::result_of::at_c<boost::fusion::cons<AttributeData&, boost::fusion::nil_>, 1>’
type;
^~~~
In case it's helpful here is the MVCE.
#include <iomanip>
#include <string>
#include <vector>
#include <boost/variant.hpp>
#include <boost/optional/optional.hpp>
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp> // construct
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/phoenix/bind.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace classic = boost::spirit::classic;
namespace phx = boost::phoenix;
namespace fusion = boost::fusion;
struct AttributeData
{
std::string word;
int value;
};
using AttributeVariant = boost::variant<
AttributeData
>;
struct WordGrammar
{
std::string word;
int range_from;
int range_to;
};
BOOST_FUSION_ADAPT_STRUCT(
AttributeData,
(std::string, word)
(int, value)
)
using word_symbols = qi::symbols<char, WordGrammar>;
bool verify_range(const WordGrammar &grammar, const AttributeData &data)
{
if(data.value < grammar.range_from || data.value > grammar.range_to)
{
return false;
}
return true;
}
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
single_attribute_rule = qi::lexeme[words[attr_word = grammar_word] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
qi::eps(phx::bind(verify_range, qi::_r1, qi::_val)); // <-- HERE is the problem
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, AttributeData(), qi::blank_type> single_attribute_rule;
};
template <typename Iterator>
class all_attributes_grammar : public qi::grammar<Iterator, std::vector<AttributeVariant>(), qi::blank_type>
{
public:
all_attributes_grammar(const word_symbols &words) : all_attributes_grammar::base_type(line_attribute_vec_rule)
, sag(words)
{
line_attribute_rule = (
sag
);
BOOST_SPIRIT_DEBUG_NODE(line_attribute_rule);
line_attribute_vec_rule = (line_attribute_rule % *qi::blank) > qi::eoi;
BOOST_SPIRIT_DEBUG_NODE(line_attribute_vec_rule);
}
private:
single_attribute_grammar<Iterator> sag;
qi::rule<Iterator, AttributeVariant(), qi::blank_type> line_attribute_rule;
qi::rule<Iterator, std::vector<AttributeVariant>(), qi::blank_type> line_attribute_vec_rule;
};
int main()
{
std::vector<AttributeVariant> value;
std::string data{"N100 X-100 AC5"};
std::istringstream input(data);
// iterate over stream input
typedef std::istreambuf_iterator<char> base_iterator_type;
base_iterator_type in_begin(input);
// convert input iterator to forward iterator, usable by spirit parser
typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
forward_iterator_type fwd_end;
// wrap forward iterator with position iterator, to record the position
typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
pos_iterator_type position_begin(fwd_begin, fwd_end);
pos_iterator_type position_end;
word_symbols sym;
sym.add
("N", {"N", 1, 9999})
("X", {"X", -999, 999})
("AC", {"AC", -99, 999})
;
all_attributes_grammar<pos_iterator_type> all_attr_gr(sym);
try
{
qi::phrase_parse(position_begin, position_end, all_attr_gr, qi::blank, value);
}
catch (const qi::expectation_failure<pos_iterator_type>& e)
{
const classic::file_position_base<std::string>& pos = e.first.get_position();
std::cout <<
"Parse error at line " << pos.line << " column " << pos.column << ":" << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here" << std::endl;
}
return 0;
}
Any ideas?

Lots of things to be simplified.
why is AttributeData adapted, when you use semantic actions instead? (see Boost Spirit: "Semantic actions are evil"?)
skipping whitespace is much more elegant when using a skipper. It's also logically contradictory to skip whitespace inside a lexeme (see Boost spirit skipper issues)
In particular, the skipping of whitespace here:
line_attribute_vec_rule = (line_attribute_rule % *qi::blank) > qi::eoi;
is completely impotent (because blanks are already skipped, the *qi::blank will never match any characters). The whole thing reduces to +line_attribute_rule.
line_attribute_vec_rule = +line_attribute_rule > qi::eoi;
Re: your answer
Indeed, you can use more state in the rule. Instead, I'd simplify the AST to support your case. Say:
struct AttrDef {
std::string word;
std::pair<int,int> range;
};
struct AttributeData {
AttrDef def;
int value;
bool is_valid() const {
return std::minmax({value, def.range.first, def.range.second}) == def.range;
}
};
BOOST_FUSION_ADAPT_STRUCT(AttributeData, def, value)
Now, single_attribute_grammar would not care about skipping, and be like:
using attr_defs = qi::symbols<char, AttrDef>;
template <typename Iterator>
struct single_attribute_grammar : public qi::grammar<Iterator, AttributeData()> {
single_attribute_grammar(const attr_defs &defs)
: single_attribute_grammar::base_type(start)
{
using namespace qi;
attribute_data = defs >> int_;
start %= attribute_data [ _pass = is_valid_(_1) ];
BOOST_SPIRIT_DEBUG_NODES((attribute_data));
}
private:
phx::function<std::function<bool(AttributeData const&)> > is_valid_ {&AttributeData::is_valid};
qi::rule<Iterator, AttributeData()> attribute_data;
qi::rule<Iterator, AttributeData()> start;
};
As you can see, there's no state because I didn't use eps. That's right, I flew right in the face of my own guideline ("avoid semantic actions") for the simple reason that it avoids explicit state (instead using the exsting qi::_pass).
Doing a lot of simplifications in main (specifically, using boost::spirit::istream_iterator rather than roll-your-own multi-pass adapting), I would arrive at this:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct AttrDef {
std::string word;
std::pair<int,int> range;
};
struct AttributeData {
AttrDef def;
int value;
bool is_valid() const {
return std::minmax({value, def.range.first, def.range.second}) == def.range;
}
};
BOOST_FUSION_ADAPT_STRUCT(AttributeData, def, value)
static inline std::ostream& operator<<(std::ostream& os, const AttrDef& def) {
return os << "AttrDef(" << def.word << ", " << def.range.first << ", " << def.range.second << ")";
}
using attr_defs = qi::symbols<char, AttrDef>;
template <typename Iterator>
struct single_attribute_grammar : public qi::grammar<Iterator, AttributeData()> {
single_attribute_grammar(const attr_defs &defs)
: single_attribute_grammar::base_type(start)
{
using namespace qi;
attribute_data = defs >> int_;
start %= attribute_data [ _pass = is_valid_(_1) ];
BOOST_SPIRIT_DEBUG_NODES((attribute_data));
}
private:
phx::function<std::function<bool(AttributeData const&)> > is_valid_ {&AttributeData::is_valid};
qi::rule<Iterator, AttributeData()> attribute_data;
qi::rule<Iterator, AttributeData()> start;
};
using AttributeVariant = boost::variant<AttributeData>;
template <typename Iterator>
class all_attributes_grammar : public qi::grammar<Iterator, std::vector<AttributeVariant>(), qi::blank_type>
{
public:
all_attributes_grammar(const attr_defs &defs)
: all_attributes_grammar::base_type(line_attribute_vec_rule),
sag(defs)
{
line_attribute_rule = (
sag
);
line_attribute_vec_rule = +line_attribute_rule > qi::eoi;
BOOST_SPIRIT_DEBUG_NODES((line_attribute_rule)(line_attribute_vec_rule));
}
private:
single_attribute_grammar<Iterator> sag;
qi::rule<Iterator, AttributeVariant()> line_attribute_rule;
qi::rule<Iterator, std::vector<AttributeVariant>(), qi::blank_type> line_attribute_vec_rule;
};
int main() {
constexpr bool fail = false, succeed = true;
struct _ { bool expect; std::string data; } const tests[] = {
{ succeed, "N100 X-100 AC5" },
{ fail, "" },
{ fail, " " },
{ succeed, "N1" },
{ succeed, " N1" },
{ succeed, "N1 " },
{ succeed, " N1 " },
{ fail, "N 1" },
{ fail, "N0" },
{ succeed, "N9999" },
{ fail, "N10000" },
};
for (auto test : tests) {
std::istringstream input(test.data);
typedef boost::spirit::classic::position_iterator2<boost::spirit::istream_iterator> pos_iterator_type;
pos_iterator_type position_begin(boost::spirit::istream_iterator{input >> std::noskipws}, {}), position_end;
attr_defs sym;
sym.add
("N", {"N", {1, 9999}})
("X", {"X", {-999, 999}})
("AC", {"AC", {-99, 999}})
;
all_attributes_grammar<pos_iterator_type> all_attr_gr(sym);
try {
std::vector<AttributeVariant> value;
std::cout << " --------- '" << test.data << "'\n";
bool actual = qi::phrase_parse(position_begin, position_end, all_attr_gr, qi::blank, value);
std::cout << ((test.expect == actual)?"PASS":"FAIL");
if (actual) {
std::cout << "\t";
for (auto& attr : value)
std::cout << boost::fusion::as_vector(boost::get<AttributeData>(attr)) << " ";
std::cout << "\n";
} else {
std::cout << "\t(no valid parse)\n";
}
}
catch (const qi::expectation_failure<pos_iterator_type>& e) {
auto& pos = e.first.get_position();
std::cout <<
"Parse error at line " << pos.line << " column " << pos.column << ":" << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here" << std::endl;
}
if (position_begin != position_end)
std::cout << " -> Remaining '" << std::string(position_begin, position_end) << "'\n";
}
}
Prints:
--------- 'N100 X-100 AC5'
PASS (AttrDef(N, 1, 9999) 100) (AttrDef(X, -999, 999) -100) (AttrDef(AC, -99, 999) 5)
--------- ''
PASS (no valid parse)
--------- ' '
PASS (no valid parse)
-> Remaining ' '
--------- 'N1'
PASS (AttrDef(N, 1, 9999) 1)
--------- ' N1'
PASS (AttrDef(N, 1, 9999) 1)
--------- 'N1 '
PASS (AttrDef(N, 1, 9999) 1)
--------- ' N1 '
PASS (AttrDef(N, 1, 9999) 1)
--------- 'N 1'
PASS (no valid parse)
-> Remaining 'N 1'
--------- 'N0'
PASS (no valid parse)
-> Remaining 'N0'
--------- 'N9999'
PASS (AttrDef(N, 1, 9999) 9999)
--------- 'N10000'
PASS (no valid parse)
-> Remaining 'N10000'

After a bit more studying of boost spirit I've found the solution is to use locals and inherited attributes:
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::locals<WordGrammar>, qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
word_rule = words;
BOOST_SPIRIT_DEBUG_NODE(word_rule);
range_check_rule = qi::eps(phx::bind(verify_range, qi::_r1, qi::_r2));
BOOST_SPIRIT_DEBUG_NODE(range_check_rule);
single_attribute_rule = qi::lexeme[word_rule[attr_word = grammar_word,qi::_a=qi::_1] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
range_check_rule(qi::_a, qi::_val);
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, WordGrammar()> word_rule;
qi::rule<Iterator, void(const WordGrammar&, const AttributeData&), qi::blank_type> range_check_rule;
qi::rule<Iterator, AttributeData(), qi::locals<WordGrammar>, qi::blank_type> single_attribute_rule;
};
Also because I use BOOST_SPIRIT_DEBUG_NODE overloading of stream operator for WordGrammar is necessary:
std::ostream& operator<<(std::ostream& ostr, const WordGrammar& grammar)
{
ostr << "WordGrammar(" << grammar.word << ", " << grammar.range_from << ", " << grammar.range_to << ")";
return ostr;
}

parse enum using boost spirit qi parser

I am trying to parse char to fill in a C++11 strongly typed enum. I need help with writing a parser for the enums.. it needs to be high performance as well.
I have a string with the following format
Category | Type | Attributes
Example:
std::string str1 = "A|D|name=tim, address=3 infinite loop"
std::string str2 = "A|C|name=poc, address=5 overflow street"
I am representing Category and Type as follows:
enum class CATEGORY : char
{
Animal:'A', Bird:'B'
}
enum class TYPE : char
{
Dog:'D', Bird:'B'
}
struct Zoo
{
Category category;
Type type;
std::string name;
std::string address;
};
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
using qi::char_;
using qi::_1;
using qi::lit
using boost::phoenix::ref;
//need help here
start_=char_[/*how to assign enum */ ]>>'|'
>>char_[ /*how to assign enum */ ]>>'|'
>>lit;
}
qi::rule<Iterator, ascii::space_type> start_;
};
I have problem around creating a parser type like the built in ex: qi::char_ to "parse enums CATEGORY and TYPE".
thanks for the help in advance..

As usual there's several approaches:
The semantic action way (ad-hoc)
The customization points way
The qi::symbols way
Which is the most appropriate depends. All three approaches should be equally efficient. The symbols<> apprach seems to be most safe (not involving casts) and flexible: you can e.g. use it with variable-length enum members, use it inside no_case[] etc.
Case by case:
The semantic action way (ad-hoc):
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
using namespace qi;
category_ = char_("AB") [ _val = phx::static_cast_<Category>(_1) ];
type_ = char_("DB") [ _val = phx::static_cast_<Type>(_1) ];
start_ = category_ >> '|' > type_;
}
private:
qi::rule<Iterator, Category(), ascii::space_type> category_;
qi::rule<Iterator, Type(), ascii::space_type> type_;
qi::rule<Iterator, ascii::space_type> start_;
};
You can see it Live On Coliru printing:
Parse success: [A, D]
Remaining unparsed input '|name=tim, address=3 infinite loop'
---------------------------
expected: tag: char-set
got: "C|name=poc, address=5 overflow street"
Expectation failure: boost::spirit::qi::expectation_failure at 'C|name=poc, address=5 overflow street'
---------------------------
The customization points way:
namespace boost { namespace spirit { namespace traits {
template <typename Enum, typename RawValue>
struct assign_to_attribute_from_value<Enum, RawValue, typename enable_if<is_enum<Enum>>::type> {
static void call(RawValue const& raw, Enum& cat) {
cat = static_cast<Enum>(raw);
}
};
}}}
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = qi::char_("AB") > '|' > qi::char_("DB");
}
private:
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
See it Live On Coliru too, with the same output (obviously)
The qi::symbols way:
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = category_ > '|' > type_;
}
private:
struct Category_ : qi::symbols<char,Category> {
Category_() {
this->add("A", Category::Animal)("B", Category::Bird);
}
} category_;
struct Type_ : qi::symbols<char,Type> {
Type_() {
this->add("D", Type::Dog)("B", Type::Bird);
}
} type_;
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
See it Live On Coliru
Full Demo
This happens to be the traits approach, but you can reuse the skeleton with both other grammars:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/struct.hpp>
enum class Category : char { Animal='A', Bird='B' };
enum class Type : char { Dog='D', Bird='B' };
struct Zoo {
Category category;
Type type;
};
BOOST_FUSION_ADAPT_STRUCT(Zoo, (Category,category)(Type,type))
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phx = boost::phoenix;
namespace boost { namespace spirit { namespace traits {
template <typename Enum, typename RawValue>
struct assign_to_attribute_from_value<Enum, RawValue, typename enable_if<is_enum<Enum>>::type> {
static void call(RawValue const& raw, Enum& cat) {
cat = static_cast<Enum>(raw);
}
};
}}}
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = qi::char_("AB") > '|' > qi::char_("DB");
}
private:
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
/////////////////////////////////////////////////
// For exception output
struct printer {
typedef boost::spirit::utf8_string string;
void element(string const& tag, string const& value, int depth) const {
for (int i = 0; i < (depth*4); ++i) std::cout << ' '; // indent to depth
std::cout << "tag: " << tag;
if (value != "") std::cout << ", value: " << value;
std::cout << std::endl;
}
};
void print_info(boost::spirit::info const& what) {
using boost::spirit::basic_info_walker;
printer pr;
basic_info_walker<printer> walker(pr, what.tag, 0);
boost::apply_visitor(walker, what.value);
}
//
/////////////////////////////////////////////////
int main()
{
typedef std::string::const_iterator It;
static const ZooBuilderGrammar<It> p;
for (std::string const str1 : {
"A|D|name=tim, address=3 infinite loop",
"A|C|name=poc, address=5 overflow street" })
{
It f(str1.begin()), l(str1.end());
try {
Zoo zoo;
bool ok = qi::phrase_parse(f,l,p,ascii::space,zoo);
if (ok)
std::cout << "Parse success: [" << static_cast<char>(zoo.category) << ", " << static_cast<char>(zoo.type) << "]\n";
else
std::cout << "Failed to parse '" << str1 << "'\n";
if (f!=l)
std::cout << "Remaining unparsed input '" << std::string(f,l) << "'\n";
} catch(qi::expectation_failure<It> const& x)
{
std::cout << "expected: "; print_info(x.what_);
std::cout << "got: \"" << std::string(x.first, x.last) << '"' << std::endl;
}
std::cout << "---------------------------\n";
}
}

I'd use the qi::symbols way as sugested by sehe, but in this way to improve code readability:
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
category_.add
("A", Category::Animal)
("B", Category::Bird)
;
type_.add
("D", Type::Dog)
("B", Type::Bird)
;
start_ = category_ > '|' > type_;
}
private:
qi::symbols<char,Type> category_;
qi::symbols<char,Category> type_;
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};

c++ boost spirit attr_cast does not invoke the expected tranform_attribute

I am working on c++ string literal parser with boost spirit.
This is what I have so far:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/repository/include/qi_confix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
using namespace boost::spirit;
#include <boost/fusion/include/adapt_struct.hpp>
////////////////////////////////
// extra facilities
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint16_t, std::string, qi::domain>
{
typedef std::string& type;
static std::string pre(uint16_t& d) { return "pre16"; }
static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; }
static void fail(uint16_t&) {}
};
}}}
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint32_t, std::string, qi::domain>
{
typedef std::string& type;
static std::string pre(uint32_t& d) { return "pre32"; }
static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; }
static void fail(uint32_t&) {}
};
}}}
//
////////////////////////////////
struct RangePosition
{
RangePosition()
: beginLine(-1)
, endLine(-1)
{
}
size_t beginLine;
size_t endLine;
};
struct String : public RangePosition
{
String()
: RangePosition()
, value()
, source()
{
}
std::string value;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(String,
(std::string, value)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
struct escape_symbols : qi::symbols<char, char>
{
escape_symbols()
{
add
("\\\'" , '\'')
("\\\"" , '\"')
("\\\?" , '\?')
("\\\\" , '\\')
("\\0" , '\0')
("\\a" , '\a')
("\\b" , '\b')
("\\f" , '\f')
("\\n" , '\n')
("\\r" , '\r')
("\\t" , '\t')
("\\v" , '\v')
;
}
} escape_symbol;
source_string() : source_string::base_type(start)
{
using qi::raw;
using qi::_val;
using qi::_1;
using qi::space;
using qi::omit;
using qi::no_case;
using qi::attr_cast;
using qi::print;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
using phx::ref;
escape %= escape_symbol;
character %= (no_case["\\x"] >> hex12)
| ("\\" >> oct123)
| escape
| (print - (lit('"') | '\\'));
unicode %= ("\\u" >> attr_cast(hex4))
| ("\\U" >> attr_cast(hex8));
string_section %= '"' >> *(unicode | character) >> '"';
string %= string_section % omit[*space];
start = raw[
string[at_c<0>(_val) = _1]
]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
]
;
}
boost::phoenix::function<get_line_f> get_line_;
qi::rule<Iterator, String(), qi::space_type> start;
qi::rule<Iterator, std::string()> escape;
qi::uint_parser<char, 16, 1, 2> hex12;
qi::uint_parser<uint16_t, 16, 4, 4> hex4;
qi::uint_parser<uint32_t, 16, 8, 8> hex8;
qi::uint_parser<char, 8, 1, 3> oct123;
qi::rule<Iterator, std::string()> character;
qi::rule<Iterator, std::string()> unicode;
qi::rule<Iterator, std::string()> string_section;
qi::rule<Iterator, std::string()> string;
};
and my testing code is
std::string str[] =
{
"\"\\u1234\\U12345678\"",
"\"te\"\"st\"",
"\"te\" \"st\"",
"\"te\" \n \"st\"",
"\"\"",
"\"\\\"\"",
"\"test\"",
"\"test\" something",
"\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
"\"\\x61cd\\X3012\\x7z\"",
"\"\\141cd\\06012\\78\\778\"",
"\"te",
"\"te\nst\"",
"\"test\\\"",
"\"te\\st\"",
//
};
typedef line_pos_iterator<std::string::const_iterator> Iterator;
std::ostringstream result;
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
source_string<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
String string;
bool r = phrase_parse(iter, end, g, qi::space, string);
if (r)
result << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
else
result << "Parsing failed\n";
}
Can somebody help me why in this rule:
unicode %= ("\\u" >> attr_cast(hex4))
| ("\\U" >> attr_cast(hex8));
attr_cast does not invoke transform_attribute that I have defined?
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint16_t, std::string, qi::domain>
{
typedef std::string& type;
static std::string pre(uint16_t& d) { return "pre16"; }
static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; }
static void fail(uint16_t&) {}
};
}}}
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint32_t, std::string, qi::domain>
{
typedef std::string& type;
static std::string pre(uint32_t& d) { return "pre32"; }
static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; }
static void fail(uint32_t&) {}
};
}}}

Making builtin primitives types behave "strangely" seems like a VeryBadIdea™.
Assuming you just wish to decode I suggest a simpler approach using semantic actions, e.g.
https://github.com/sehe/spirit-v2-json/blob/master/JSON.cpp#L102
char_ = +(
~encoding::char_(L"\"\\")) [ qi::_val += qi::_1 ] |
qi::lit(L"\x5C") >> ( // \ (reverse solidus)
qi::lit(L"\x22") [ qi::_val += L'"' ] | // " quotation mark U+0022
qi::lit(L"\x5C") [ qi::_val += L'\\' ] | // \ reverse solidus U+005C
qi::lit(L"\x2F") [ qi::_val += L'/' ] | // / solidus U+002F
qi::lit(L"\x62") [ qi::_val += L'\b' ] | // b backspace U+0008
qi::lit(L"\x66") [ qi::_val += L'\f' ] | // f form feed U+000C
qi::lit(L"\x6E") [ qi::_val += L'\n' ] | // n line feed U+000A
qi::lit(L"\x72") [ qi::_val += L'\r' ] | // r carriage return U+000D
qi::lit(L"\x74") [ qi::_val += L'\t' ] | // t tab U+0009
qi::lit(L"\x75") // uXXXX U+XXXX
>> _4HEXDIG [ qi::_val += qi::_1 ]
This appears easily adapted to your use case.
Now if you insist, firstly wrap the types (so you don't "redefine" essential types for Spirit) and secondly, customize the container insertion traits, since std::string (or rather std::vector<char>?) is a container type.
I wouldn't recommend this though. I like to keep things "simple" and the logic in one place. Obviously this is a "funny" thing to say when using a parser generator like Spirit, because so much appears to go on "magically" behind the scenes. However, that is the nature of abstraction. I don't think I'd want to "abstract" decoding unicode escapes here: they feel as they belong in the problem domain, not the tooling.

boost spirit parse c and cpp style comments

I am working on parser for which the comments are meaningful. And before you say it ... the reason is if statement A has a comment in the source in the target statement resulting out of the description of A should be commented with the comment of A. Or comments propagation. Anyways, for this purpose I have to parse and collect the comments not just to skip them.
This is what i have so far:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/repository/include/qi_confix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
using namespace boost::spirit;
#include <boost/fusion/include/adapt_struct.hpp>
////////////////////////////////
// extra facilities
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
struct Position
{
Position()
: beginLine(-1)
, endLine(-1)
{
}
size_t beginLine;
size_t endLine;
};
struct Comment : public Position
{
Comment()
: Position()
, text()
{
}
std::vector<std::string> text;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(Comment,
(std::vector<std::string>, text)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
//
////////////////////////////////
template <typename Iterator>
struct comment : qi::grammar<Iterator, Comment(), qi::space_type>
{
comment() : comment::base_type(start)
{
c_comment %= repository::confix("/*", "*/")[*(char_ - "*/")];
cpp_comment %= repository::confix("//", eol | eoi)[*(char_ - eol)];
comments %= *(c_comment | cpp_comment);
start = raw[ comments[at_c<0>(_val) = _1] ]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
]
;
}
boost::phoenix::function<get_line_f> get_line_;
qi::rule<Iterator, Comment(), qi::space_type> start;
qi::rule<Iterator, std::string()> c_comment;
qi::rule<Iterator, std::string()> cpp_comment;
qi::rule<Iterator, std::vector<std::string>()> comments;
};
This is the testing code:
int main()
{
std::string str[] =
{
"/*1234*/",
"\n\n/*1234*/",
"// foo bar\n",
"// foo bar",
"\n\n // foo bar\n",
"/*1234\n5678*/",
};
typedef line_pos_iterator<std::string::const_iterator> Iterator;
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
comment<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
Comment comment;
bool r = phrase_parse(iter, end, g, qi::space, comment);
if (r && iter == end)
std::cout << comment.beginLine << comment.source << "\n";
else
std::cout << "Parsing failed\n";
}
}
Edit:
This code now is compiling and working ... thanks for the help.

How do I capture the original input into the synthesized output from a spirit grammar?

I'm working on a boost::spirit::qi::grammar and would like to copy a portion of the original text into the synthesized output structure of the grammar (more specifically, the portion that matched one of the components of the rule). The grammar would ultimately be used as a sub-grammar for a more complicated grammar, so I don't really have access to the original input.
I'm guessing that this can be done through semantic actions or the grammar context, but I can't find an example that does this without access to the original parse().
Here's what I have so far:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct A
{
std::string header;
std::vector<int> ints;
std::string inttext;
};
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(std::vector<int>, ints)
//(std::string, inttext)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ qi::int_ % qi::char_(",_") ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.inttext << " -> [ ";
for( auto & i: output.ints )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}

Something similar to what you asked without using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string::const_iterator begin;
std::string::const_iterator end;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::string::const_iterator, begin)
(std::vector<int>, data)
(std::string::const_iterator, end)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << std::string(output.ints.begin,output.ints.end) << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[
(iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos)
[phx::at_c<0>(qi::_val)=qi::_2,
phx::at_c<1>(qi::_val)=phx::construct<std::string>(qi::_1,qi::_3)]
];
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}

Another alternative using a custom directive dont_eat that returns the subject attribute but does not consume any input. This is possibly slower since the rule ints is parsed twice, but I believe that the syntax is nicer (and it's a good excuse to try creating your own directive)(It's a slightly modified version of "boost/spirit/home/qi/directive/lexeme.hpp").
dont_eat.hpp
#if !defined(DONT_EAT_HPP)
#define DONT_EAT_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(dont_eat);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::dont_eat> // enables dont_eat
: mpl::true_ {};
}}
namespace custom
{
template <typename Subject>
struct dont_eat_directive : boost::spirit::qi::unary_parser<dont_eat_directive<Subject> >
{
typedef Subject subject_type;
dont_eat_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef typename
boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
type;
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
Iterator temp = first;
boost::spirit::qi::skip_over(temp, last, skipper);
return subject.parse(temp, last, context, skipper, attr);
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("dont_eat", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::dont_eat, Subject, Modifiers>
{
typedef custom::dont_eat_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::dont_eat_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::dont_eat_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "dont_eat.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[qi::int_ % qi::char_(",_")];
ints_string = custom::dont_eat[ints] >> qi::as_string[qi::raw[ints]];
start %= header >> ' ' >> ints_string;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, ints_type() > ints_string;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}

This directive returns a fusion::vector2<> with the subject's attribute as its first member and the string corresponding to the synthesized attribute as its second. I think this is the easiest method to reuse as long as you adapt your structs adequately. I'm not sure that this fusion::vector2<> is the best way to handle the attributes but in the limited testing I've done it has worked fine. With this directive the ints_string rule would simply be:
ints_string=custom::annotate[ints];
//or ints_string=custom::annotate[qi::lexeme[qi::int_ % qi::char_(",_")]];
Example on LWS.
annotate.hpp
#if !defined(ANNOTATE_HPP)
#define ANNOTATE_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(annotate);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::annotate> // enables annotate
: mpl::true_ {};
}}
namespace custom
{
template <typename Subject>
struct annotate_directive : boost::spirit::qi::unary_parser<annotate_directive<Subject> >
{
typedef Subject subject_type;
annotate_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef
boost::fusion::vector2<
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
,std::string
>
type;
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
boost::spirit::qi::skip_over(first, last, skipper);
Iterator save = first;
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type attr_;
if(subject.parse(first, last, context, skipper, attr_))
{
boost::spirit::traits::assign_to(attr_,boost::fusion::at_c<0>(attr));
boost::spirit::traits::assign_to(std::string(save,first),boost::fusion::at_c<1>(attr));
return true;
}
first = save;
return false;
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("annotate", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::annotate, Subject, Modifiers>
{
typedef custom::annotate_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::annotate_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::annotate_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "annotate.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[qi::int_ % qi::char_(",_")];
ints_string = custom::annotate[ints];
start %= header >> ' ' >> ints_string;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, ints_type() > ints_string;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
std::string annotation;
bool r = qi::parse(iter, input.end(), custom::annotate[p], output, annotation);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << "annotation: " << annotation << std::endl;
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

boost spirit on_error not triggered - c++

Related

How to refer to value from symbol table?

parse enum using boost spirit qi parser

c++ boost spirit attr_cast does not invoke the expected tranform_attribute

boost spirit parse c and cpp style comments

How do I capture the original input into the synthesized output from a spirit grammar?

Categories

Resources