How to refer to value from symbol table? - c++

I can't figure out how to pass value from symbol table into the function.
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
single_attribute_rule = qi::lexeme[words[attr_word = grammar_word] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
qi::eps(phx::bind(verify_range, qi::_r1, qi::_val)); // <-- HERE is the problem
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, AttributeData(), qi::blank_type> single_attribute_rule;
};
I would think I can refer to the value of the found key using qi::_r1 but the code doesn't compile:
main.cpp:72:31: required from ‘single_attribute_grammar<Iterator>::single_attribute_grammar(const word_symbols&) [with Iterator = boost::spirit::classic::position_iterator2<boost::spirit::multi_pass<std::istreambuf_iterator<char, std::char_traits<char> > > >; word_symbols = boost::spirit::qi::symbols<char, WordGrammar>]’
main.cpp:87:16: required from ‘all_attributes_grammar<Iterator>::all_attributes_grammar(const word_symbols&) [with Iterator = boost::spirit::classic::position_iterator2<boost::spirit::multi_pass<std::istreambuf_iterator<char, std::char_traits<char> > > >; word_symbols = boost::spirit::qi::symbols<char, WordGrammar>]’
main.cpp:130:62: required from here
/usr/include/boost/spirit/home/support/context.hpp:180:13: error: static assertion failed: index_is_out_of_bounds
BOOST_SPIRIT_ASSERT_MSG(
^
In file included from /usr/include/boost/spirit/home/qi/domain.hpp:18:0,
from /usr/include/boost/spirit/home/qi/meta_compiler.hpp:15,
from /usr/include/boost/spirit/home/qi/action/action.hpp:14,
from /usr/include/boost/spirit/home/qi/action.hpp:14,
from /usr/include/boost/spirit/home/qi.hpp:14,
from /usr/include/boost/spirit/include/qi.hpp:16,
from main.cpp:11:
/usr/include/boost/spirit/home/support/context.hpp:186:13: error: no type named ‘type’ in ‘struct boost::fusion::result_of::at_c<boost::fusion::cons<AttributeData&, boost::fusion::nil_>, 1>’
type;
^~~~
In case it's helpful here is the MVCE.
#include <iomanip>
#include <string>
#include <vector>
#include <boost/variant.hpp>
#include <boost/optional/optional.hpp>
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp> // construct
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/phoenix/bind.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace classic = boost::spirit::classic;
namespace phx = boost::phoenix;
namespace fusion = boost::fusion;
struct AttributeData
{
std::string word;
int value;
};
using AttributeVariant = boost::variant<
AttributeData
>;
struct WordGrammar
{
std::string word;
int range_from;
int range_to;
};
BOOST_FUSION_ADAPT_STRUCT(
AttributeData,
(std::string, word)
(int, value)
)
using word_symbols = qi::symbols<char, WordGrammar>;
bool verify_range(const WordGrammar &grammar, const AttributeData &data)
{
if(data.value < grammar.range_from || data.value > grammar.range_to)
{
return false;
}
return true;
}
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
single_attribute_rule = qi::lexeme[words[attr_word = grammar_word] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
qi::eps(phx::bind(verify_range, qi::_r1, qi::_val)); // <-- HERE is the problem
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, AttributeData(), qi::blank_type> single_attribute_rule;
};
template <typename Iterator>
class all_attributes_grammar : public qi::grammar<Iterator, std::vector<AttributeVariant>(), qi::blank_type>
{
public:
all_attributes_grammar(const word_symbols &words) : all_attributes_grammar::base_type(line_attribute_vec_rule)
, sag(words)
{
line_attribute_rule = (
sag
);
BOOST_SPIRIT_DEBUG_NODE(line_attribute_rule);
line_attribute_vec_rule = (line_attribute_rule % *qi::blank) > qi::eoi;
BOOST_SPIRIT_DEBUG_NODE(line_attribute_vec_rule);
}
private:
single_attribute_grammar<Iterator> sag;
qi::rule<Iterator, AttributeVariant(), qi::blank_type> line_attribute_rule;
qi::rule<Iterator, std::vector<AttributeVariant>(), qi::blank_type> line_attribute_vec_rule;
};
int main()
{
std::vector<AttributeVariant> value;
std::string data{"N100 X-100 AC5"};
std::istringstream input(data);
// iterate over stream input
typedef std::istreambuf_iterator<char> base_iterator_type;
base_iterator_type in_begin(input);
// convert input iterator to forward iterator, usable by spirit parser
typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
forward_iterator_type fwd_end;
// wrap forward iterator with position iterator, to record the position
typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
pos_iterator_type position_begin(fwd_begin, fwd_end);
pos_iterator_type position_end;
word_symbols sym;
sym.add
("N", {"N", 1, 9999})
("X", {"X", -999, 999})
("AC", {"AC", -99, 999})
;
all_attributes_grammar<pos_iterator_type> all_attr_gr(sym);
try
{
qi::phrase_parse(position_begin, position_end, all_attr_gr, qi::blank, value);
}
catch (const qi::expectation_failure<pos_iterator_type>& e)
{
const classic::file_position_base<std::string>& pos = e.first.get_position();
std::cout <<
"Parse error at line " << pos.line << " column " << pos.column << ":" << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here" << std::endl;
}
return 0;
}
Any ideas?

Lots of things to be simplified.
why is AttributeData adapted, when you use semantic actions instead? (see Boost Spirit: "Semantic actions are evil"?)
skipping whitespace is much more elegant when using a skipper. It's also logically contradictory to skip whitespace inside a lexeme (see Boost spirit skipper issues)
In particular, the skipping of whitespace here:
line_attribute_vec_rule = (line_attribute_rule % *qi::blank) > qi::eoi;
is completely impotent (because blanks are already skipped, the *qi::blank will never match any characters). The whole thing reduces to +line_attribute_rule.
line_attribute_vec_rule = +line_attribute_rule > qi::eoi;
Re: your answer
Indeed, you can use more state in the rule. Instead, I'd simplify the AST to support your case. Say:
struct AttrDef {
std::string word;
std::pair<int,int> range;
};
struct AttributeData {
AttrDef def;
int value;
bool is_valid() const {
return std::minmax({value, def.range.first, def.range.second}) == def.range;
}
};
BOOST_FUSION_ADAPT_STRUCT(AttributeData, def, value)
Now, single_attribute_grammar would not care about skipping, and be like:
using attr_defs = qi::symbols<char, AttrDef>;
template <typename Iterator>
struct single_attribute_grammar : public qi::grammar<Iterator, AttributeData()> {
single_attribute_grammar(const attr_defs &defs)
: single_attribute_grammar::base_type(start)
{
using namespace qi;
attribute_data = defs >> int_;
start %= attribute_data [ _pass = is_valid_(_1) ];
BOOST_SPIRIT_DEBUG_NODES((attribute_data));
}
private:
phx::function<std::function<bool(AttributeData const&)> > is_valid_ {&AttributeData::is_valid};
qi::rule<Iterator, AttributeData()> attribute_data;
qi::rule<Iterator, AttributeData()> start;
};
As you can see, there's no state because I didn't use eps. That's right, I flew right in the face of my own guideline ("avoid semantic actions") for the simple reason that it avoids explicit state (instead using the exsting qi::_pass).
Doing a lot of simplifications in main (specifically, using boost::spirit::istream_iterator rather than roll-your-own multi-pass adapting), I would arrive at this:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct AttrDef {
std::string word;
std::pair<int,int> range;
};
struct AttributeData {
AttrDef def;
int value;
bool is_valid() const {
return std::minmax({value, def.range.first, def.range.second}) == def.range;
}
};
BOOST_FUSION_ADAPT_STRUCT(AttributeData, def, value)
static inline std::ostream& operator<<(std::ostream& os, const AttrDef& def) {
return os << "AttrDef(" << def.word << ", " << def.range.first << ", " << def.range.second << ")";
}
using attr_defs = qi::symbols<char, AttrDef>;
template <typename Iterator>
struct single_attribute_grammar : public qi::grammar<Iterator, AttributeData()> {
single_attribute_grammar(const attr_defs &defs)
: single_attribute_grammar::base_type(start)
{
using namespace qi;
attribute_data = defs >> int_;
start %= attribute_data [ _pass = is_valid_(_1) ];
BOOST_SPIRIT_DEBUG_NODES((attribute_data));
}
private:
phx::function<std::function<bool(AttributeData const&)> > is_valid_ {&AttributeData::is_valid};
qi::rule<Iterator, AttributeData()> attribute_data;
qi::rule<Iterator, AttributeData()> start;
};
using AttributeVariant = boost::variant<AttributeData>;
template <typename Iterator>
class all_attributes_grammar : public qi::grammar<Iterator, std::vector<AttributeVariant>(), qi::blank_type>
{
public:
all_attributes_grammar(const attr_defs &defs)
: all_attributes_grammar::base_type(line_attribute_vec_rule),
sag(defs)
{
line_attribute_rule = (
sag
);
line_attribute_vec_rule = +line_attribute_rule > qi::eoi;
BOOST_SPIRIT_DEBUG_NODES((line_attribute_rule)(line_attribute_vec_rule));
}
private:
single_attribute_grammar<Iterator> sag;
qi::rule<Iterator, AttributeVariant()> line_attribute_rule;
qi::rule<Iterator, std::vector<AttributeVariant>(), qi::blank_type> line_attribute_vec_rule;
};
int main() {
constexpr bool fail = false, succeed = true;
struct _ { bool expect; std::string data; } const tests[] = {
{ succeed, "N100 X-100 AC5" },
{ fail, "" },
{ fail, " " },
{ succeed, "N1" },
{ succeed, " N1" },
{ succeed, "N1 " },
{ succeed, " N1 " },
{ fail, "N 1" },
{ fail, "N0" },
{ succeed, "N9999" },
{ fail, "N10000" },
};
for (auto test : tests) {
std::istringstream input(test.data);
typedef boost::spirit::classic::position_iterator2<boost::spirit::istream_iterator> pos_iterator_type;
pos_iterator_type position_begin(boost::spirit::istream_iterator{input >> std::noskipws}, {}), position_end;
attr_defs sym;
sym.add
("N", {"N", {1, 9999}})
("X", {"X", {-999, 999}})
("AC", {"AC", {-99, 999}})
;
all_attributes_grammar<pos_iterator_type> all_attr_gr(sym);
try {
std::vector<AttributeVariant> value;
std::cout << " --------- '" << test.data << "'\n";
bool actual = qi::phrase_parse(position_begin, position_end, all_attr_gr, qi::blank, value);
std::cout << ((test.expect == actual)?"PASS":"FAIL");
if (actual) {
std::cout << "\t";
for (auto& attr : value)
std::cout << boost::fusion::as_vector(boost::get<AttributeData>(attr)) << " ";
std::cout << "\n";
} else {
std::cout << "\t(no valid parse)\n";
}
}
catch (const qi::expectation_failure<pos_iterator_type>& e) {
auto& pos = e.first.get_position();
std::cout <<
"Parse error at line " << pos.line << " column " << pos.column << ":" << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here" << std::endl;
}
if (position_begin != position_end)
std::cout << " -> Remaining '" << std::string(position_begin, position_end) << "'\n";
}
}
Prints:
--------- 'N100 X-100 AC5'
PASS (AttrDef(N, 1, 9999) 100) (AttrDef(X, -999, 999) -100) (AttrDef(AC, -99, 999) 5)
--------- ''
PASS (no valid parse)
--------- ' '
PASS (no valid parse)
-> Remaining ' '
--------- 'N1'
PASS (AttrDef(N, 1, 9999) 1)
--------- ' N1'
PASS (AttrDef(N, 1, 9999) 1)
--------- 'N1 '
PASS (AttrDef(N, 1, 9999) 1)
--------- ' N1 '
PASS (AttrDef(N, 1, 9999) 1)
--------- 'N 1'
PASS (no valid parse)
-> Remaining 'N 1'
--------- 'N0'
PASS (no valid parse)
-> Remaining 'N0'
--------- 'N9999'
PASS (AttrDef(N, 1, 9999) 9999)
--------- 'N10000'
PASS (no valid parse)
-> Remaining 'N10000'

After a bit more studying of boost spirit I've found the solution is to use locals and inherited attributes:
template <typename Iterator>
class single_attribute_grammar : public qi::grammar<Iterator, AttributeData(), qi::locals<WordGrammar>, qi::blank_type>
{
public:
single_attribute_grammar(const word_symbols &words) : single_attribute_grammar::base_type(single_attribute_rule)
{
auto attr_word = phx::bind(&AttributeData::word, qi::_val);
auto grammar_word = phx::bind(&WordGrammar::word, qi::_1);
auto attr_value = phx::bind(&AttributeData::value, qi::_val);
word_rule = words;
BOOST_SPIRIT_DEBUG_NODE(word_rule);
range_check_rule = qi::eps(phx::bind(verify_range, qi::_r1, qi::_r2));
BOOST_SPIRIT_DEBUG_NODE(range_check_rule);
single_attribute_rule = qi::lexeme[word_rule[attr_word = grammar_word,qi::_a=qi::_1] >
qi::int_[attr_value = qi::_1] > (qi::space|qi::eoi)] >>
range_check_rule(qi::_a, qi::_val);
BOOST_SPIRIT_DEBUG_NODE(single_attribute_rule);
}
private:
qi::rule<Iterator, WordGrammar()> word_rule;
qi::rule<Iterator, void(const WordGrammar&, const AttributeData&), qi::blank_type> range_check_rule;
qi::rule<Iterator, AttributeData(), qi::locals<WordGrammar>, qi::blank_type> single_attribute_rule;
};
Also because I use BOOST_SPIRIT_DEBUG_NODE overloading of stream operator for WordGrammar is necessary:
std::ostream& operator<<(std::ostream& ostr, const WordGrammar& grammar)
{
ostr << "WordGrammar(" << grammar.word << ", " << grammar.range_from << ", " << grammar.range_to << ")";
return ostr;
}

Related

Parsing a command language using Boost Spirit

I am building a parser for a command language that I've pieced together from various samples. I've read the Boost Spirit Qi and Lex docs, and I think I understand the basics, but from what I've read, I should avoid attributes and use utree. What docs I've found on utree basically suck. Given the code below, I have the following questions:
How do I annotate the parser to create an AST using utree?
How do I walk the utree after it is built, to discover what was parsed? e.g. for token-only commands, such as SET DEBUG ON, as well as commands with values, such as LOAD "file.ext" or SET DRIVE C:
I want to add a comment character, "!". So, how can I ignore everything after that - except when it occurs in a quoted string?
Why doesn't my error handler get called when I give it invalid input?
How can I make the command tokens case insensitive, but not change the contents of a quoted string?
#include <Windows.h>
#include <conio.h>
#include <string>
#include <vector>
#include <iostream>
#define BOOST_SPIRIT_DEBUG
#include <boost\spirit\include\qi.hpp>
#include <boost\spirit\include\phoenix.hpp>
#include <boost\spirit\include\lex.hpp>
#include <boost\spirit\include\lex_lexertl.hpp>
using namespace std;
using namespace boost::spirit;
using boost::spirit::utree;
//
// Tokens used by the command grammar
//
template <typename Lexer>
struct command_tokens : lex::lexer <Lexer>
{
command_tokens () :
//
// Verbs, with abbreviation (just enough characters to make each unique)
//
boot ("B(O(O(T)?)?)?"),
exit ("E(X(I(T)?)?)?"),
help ("H(E(L(P)?)?)?"),
dash_help ("-H(E(L(P)?)?)?"),
slash_help ("\\/H(E(L(P)?)?)?"),
load ("L(O(A(D)?)?)?"),
quit ("Q(U(I(T)?)?)?"),
set ("SE(T)?"),
show ("SH(O(W)?)?"),
//
// Nouns, with abbreviation (the minimum number of characters is usually 3, but may be more to ensure uniqueness)
//
debug ("DEB(U(G)?)?"),
drive ("DRI(V(E)?)?"),
trace ("TRA(C(E)?)?"),
//
// Qualifiers
//
on ("ON"),
off ("OFF"),
//
// Tokens to pass back to the grammar
//
quoted_string ("...")
{
using namespace boost::spirit::lex;
//
// Associate the tokens with the lexer
//
this->self
= boot
| exit
| help
| dash_help
| slash_help
| load
| quit
| set
| show
| debug
| drive
| trace
| off
| on
| quoted_string
;
//
// Define whitespace to ignore: space, tab, newline
//
this->self ("WS")
= lex::token_def <> ("[ \\t\\n]+")
;
}
lex::token_def <> boot;
lex::token_def <> dash_help;
lex::token_def <> debug;
lex::token_def <string> drive;
lex::token_def <> exit;
lex::token_def <> help;
lex::token_def <> load;
lex::token_def <> off;
lex::token_def <> on;
lex::token_def <> quit;
lex::token_def <string> quoted_string;
lex::token_def <> set;
lex::token_def <> show;
lex::token_def <> slash_help;
lex::token_def <> trace;
};
//
// Display parse error
//
struct error_handler_
{
template <typename, typename, typename>
struct result
{
typedef void type;
};
template <typename Iterator>
void operator ()
(
qi::info const& What,
Iterator Err_pos,
Iterator Last
) const
{
cout << "Error! Expecting "
<< What
<< " here: \""
<< string (Err_pos, Last)
<< "\""
<< endl;
}
};
boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();
//
// Grammar describing the valid commands
//
template <typename Iterator, typename Lexer>
struct command_grammar : qi::grammar <Iterator>
{
template <typename Lexer>
command_grammar (command_tokens <Lexer> const& Tok) :
command_grammar::base_type (start)
{
using qi::on_error;
using qi::fail;
using qi::char_;
start
= +commands;
commands
= (
boot_command
| exit_command
| help_command
| load_command
| set_command
| show_command
);
boot_command
= Tok.boot;
exit_command
= Tok.exit
| Tok.quit;
help_command
= Tok.help
| Tok.dash_help
| Tok.slash_help;
load_command
= Tok.load >> Tok.quoted_string;
set_command
= Tok.set;
show_command
= Tok.show;
set_property
= debug_property
| drive_property
| trace_property;
debug_property
= Tok.debug >> on_off;
drive_property
= Tok.drive >> char_ ("A-Z") >> char_ (":");
trace_property
= Tok.trace >> on_off;
on_off
= Tok.on
| Tok.off;
BOOST_SPIRIT_DEBUG_NODE (start);
BOOST_SPIRIT_DEBUG_NODE (commands);
BOOST_SPIRIT_DEBUG_NODE (boot_command);
BOOST_SPIRIT_DEBUG_NODE (exit_command);
BOOST_SPIRIT_DEBUG_NODE (help_command);
BOOST_SPIRIT_DEBUG_NODE (load_command);
BOOST_SPIRIT_DEBUG_NODE (quit_command);
BOOST_SPIRIT_DEBUG_NODE (set_command);
BOOST_SPIRIT_DEBUG_NODE (show_command);
BOOST_SPIRIT_DEBUG_NODE (set_property);
BOOST_SPIRIT_DEBUG_NODE (debug_property);
BOOST_SPIRIT_DEBUG_NODE (drive_property);
BOOST_SPIRIT_DEBUG_NODE (trace_property);
BOOST_SPIRIT_DEBUG_NODE (target_property);
on_error <fail> (start, error_handler (_4, _3, _2));
}
qi::rule <Iterator> start;
qi::rule <Iterator> commands;
qi::rule <Iterator> boot_command;
qi::rule <Iterator> exit_command;
qi::rule <Iterator> help_command;
qi::rule <Iterator> load_command;
qi::rule <Iterator> quit_command;
qi::rule <Iterator> set_command;
qi::rule <Iterator> show_command;
qi::rule <Iterator> set_property;
qi::rule <Iterator> debug_property;
qi::rule <Iterator, string ()> drive_property;
qi::rule <Iterator> target_property;
qi::rule <Iterator> trace_property;
qi::rule <Iterator> on_off;
};
int
main
(
int Argc,
PCHAR Argv
)
{
typedef std::string::iterator base_iterator_type;
typedef lex::lexertl::token <base_iterator_type> token_type;
typedef lex::lexertl::lexer <token_type> lexer_type;
typedef command_tokens <lexer_type> command_tokens;
typedef command_tokens::iterator_type iterator_type;
typedef command_grammar <iterator_type, command_tokens::lexer_def> command_grammar;
command_tokens tokens;
command_grammar commands (tokens);
string input = "SET DRIVE C:";
string::iterator it = input.begin ();
iterator_type iter = tokens.begin (it, input.end ());
iterator_type end = tokens.end ();
string ws ("WS");
bool result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]);
if (result)
{
cout << "Parse succeeded" << endl;
}
else
{
string rest (it, input.end ());
cout << "Parse failed" << endl;
cout << "Stopped at " << rest << endl;
}
return 0;
} // End of main
I'm going to side-step the majority of your code, for the simple reasons that experience tells me that Lex and utree are generally not what you want to use.
What you do want is define an AST to represent your command language and then come up with a grammar to build it.
AST
namespace Ast {
struct NoValue {
bool operator==(NoValue const &) const { return true; }
};
template <typename Tag> struct GenericCommand {};
namespace tag {
struct boot;
struct help;
struct load;
struct exit;
struct set;
struct show;
};
template <> struct GenericCommand<tag::load> { std::string name; };
template <> struct GenericCommand<tag::set> {
std::string property;
boost::variant<NoValue, std::string, bool> value; // optional
};
using BootCmd = GenericCommand<tag::boot>;
using HelpCmd = GenericCommand<tag::help>;
using ExitCmd = GenericCommand<tag::exit>;
using ShowCmd = GenericCommand<tag::show>;
using LoadCmd = GenericCommand<tag::load>;
using SetCmd = GenericCommand<tag::set>;
using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
using Commands = std::list<Command>;
}
The full code only adds debug output helpers. And here's the full Fusion Adaption:
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
Grammar
Here I make some choices:
let's make things white-space and case insensitive, allowing line-separated commands: (see also Boost spirit skipper issues)
start = skip(blank) [lazy_command % eol];
let's use Nabialek Trick to associate commands with prefixes. I used a very simple snippet of code to generate the unique prefixes:
std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", };
for (auto const full : verbs)
for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) {
auto n = std::distance(verbs.lower_bound(partial), verbs.upper_bound(full));
if (n < 2) std::cout << "(\"" << partial << "\", &" << full << "_command)\n";
}
you could do the same for properties, but I thought the current setup is simpler:
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
command_grammar() : command_grammar::base_type(start) {
using namespace qi;
start = skip(blank) [lazy_command % eol];
// nabialek trick
lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];
on_off.add("on", true)("off", false);
commands.add
("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
("set", &set_command) ("se", &set_command)
("show", &show_command) ("sho", &show_command) ("sh", &show_command);
quoted_string = '"' >> +~char_('"') >> '"';
// nullary commands
boot_command_ = eps;
exit_command_ = eps;
help_command_ = eps;
show_command_ = eps;
// non-nullary commands
load_command_ = quoted_string;
drive_ = char_("A-Z") >> ':';
set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
| no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
| no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
;
BOOST_SPIRIT_DEBUG_NODES(
(start)(lazy_command)
(boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
(boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
(quoted_string)(drive_)
)
on_error<fail>(start, error_handler_(_4, _3, _2));
on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
boot_command = boot_command_;
exit_command = exit_command_;
help_command = help_command_;
load_command = load_command_;
exit_command = exit_command_;
set_command = set_command_;
show_command = show_command_;
}
private:
struct error_handler_t {
template <typename...> struct result { typedef void type; };
void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
}
};
boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};
qi::rule<Iterator, Ast::Commands()> start;
using Skipper = qi::blank_type;
using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;
qi::symbols<char, bool> on_off;
qi::symbols<char, CommandRule const*> commands;
qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;
qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;
qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
Test Cases
Live On Coliru
int main() {
typedef std::string::const_iterator It;
command_grammar<It> const commands;
for (std::string const input : {
"help",
"set drive C:",
"SET DRIVE C:",
"loAD \"XYZ\"",
"load \"anything \nat all\"",
// multiline
"load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
})
{
std::cout << "----- '" << input << "' -----\n";
It f = input.begin(), l = input.end();
Ast::Commands parsed;
bool result = parse(f, l, commands, parsed);
if (result) {
for (auto& cmd : parsed) {
std::cout << "Parsed " << cmd << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
}
}
}
Prints:
----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything
at all"' -----
Parsed LOAD (anything
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace Ast {
struct NoValue {
bool operator==(NoValue const &) const { return true; }
friend std::ostream& operator<<(std::ostream& os, NoValue) { return os; }
};
template <typename Tag> struct GenericCommand {};
namespace tag {
struct boot {};
struct help {};
struct load {};
struct exit {};
struct set {};
struct show {};
static std::ostream& operator<<(std::ostream& os, boot) { return os << "BOOT"; }
static std::ostream& operator<<(std::ostream& os, help) { return os << "HELP"; }
static std::ostream& operator<<(std::ostream& os, load) { return os << "LOAD"; }
static std::ostream& operator<<(std::ostream& os, exit) { return os << "EXIT"; }
static std::ostream& operator<<(std::ostream& os, set ) { return os << "SET"; }
static std::ostream& operator<<(std::ostream& os, show) { return os << "SHOW"; }
};
template <> struct GenericCommand<tag::load> { std::string name; };
template <> struct GenericCommand<tag::set> {
std::string property;
boost::variant<NoValue, std::string, bool> value; // optional
};
using BootCmd = GenericCommand<tag::boot>;
using HelpCmd = GenericCommand<tag::help>;
using ExitCmd = GenericCommand<tag::exit>;
using ShowCmd = GenericCommand<tag::show>;
using LoadCmd = GenericCommand<tag::load>;
using SetCmd = GenericCommand<tag::set>;
using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
using Commands = std::list<Command>;
template <typename Tag>
static inline std::ostream& operator<<(std::ostream& os, Ast::GenericCommand<Tag> const& command) {
return os << Tag{} << " " << boost::fusion::as_vector(command);
}
}
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
command_grammar() : command_grammar::base_type(start) {
using namespace qi;
start = skip(blank) [lazy_command % eol];
// nabialek trick
lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];
on_off.add("on", true)("off", false);
commands.add
("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
("set", &set_command) ("se", &set_command)
("show", &show_command) ("sho", &show_command) ("sh", &show_command);
quoted_string = '"' >> +~char_('"') >> '"';
// nullary commands
boot_command_ = eps;
exit_command_ = eps;
help_command_ = eps;
show_command_ = eps;
// non-nullary commands
load_command_ = quoted_string;
drive_ = char_("A-Z") >> ':';
set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
| no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
| no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
;
BOOST_SPIRIT_DEBUG_NODES(
(start)(lazy_command)
(boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
(boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
(quoted_string)(drive_)
)
on_error<fail>(start, error_handler_(_4, _3, _2));
on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
boot_command = boot_command_;
exit_command = exit_command_;
help_command = help_command_;
load_command = load_command_;
exit_command = exit_command_;
set_command = set_command_;
show_command = show_command_;
}
private:
struct error_handler_t {
template <typename...> struct result { typedef void type; };
void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
}
};
boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};
qi::rule<Iterator, Ast::Commands()> start;
using Skipper = qi::blank_type;
using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;
qi::symbols<char, bool> on_off;
qi::symbols<char, CommandRule const*> commands;
qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;
qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;
qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
int main() {
typedef std::string::const_iterator It;
command_grammar<It> const commands;
for (std::string const input : {
"help",
"set drive C:",
"SET DRIVE C:",
"loAD \"XYZ\"",
"load \"anything \nat all\"",
// multiline
"load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
})
{
std::cout << "----- '" << input << "' -----\n";
It f = input.begin(), l = input.end();
Ast::Commands parsed;
bool result = parse(f, l, commands, parsed);
if (result) {
for (auto& cmd : parsed) {
std::cout << "Parsed " << cmd << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
}
}
}
POST-SCRIPT
Q. How do I annotate the parser to create an AST using utree?
See above
Q. How do I walk the utree after it is built, to discover what was parsed?
See above, see also http://www.boost.org/doc/libs/1_64_0/doc/html/variant/tutorial.html
Q. I want to add a comment character, "!". So, how can I ignore everything after that - except when it occurs in a quoted string?
Simply make the Skipper type a rule that parses e.g.:
qi::rule<Iterator> my_skipper;
my_skipper = blank | '!' >> *(char_ - eol) >> (eol|eoi);
Then use it instead of skip(blank) like skip(my_skipper)
Q. Why doesn't my error handler get called when I give it invalid input?
Because you didn't mark expectation points (operator> instead of operator>>). If you don't, a failure to match a sub-expression simply backtracks.
Q. How can I make the command tokens case insensitive, but not change the contents of a quoted string?
See above

Why can I not access the value in a semantic action?

I'm trying to write a parser to create an AST using boost::spirit. As a first step I'm trying to wrap numerical values in an AST node. This is the code I'm using:
AST_NodePtr make_AST_NodePtr(const int& i) {
return std::make_shared<AST_Node>(i);
}
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace l = qi::labels;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr(), ascii::space_type> {
test_grammar() : test_grammar::base_type(test) {
test = qi::int_ [qi::_val = make_AST_NodePtr(qi::_1)];
}
qi::rule<Iterator, AST_NodePtr(), ascii::space_type> test;
};
As far as I understood it from the documentation q::_1 should contain the value parsed by qi::int_, but the above code always gives me an error along the lines
invalid initialization of reference of type ‘const int&’ from expression of type ‘const _1_type {aka const boost::phoenix::actor<boost::spirit::argument<0> >}
Why does this not work even though qi::_1 is supposed to hold the parsed valued? How else would I parse the input into a custom AST?
You're using a regular function inside the semantic action.
This means that in the contructor the compiler will try to invoke that make_AST_NodePtr function with the argument supplied: qi::_1.
Q. Why does this not work even though qi::_1 is supposed to hold the parsed valued?
A. qi::_1 does not hold the parsed value. It represents (is-a-placeholder-for) the first unbound argument in the function call
This can /obviously/ never work. The function expects an integer.
So what gives?
You need to make a "lazy" or "deferred" function for use in the semantic action. Using only pre-supplied Boost Phoenix functors, you could spell it out:
test = qi::int_ [ qi::_val = px::construct<AST_NodePtr>(px::new_<AST_Node>(qi::_1)) ];
You don't need the helper function this way. But the result is both ugly and suboptimal. So, let's do better!
Using a Phoenix Function wrapper
struct make_shared_f {
std::shared_ptr<AST_Node> operator()(int v) const {
return std::make_shared<AST_Node>(v);
}
};
px::function<make_shared_f> make_shared_;
With this defined, you can simplify the semantic action to:
test = qi::int_ [ qi::_val = make_shared_(qi::_1) ];
Actually, if you make it generic you can reuse it for many types:
template <typename T>
struct make_shared_f {
template <typename... Args>
std::shared_ptr<T> operator()(Args&&... args) const {
return std::make_shared<T>(std::forward<Args>(args)...);
}
};
px::function<make_shared_f<AST_Node> > make_shared_;
DEMO
Here's a self-contained example showing some style fixes in the process:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <memory>
struct AST_Node {
AST_Node(int v) : _value(v) {}
int value() const { return _value; }
private:
int _value;
};
using AST_NodePtr = std::shared_ptr<AST_Node>;
AST_NodePtr make_AST_NodePtr(const int& i) {
return std::make_shared<AST_Node>(i);
}
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr()> {
test_grammar() : test_grammar::base_type(start) {
using boost::spirit::ascii::space;
start = qi::skip(space) [ test ];
test = qi::int_ [ qi::_val = make_shared_(qi::_1) ];
}
private:
struct make_shared_f {
std::shared_ptr<AST_Node> operator()(int v) const {
return std::make_shared<AST_Node>(v);
}
};
px::function<make_shared_f> make_shared_;
//
qi::rule<Iterator, AST_NodePtr()> start;
qi::rule<Iterator, AST_NodePtr(), boost::spirit::ascii::space_type> test;
};
int main() {
AST_NodePtr parsed;
std::string const input ("42");
auto f = input.begin(), l = input.end();
test_grammar<std::string::const_iterator> g;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parsed: " << (parsed? std::to_string(parsed->value()) : "nullptr") << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l)
{
std::cout << "Remaining input: '" << std::string(f, l) << "'\n";
}
}
Prints
Parsed: 42
BONUS: Alternative using BOOST_PHOENIX_ADAPT_FUNCTION
You can actually use your free function if you wish, and use it as follows:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <memory>
struct AST_Node {
AST_Node(int v) : _value(v) {}
int value() const { return _value; }
private:
int _value;
};
using AST_NodePtr = std::shared_ptr<AST_Node>;
AST_NodePtr make_AST_NodePtr(int i) {
return std::make_shared<AST_Node>(i);
}
BOOST_PHOENIX_ADAPT_FUNCTION(AST_NodePtr, make_AST_NodePtr_, make_AST_NodePtr, 1)
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
template<typename Iterator>
struct test_grammar : qi::grammar<Iterator, AST_NodePtr()> {
test_grammar() : test_grammar::base_type(start) {
using boost::spirit::ascii::space;
start = qi::skip(space) [ test ] ;
test = qi::int_ [ qi::_val = make_AST_NodePtr_(qi::_1) ] ;
}
private:
qi::rule<Iterator, AST_NodePtr()> start;
qi::rule<Iterator, AST_NodePtr(), boost::spirit::ascii::space_type> test;
};
int main() {
AST_NodePtr parsed;
std::string const input ("42");
auto f = input.begin(), l = input.end();
test_grammar<std::string::const_iterator> g;
bool ok = qi::parse(f, l, g, parsed);
if (ok) {
std::cout << "Parsed: " << (parsed? std::to_string(parsed->value()) : "nullptr") << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l)
{
std::cout << "Remaining input: '" << std::string(f, l) << "'\n";
}
}

parse enum using boost spirit qi parser

I am trying to parse char to fill in a C++11 strongly typed enum. I need help with writing a parser for the enums.. it needs to be high performance as well.
I have a string with the following format
Category | Type | Attributes
Example:
std::string str1 = "A|D|name=tim, address=3 infinite loop"
std::string str2 = "A|C|name=poc, address=5 overflow street"
I am representing Category and Type as follows:
enum class CATEGORY : char
{
Animal:'A', Bird:'B'
}
enum class TYPE : char
{
Dog:'D', Bird:'B'
}
struct Zoo
{
Category category;
Type type;
std::string name;
std::string address;
};
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
using qi::char_;
using qi::_1;
using qi::lit
using boost::phoenix::ref;
//need help here
start_=char_[/*how to assign enum */ ]>>'|'
>>char_[ /*how to assign enum */ ]>>'|'
>>lit;
}
qi::rule<Iterator, ascii::space_type> start_;
};
I have problem around creating a parser type like the built in ex: qi::char_ to "parse enums CATEGORY and TYPE".
thanks for the help in advance..
As usual there's several approaches:
The semantic action way (ad-hoc)
The customization points way
The qi::symbols way
Which is the most appropriate depends. All three approaches should be equally efficient. The symbols<> apprach seems to be most safe (not involving casts) and flexible: you can e.g. use it with variable-length enum members, use it inside no_case[] etc.
Case by case:
The semantic action way (ad-hoc):
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
using namespace qi;
category_ = char_("AB") [ _val = phx::static_cast_<Category>(_1) ];
type_ = char_("DB") [ _val = phx::static_cast_<Type>(_1) ];
start_ = category_ >> '|' > type_;
}
private:
qi::rule<Iterator, Category(), ascii::space_type> category_;
qi::rule<Iterator, Type(), ascii::space_type> type_;
qi::rule<Iterator, ascii::space_type> start_;
};
You can see it Live On Coliru printing:
Parse success: [A, D]
Remaining unparsed input '|name=tim, address=3 infinite loop'
---------------------------
expected: tag: char-set
got: "C|name=poc, address=5 overflow street"
Expectation failure: boost::spirit::qi::expectation_failure at 'C|name=poc, address=5 overflow street'
---------------------------
The customization points way:
namespace boost { namespace spirit { namespace traits {
template <typename Enum, typename RawValue>
struct assign_to_attribute_from_value<Enum, RawValue, typename enable_if<is_enum<Enum>>::type> {
static void call(RawValue const& raw, Enum& cat) {
cat = static_cast<Enum>(raw);
}
};
}}}
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = qi::char_("AB") > '|' > qi::char_("DB");
}
private:
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
See it Live On Coliru too, with the same output (obviously)
The qi::symbols way:
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = category_ > '|' > type_;
}
private:
struct Category_ : qi::symbols<char,Category> {
Category_() {
this->add("A", Category::Animal)("B", Category::Bird);
}
} category_;
struct Type_ : qi::symbols<char,Type> {
Type_() {
this->add("D", Type::Dog)("B", Type::Bird);
}
} type_;
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
See it Live On Coliru
Full Demo
This happens to be the traits approach, but you can reuse the skeleton with both other grammars:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/struct.hpp>
enum class Category : char { Animal='A', Bird='B' };
enum class Type : char { Dog='D', Bird='B' };
struct Zoo {
Category category;
Type type;
};
BOOST_FUSION_ADAPT_STRUCT(Zoo, (Category,category)(Type,type))
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phx = boost::phoenix;
namespace boost { namespace spirit { namespace traits {
template <typename Enum, typename RawValue>
struct assign_to_attribute_from_value<Enum, RawValue, typename enable_if<is_enum<Enum>>::type> {
static void call(RawValue const& raw, Enum& cat) {
cat = static_cast<Enum>(raw);
}
};
}}}
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
start_ = qi::char_("AB") > '|' > qi::char_("DB");
}
private:
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};
/////////////////////////////////////////////////
// For exception output
struct printer {
typedef boost::spirit::utf8_string string;
void element(string const& tag, string const& value, int depth) const {
for (int i = 0; i < (depth*4); ++i) std::cout << ' '; // indent to depth
std::cout << "tag: " << tag;
if (value != "") std::cout << ", value: " << value;
std::cout << std::endl;
}
};
void print_info(boost::spirit::info const& what) {
using boost::spirit::basic_info_walker;
printer pr;
basic_info_walker<printer> walker(pr, what.tag, 0);
boost::apply_visitor(walker, what.value);
}
//
/////////////////////////////////////////////////
int main()
{
typedef std::string::const_iterator It;
static const ZooBuilderGrammar<It> p;
for (std::string const str1 : {
"A|D|name=tim, address=3 infinite loop",
"A|C|name=poc, address=5 overflow street" })
{
It f(str1.begin()), l(str1.end());
try {
Zoo zoo;
bool ok = qi::phrase_parse(f,l,p,ascii::space,zoo);
if (ok)
std::cout << "Parse success: [" << static_cast<char>(zoo.category) << ", " << static_cast<char>(zoo.type) << "]\n";
else
std::cout << "Failed to parse '" << str1 << "'\n";
if (f!=l)
std::cout << "Remaining unparsed input '" << std::string(f,l) << "'\n";
} catch(qi::expectation_failure<It> const& x)
{
std::cout << "expected: "; print_info(x.what_);
std::cout << "got: \"" << std::string(x.first, x.last) << '"' << std::endl;
}
std::cout << "---------------------------\n";
}
}
I'd use the qi::symbols way as sugested by sehe, but in this way to improve code readability:
template <typename Iterator>
struct ZooBuilderGrammar : qi::grammar<Iterator, Zoo(), ascii::space_type>
{
ZooBuilderGrammar():ZooBuilderGrammar::base_type(start_)
{
category_.add
("A", Category::Animal)
("B", Category::Bird)
;
type_.add
("D", Type::Dog)
("B", Type::Bird)
;
start_ = category_ > '|' > type_;
}
private:
qi::symbols<char,Type> category_;
qi::symbols<char,Category> type_;
qi::rule<Iterator, Zoo(), ascii::space_type> start_;
};

boost spirit on_error not triggered

^ No it is not. This was part of the problem, but if review the code as is right now, it already does what the pointed out question/answer shows ... and the errors are still not triggered.
I have this boost spirit parser for string literal. It works. Now I would like to start handle errors when it fail. I copied the on_error handle 1-1 from the mini xml example and it compiles, but it is never triggered (no errors are outputted).
This is the parser:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct my_handler_f
{
template <typename...> struct result { typedef void type; };
template <typename... T>
void operator()(T&&...) const {
std::cout << "\nmy_handler_f() invoked with " << sizeof...(T) << " arguments\n";
}
};
struct append_utf8_f
{
template <typename, typename>
struct result { typedef void type; };
template <typename INT>
void operator()(INT in, std::string& to) const
{
auto out = std::back_inserter(to);
boost::utf8_output_iterator<decltype(out)> convert(out);
*convert++ = in;
}
};
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
struct RangePosition { size_t beginLine, endLine; };
struct String : public RangePosition
{
String()
: RangePosition()
, value()
, source()
{
}
std::string value;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(String,
(std::string, value)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
struct escape_symbols : qi::symbols<char, char>
{
escape_symbols()
{
add
("\'" , '\'')
("\"" , '\"')
("\?" , '\?')
("\\" , '\\')
("0" , '\0')
("a" , '\a')
("b" , '\b')
("f" , '\f')
("n" , '\n')
("r" , '\r')
("t" , '\t')
("v" , '\v')
;
}
} escape_symbol;
source_string() : source_string::base_type(start)
{
using qi::raw;
using qi::_val;
using qi::_1;
using qi::_2;
using qi::_3;
using qi::_4;
using qi::space;
using qi::omit;
using qi::no_case;
using qi::print;
using qi::eps;
using qi::on_error;
using qi::fail;
using qi::lit;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
using phx::ref;
using phx::val;
escape %= escape_symbol;
character %= (no_case["\\x"] > hex12)
| ("\\" > (oct123 | escape))
| (print - (lit('"') | '\\'));
unicode = ("\\u" > hex4[append_utf8(_1, _val)])
| ("\\U" > hex8[append_utf8(_1, _val)]);
string_section %= '"' > *(unicode | character) > '"';
string %= string_section % omit[*space];
main = raw [
string[at_c<0>(_val) = _1]
]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
];
start %= eps > main;
on_error<fail>(start, my_handler);
}
boost::phoenix::function<my_handler_f> my_handler;
qi::rule<Iterator, std::string()> escape;
qi::uint_parser<char, 16, 1, 2> hex12;
qi::uint_parser<char, 8, 1, 3> oct123;
qi::rule<Iterator, std::string()> character;
qi::uint_parser<uint16_t, 16, 4, 4> hex4;
qi::uint_parser<uint32_t, 16, 8, 8> hex8;
boost::phoenix::function<append_utf8_f> append_utf8;
qi::rule<Iterator, std::string()> unicode;
qi::rule<Iterator, std::string()> string_section;
qi::rule<Iterator, std::string()> string;
boost::phoenix::function<get_line_f> get_line_;
qi::rule<Iterator, String(), qi::space_type> main;
qi::rule<Iterator, String(), qi::space_type> start;
};
and this is the test code
int main()
{
std::string str[] =
{
"\"\\u1234\\U0002345\"",
//"\"te\"\"st\"",
//"\"te\" \"st\"",
//"\"te\" \n \"st\"",
//"\"\"",
//"\"\\\"\"",
//"\"test\"",
//"\"test\" something",
//"\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
//"\"\\x61cd\\X3012\\x7z\"",
//"\"\\141cd\\06012\\78\\778\"",
"\"te",
//"\"te\nst\"",
//"\"test\\\"",
//"\"te\\st\"",
//
};
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> Iterator;
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
source_string<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
String string;
bool r = phrase_parse(iter, end, g, qi::space, string);
if (r)
std::cout << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
else
std::cout << "Parsing failed\n";
}
}

How do I capture the original input into the synthesized output from a spirit grammar?

I'm working on a boost::spirit::qi::grammar and would like to copy a portion of the original text into the synthesized output structure of the grammar (more specifically, the portion that matched one of the components of the rule). The grammar would ultimately be used as a sub-grammar for a more complicated grammar, so I don't really have access to the original input.
I'm guessing that this can be done through semantic actions or the grammar context, but I can't find an example that does this without access to the original parse().
Here's what I have so far:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct A
{
std::string header;
std::vector<int> ints;
std::string inttext;
};
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(std::vector<int>, ints)
//(std::string, inttext)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ qi::int_ % qi::char_(",_") ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.inttext << " -> [ ";
for( auto & i: output.ints )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Something similar to what you asked without using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string::const_iterator begin;
std::string::const_iterator end;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::string::const_iterator, begin)
(std::vector<int>, data)
(std::string::const_iterator, end)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints %= qi::lexeme[ iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos ]; // <---- capture the original text that matches this into inttext
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << std::string(output.ints.begin,output.ints.end) << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Using semantic actions:
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using boost::spirit::repository::qi::iter_pos;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[
(iter_pos >> qi::int_ % qi::char_(",_") >> iter_pos)
[phx::at_c<0>(qi::_val)=qi::_2,
phx::at_c<1>(qi::_val)=phx::construct<std::string>(qi::_1,qi::_3)]
];
start %= header >> ' ' >> ints;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, ints_type() > ints;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
Another alternative using a custom directive dont_eat that returns the subject attribute but does not consume any input. This is possibly slower since the rule ints is parsed twice, but I believe that the syntax is nicer (and it's a good excuse to try creating your own directive)(It's a slightly modified version of "boost/spirit/home/qi/directive/lexeme.hpp").
dont_eat.hpp
#if !defined(DONT_EAT_HPP)
#define DONT_EAT_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(dont_eat);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::dont_eat> // enables dont_eat
: mpl::true_ {};
}}
namespace custom
{
template <typename Subject>
struct dont_eat_directive : boost::spirit::qi::unary_parser<dont_eat_directive<Subject> >
{
typedef Subject subject_type;
dont_eat_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef typename
boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
type;
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
Iterator temp = first;
boost::spirit::qi::skip_over(temp, last, skipper);
return subject.parse(temp, last, context, skipper, attr);
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("dont_eat", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::dont_eat, Subject, Modifiers>
{
typedef custom::dont_eat_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::dont_eat_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::dont_eat_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "dont_eat.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[qi::int_ % qi::char_(",_")];
ints_string = custom::dont_eat[ints] >> qi::as_string[qi::raw[ints]];
start %= header >> ' ' >> ints_string;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, ints_type() > ints_string;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
bool r = qi::parse(iter, input.end(), p, output);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}
This directive returns a fusion::vector2<> with the subject's attribute as its first member and the string corresponding to the synthesized attribute as its second. I think this is the easiest method to reuse as long as you adapt your structs adequately. I'm not sure that this fusion::vector2<> is the best way to handle the attributes but in the limited testing I've done it has worked fine. With this directive the ints_string rule would simply be:
ints_string=custom::annotate[ints];
//or ints_string=custom::annotate[qi::lexeme[qi::int_ % qi::char_(",_")]];
Example on LWS.
annotate.hpp
#if !defined(ANNOTATE_HPP)
#define ANNOTATE_HPP
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(annotate);
}
namespace boost { namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::annotate> // enables annotate
: mpl::true_ {};
}}
namespace custom
{
template <typename Subject>
struct annotate_directive : boost::spirit::qi::unary_parser<annotate_directive<Subject> >
{
typedef Subject subject_type;
annotate_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef
boost::fusion::vector2<
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type
,std::string
>
type;
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
boost::spirit::qi::skip_over(first, last, skipper);
Iterator save = first;
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type attr_;
if(subject.parse(first, last, context, skipper, attr_))
{
boost::spirit::traits::assign_to(attr_,boost::fusion::at_c<0>(attr));
boost::spirit::traits::assign_to(std::string(save,first),boost::fusion::at_c<1>(attr));
return true;
}
first = save;
return false;
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("annotate", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::annotate, Subject, Modifiers>
{
typedef custom::annotate_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}}}
namespace boost { namespace spirit { namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::annotate_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::annotate_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}}}
#endif
main.cpp
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include "annotate.hpp"
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct ints_type
{
std::vector<int> data;
std::string inttext;
};
struct A
{
std::string header;
ints_type ints;
};
BOOST_FUSION_ADAPT_STRUCT(
ints_type,
(std::vector<int>, data)
(std::string, inttext)
)
BOOST_FUSION_ADAPT_STRUCT(
A,
(std::string, header)
(ints_type, ints)
)
template <typename Iterator>
struct parser : qi::grammar< Iterator, A() >
{
parser() : parser::base_type(start)
{
header %= qi::lexeme[ +qi::alpha ];
ints = qi::lexeme[qi::int_ % qi::char_(",_")];
ints_string = custom::annotate[ints];
start %= header >> ' ' >> ints_string;
}
qi::rule<Iterator, std::string()> header;
qi::rule<Iterator, std::vector<int>() > ints;
qi::rule<Iterator, ints_type() > ints_string;
qi::rule<Iterator, A()> start;
};
int main()
{
A output;
std::string input("out 1,2_3");
auto iter = input.begin();
parser<decltype(iter)> p;
std::string annotation;
bool r = qi::parse(iter, input.end(), custom::annotate[p], output, annotation);
if( !r || iter != input.end() )
{
std::cout << "did not parse";
}
else
{
// would like output.inttext to be "1,2_3"
std::cout << "annotation: " << annotation << std::endl;
std::cout << output.header << ": " << output.ints.inttext << " -> [ ";
for( auto & i: output.ints.data )
std::cout << i << ' ';
std::cout << ']' << std::endl;
}
}