How to get error position using the spirit parser - c++

I wrote a simple parser with spirit, akin to json (but simpler and more specialised). By following the advice in here, I tried to implement error handling by tracking the error position. In particular, my parsing function is as follows
bool parse_properties(std::istream& is, const std::string &filename, PropertyList &pset)
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace classic = boost::spirit::classic;
typedef std::istreambuf_iterator<char> base_iterator_type;
base_iterator_type in_begin(is);
// convert input iterator to forward iterator, usable by spirit parser
typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
forward_iterator_type fwd_end;
// wrap forward iterator with position iterator, to record the position
typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
pos_iterator_type position_begin(fwd_begin, fwd_end, filename);
pos_iterator_type position_end;
qi::rule<pos_iterator_type> skipper = ascii::space |
'#' >> *(ascii::char_ - qi::eol) >> qi::eol;
property_set_grammar<pos_iterator_type, qi::rule<pos_iterator_type> > g;
bool r = false;
try {
r = phrase_parse(position_begin,
position_end,
g, skipper, pset);
}
catch(const qi::expectation_failure<pos_iterator_type>& e) {
const classic::file_position_base<std::string>& pos = e.first.get_position();
std::stringstream msg;
msg <<
"parse error at file " << pos.file <<
" line " << pos.line << " column " << pos.column << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here";
throw std::runtime_error(msg.str());
}
return r;
}
Unfortunately, it does not work. Function phrase_parse always returns false immediately, both for correct and for incorrect files, and never raises any exception.
However, when I modify the above code to use a simple forward_iterator instead of the classic::position_iterator2 it works fine, but of course it does not track the error position. The very strange thing is that the original example in here works fine. So maybe the problem is related to my grammar. Here it follows:
// Qi grammar for a nested property list of the form
//   type(name) { name = value; ... nested lists ... };
// Iterator is the input iterator type; Skipper is the skip-parser type used by every rule.
// The grammar's attribute is PropertyList.
template <typename Iterator, typename Skipper>
struct property_set_grammar : qi::grammar<Iterator, PropertyList(),
Skipper>
{
// One rule per grammar element; the second template argument is the rule's attribute type.
qi::rule<Iterator, Property(), Skipper> prop;
qi::rule<Iterator, std::string(), Skipper> name;
qi::rule<Iterator, std::string(), Skipper> type;
qi::rule<Iterator, std::string(), Skipper> value;
qi::rule<Iterator, std::string(), Skipper> value_simple;
qi::rule<Iterator, std::string(), Skipper> value_quoted;
qi::rule<Iterator, PropertyList(), Skipper> plist;
// Defines all rules; plist is the start rule.
property_set_grammar() :
property_set_grammar::base_type(plist, "Set of Properties") {
using qi::lit;
using qi::alpha;
using qi::alnum;
using qi::lexeme;
using qi::char_;
// Identifiers: one letter followed by any number of letters/digits, with no skipping inside.
name = lexeme[alpha >> *alnum];
type = lexeme[alpha >> *alnum];
// Unquoted value: a possibly empty run of alphanumerics.
value_simple = lexeme[*(alnum - lit('"'))];
// Quoted value: everything up to the next '"'; `>` is the expectation operator,
// so a missing closing quote is reported as an expectation failure.
value_quoted = lit('"') > lexeme[*(char_ - lit('"'))] > lit('"');
// The quoted form is tried first, then the simple form.
value = (value_quoted | value_simple);
// name = value ;
prop = name >> '=' > value > ';';
// type ( name ) { (prop | nested plist)* } ;
plist = type >> '(' > name > ')' > '{' >> *(prop | plist) > '}' > ';';
}
};
I am using g++ (Ubuntu/Linaro 4.7.2-2ubuntu1) 4.7.2 and version 1.50 of the boost libraries.
Is there anything stupid that I am overlooking? If needed, I can provide the complete code (it's just a few files).

It seems that boost::spirit::position_iterator is bugged.
You can edit the headers of position_iterator as suggested in the answer here.
Or you can implement your own position_iterator. I did this by essentially copying the original code of boost::spirit::position_iterator and then removing the parts I did not need.
Also, when parsing from istream, make sure to set the noskipws manip:
//is >> std::noskipws;
EDIT: Not needed with istreambuf_iterator

Related

Boost Spirit template specialization failure

Below is a very compact version of a grammar I'm trying to write using boost::spirit::qi.
Environment: VS2013, x86, Boost1.64
When #including the header file, the compiler complains about the line
rBlock = "{" >> +(rInvocation) >> "}";
with a very long log (I've only copied the beginning and the end):
more than one partial specialization matches the template argument list
...
...
see reference to function template instantiation
'boost::spirit::qi::rule
&boost::spirit::qi::rule::operator =>(const Expr &)' being compiled
Where is my mistake?
The header file:
//mygrammar.h
#pragma once
#include <boost/spirit/include/qi.hpp>
// AST types and the matching qi::rule typedefs for the grammar.
// Every rule parses from `const char*` and declares no skipper.
namespace myNS
{
// An identifier is stored as a plain string.
typedef std::string Identifier;
typedef ::boost::spirit::qi::rule <const char*, Identifier()> myIdentifierRule;
// An expression is either a single character or an integer.
typedef ::boost::variant<char, int> Expression;
typedef ::boost::spirit::qi::rule <const char*, Expression()> myExpressionRule;
// A named argument of the form `ident = arg`.
struct IdntifierEqArgument
{
Identifier ident;
Expression arg;
};
// An argument is either a named argument or a bare expression.
typedef ::boost::variant < IdntifierEqArgument, Expression > Argument;
typedef ::boost::spirit::qi::rule <const char*, Argument()> myArgumentRule;
typedef ::std::vector<Argument> ArgumentList;
typedef ::boost::spirit::qi::rule <const char*, myNS::ArgumentList()> myArgumentListRule;
// A call: an identifier plus an optional argument list.
struct Invocation
{
Identifier identifier;
::boost::optional<ArgumentList> args;
};
typedef ::boost::spirit::qi::rule <const char*, Invocation()> myInvocationRule;
// A block is a sequence of invocations.
typedef ::std::vector<Invocation> Block;
typedef ::boost::spirit::qi::rule <const char*, myNS::Block()> myBlockRule;
}
BOOST_FUSION_ADAPT_STRUCT(
myNS::IdntifierEqArgument,
(auto, ident)
(auto, arg)
);
BOOST_FUSION_ADAPT_STRUCT(
myNS::Invocation,
(auto, identifier)
(auto, args)
);
namespace myNS
{
// Bundles the rules of the grammar; the constructor assigns each rule its parser expression.
struct myRules
{
myIdentifierRule rIdentifier;
myExpressionRule rExpression;
myArgumentRule rArgument;
myArgumentListRule rArgumentList;
myInvocationRule rInvocation;
myBlockRule rBlock;
myRules()
{
using namespace ::boost::spirit;
using namespace ::boost::spirit::qi;
// Identifier: a letter or '_' followed by letters, digits or '_'.
rIdentifier = as_string[((qi::alpha | '_') >> *(qi::alnum | '_'))];
// Expression: any single character, or an integer.
rExpression = char_ | int_;
// Argument: `identifier = expression`, or a bare expression.
rArgument = (rIdentifier >> "=" >> rExpression) | rExpression;
// Comma-separated list of one or more arguments.
rArgumentList = rArgument >> *("," >> rArgument);
// identifier ( [argument list] )
rInvocation = rIdentifier >> "(" >> -rArgumentList >> ")";
// This is the assignment the compiler rejects with
// "more than one partial specialization matches the template argument list".
rBlock = "{" >> +(rInvocation) >> "}";
}
};
}
I'm not exactly sure where the issue is triggered, but it clearly is a symptom of too many ambiguities in the attribute forwarding rules.
Conceptually this could be triggered by your attribute types having similar/compatible layouts. In language theory terms, you're looking at a mismatch between C++'s nominative type system and the approximation of structural typing used by the attribute propagation system. But enough theorizing :)
I don't think attr_cast<> will save you here as it probably uses the same mechanics and heuristics under the hood.
It drew my attention that making the ArgumentList optional is ... not very useful (as an empty list already accurately reflects the absence of arguments).
So I tried simplifying the rules:
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
And the declared attribute type can simply be ArgumentList instead of boost::optional<ArgumentList>.
This turns out to remove the ambiguity when propagating into the vector<Invocation>, so ... you're saved.
If this feels "accidental" to you, you should! What would I do if this hadn't removed the ambiguity "by chance"? I'd have created a semantic action to propagate the Invocation by simpler mechanics. There's a good chance that fusion::push_back(_val, _1) or similar would have worked.
See also Boost Spirit: "Semantic actions are evil"?
Review And Demo
In the cleaned up review here I present a few fixes/improvements and a test run that dumps the parsed AST.
Separate the AST from the parser (you don't want to use qi in the AST types; in particular, you do not want using namespace directives in the face of generic template libraries)
Do not use auto in the adapt macros. That's not a feature. Instead, since you can ostensibly use C++11, use the C++11 (decltype) based macros
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
AST is leading (also, prefer c++11 for clarity):
// AST types only; nothing in this namespace depends on the parser.
namespace myAST {
using Identifier = std::string;
// An expression is either a single character or an integer.
using Expression = boost::variant<char, int>;
// A named argument of the form `ident = arg`.
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
// An argument is either a named argument or a bare expression.
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
// A call: an identifier plus its (possibly empty) argument list.
struct Invocation {
Identifier identifier;
ArgumentList args;
};
// A block is a sequence of invocations.
using Block = std::vector<Invocation>;
}
It's nice to have the definitions separate
Regarding the parser,
I'd prefer the qi::grammar convention. Also,
You didn't declare any of the rules with a skipper. I "guessed" from context that whitespace is insignificant outside of the rules for Expression and Identifier.
Expression ate every char_, so also would eat ')' or even '3'. I noticed this only when testing and after debugging with:
//#define BOOST_SPIRIT_DEBUG
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
I highly recommend using these facilities
All in all the parser comes down to
namespace myNS {
namespace qi = boost::spirit::qi;
// Grammar producing a myAST::Block. The grammar itself declares no skipper:
// the start rule installs qi::space as the skipper around rBlock.
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
// Identifier: a letter or '_' followed by letters, digits or '_'.
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
// Expression: a single letter, or an integer.
rExpression = qi::alpha | qi::int_;
// Argument: `identifier = expression`, or a bare expression.
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
// Optional comma-separated list; an absent list yields an empty ArgumentList.
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
// Entry point: skip whitespace while parsing the block.
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
// Skipper-less entry rule.
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
// Rules that skip whitespace between their elements.
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
Adding a test driver
int main() {
std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";
auto f = input.data(), l = f + input.size();
myAST::Block block;
bool ok = parse(f, l, myNS::myRules<>{}, block);
if (ok) {
std::cout << "Parse success\n";
for (auto& invocation : block) {
std::cout << invocation.identifier << "(";
for (auto& arg : invocation.args) std::cout << arg << ",";
std::cout << ")\n";
}
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Complete Demo
See it Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
// AST types only; nothing in this namespace depends on the parser.
namespace myAST {
using Identifier = std::string;
// An expression is either a single character or an integer.
using Expression = boost::variant<char, int>;
// A named argument of the form `ident = arg`.
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
// An argument is either a named argument or a bare expression.
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
// A call: an identifier plus its (possibly empty) argument list.
struct Invocation {
Identifier identifier;
ArgumentList args;
};
// A block is a sequence of invocations.
using Block = std::vector<Invocation>;
// for debug printing
// Streams a named argument as `ident=arg`.
static inline std::ostream& operator<<(std::ostream& os, myAST::IdntifierEqArgument const& named) {
return os << named.ident << "=" << named.arg;
}
}
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
namespace myNS {
namespace qi = boost::spirit::qi;
// Grammar producing a myAST::Block. The grammar itself declares no skipper:
// the start rule installs qi::space as the skipper around rBlock.
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
// Identifier: a letter or '_' followed by letters, digits or '_'.
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
// Expression: a single letter, or an integer.
rExpression = qi::alpha | qi::int_;
// Argument: `identifier = expression`, or a bare expression.
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
// Optional comma-separated list; an absent list yields an empty ArgumentList.
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
// Entry point: skip whitespace while parsing the block.
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
// Skipper-less entry rule.
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
// Rules that skip whitespace between their elements.
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
int main() {
std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";
auto f = input.data(), l = f + input.size();
myAST::Block block;
bool ok = parse(f, l, myNS::myRules<>{}, block);
if (ok) {
std::cout << "Parse success\n";
for (auto& invocation : block) {
std::cout << invocation.identifier << "(";
for (auto& arg : invocation.args) std::cout << arg << ",";
std::cout << ")\n";
}
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints output
Parse success
foo()
bar(a,b,42,)
qux(someThing_awful01=9,)
Remaining unparsed input: '
'

Boost spirit parsing objective-C like language

I'm trying to use boost's spirit to reimplement the logos parsing perl script from iPhone jailbreaking development.
An example of input is:
%hook SBLockScreenView
-(void)setCustomSlideToUnlockText:(id)arg1
{
arg1 = #"Changed the slider";
%orig(arg1);
}
%end
I so far have:
// Parser for a single `%hook ... %end` section of a logos source file.
namespace logos
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Parse result: the hooked class name plus the method signature and body as raw text.
struct class_hook
{
std::string class_name;
std::string method_signature;
std::string method_body;
};
// Grammar producing a class_hook; whitespace is skipped via ascii::space_type.
template <typename Iterator>
struct class_hook_parser : qi::grammar<Iterator, class_hook(), ascii::space_type>
{
class_hook_parser() : class_hook_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::on_error;
using qi::fail;
using qi::double_;
using qi::lexeme;
using ascii::char_;
// Class name: a run of letters.
hooked_class %= lexeme[+(char_("a-zA-Z") - '-')];
// Note the grouping: this is `(+char_) - '{'`, not `+(char_ - '{')`.
method_sig %= lexeme[+(char_) - '{'];
// Body: everything up to (not including) the next '}'.
method_body %= lexeme[+(char_ - '}')];
start %=
lit("%hook")
>> hooked_class
>> method_sig
>> method_body
>> lit("%end")
;
// NOTE(review): the rules use only `>>`, so there is presumably no expectation
// point at which this handler could be invoked -- confirm against the Qi docs.
on_error<fail>
(
start,
boost::phoenix::ref(std::cout) << "Something errored!" << std::endl);
}
qi::rule<Iterator, std::string(), ascii::space_type> hooked_class;
qi::rule<Iterator, std::string(), ascii::space_type> method_sig;
qi::rule<Iterator, std::string(), ascii::space_type> method_body;
qi::rule<Iterator, class_hook(), ascii::space_type> start;
};
}
BOOST_FUSION_ADAPT_STRUCT(logos::class_hook,
(std::string, class_name)
(std::string, method_signature)
(std::string, method_body))
typedef std::string::const_iterator iterator_type;
typedef logos::class_hook_parser<iterator_type> class_hook_parser;
using boost::spirit::ascii::space;
std::string::const_iterator
iter = std::begin(tweak_source_code),
end = std::end(tweak_source_code);
class_hook_parser g;
logos::class_hook emp;
bool r = phrase_parse(iter, end, g, space, emp);
if (r) {
std::cout << "Got: " << boost::fusion::as_vector(emp) << std::endl;
}
else std::cout << "Something isn't working" << std::endl;
But this oddly only prints out the Something isn't working message, not the on_fail callback. Where is my mistake in the parsing and how can I get actually working and informative parse error messages?
Did you mean
+(char_ - '{')
instead of
+(char_) - '{'
And likely, you'd require the body to begin with that { that was rejected as part of the signature. Here's my fixed version:
hooked_class = +char_("a-zA-Z");
method_sig = +(char_ - '{');
method_body = '{' >> +(char_ - '}') >> '}';
Notes:
Dropping the skipper allows you to drop the lexeme[] directive too.
Rejecting - from the "a-zA-Z" set is useless (it's not in it...).
method_sig now includes all whitespace (including the trailing newline)
Use BOOST_SPIRIT_DEBUG to get insight in why your grammar works in mysterious ways
See also: Boost spirit skipper issues
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
// Parser for a single `%hook ... %end` section of a logos source file.
namespace logos
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    // Parse result: the hooked class name plus the method signature and body as raw text.
    struct class_hook
    {
        std::string class_name;
        std::string method_signature;
        std::string method_body;
    };

    // Grammar producing a class_hook. Only the start rule skips whitespace; the three
    // string rules are declared without a skipper, so they keep embedded whitespace.
    template <typename Iterator>
    struct class_hook_parser : qi::grammar<Iterator, class_hook(), ascii::space_type>
    {
        class_hook_parser() : class_hook_parser::base_type(start)
        {
            // Only the names actually used below are imported.
            using qi::on_error;
            using qi::fail;
            using ascii::char_;

            // Class name: a run of letters.
            hooked_class = +char_("a-zA-Z");
            // Signature: everything up to (not including) the opening brace.
            method_sig = +(char_ - '{');
            // Body: the text between the braces; the braces themselves are not stored.
            method_body = '{' >> +(char_ - '}') >> '}';

            start = "%hook"
                >> hooked_class
                >> method_sig
                >> method_body
                >> "%end"
                ;

            // NOTE(review): the rules use only `>>`, so there is presumably no expectation
            // point at which this handler could be invoked -- confirm against the Qi docs.
            on_error<fail> (start,
                boost::phoenix::ref(std::cout) << "Something errored\n"
            );

            BOOST_SPIRIT_DEBUG_NODES((hooked_class)(method_sig)(method_body)(start))
        }
      private:
        qi::rule<Iterator, std::string()> hooked_class, method_sig, method_body;
        qi::rule<Iterator, class_hook(), ascii::space_type> start;
    };
}
BOOST_FUSION_ADAPT_STRUCT(logos::class_hook, class_name, method_signature, method_body)
int main() {
typedef std::string::const_iterator iterator_type;
typedef logos::class_hook_parser<iterator_type> class_hook_parser;
std::string const tweak_source_code = R"(
%hook SBLockScreenView
-(void)setCustomSlideToUnlockText:(id)arg1
{
arg1 = #"Changed the slider";
%orig(arg1);
}
%end
)";
using boost::spirit::ascii::space;
iterator_type iter = std::begin(tweak_source_code), end = std::end(tweak_source_code);
class_hook_parser g;
logos::class_hook emp;
bool r = phrase_parse(iter, end, g, space, emp);
if (r) {
std::cout << "Got: " << boost::fusion::as_vector(emp) << "\n";
} else {
std::cout << "Something isn't working\n";
}
}
Prints
Got: (SBLockScreenView -(void)setCustomSlideToUnlockText:(id)arg1
arg1 = #"Changed the slider";
%orig(arg1);
)

What is the correct way to use boost::qi::rule with BOOST_FUSION_ADAPT_STRUCT?

I am attempting to get a qi::rule<> to emit a struct with BOOST_FUSION_ADAPT_STRUCT based on the boost employee example.
I have the following struct and its associated fusion macro:
struct LineOnCommand
{
int lineNum;
std::vector<char> humpType;
};
BOOST_FUSION_ADAPT_STRUCT(
LineOnCommand,
(int, lineNum)
(std::vector<char>, humpType)
)
The associated parsing rules are:
qi::rule<Iterator, std::vector<char> ascii::space_type> humpIdentifer = qi::lit("BH") | qi::lit("DH");
qi::rule<Iterator, LineOnCommand(), ascii::space_type> Cmd_LNON = qi::int_ >> -humpIdentifier >> qi::lit("LNON");
I then have a compound rule, of which all others (including this simple test case) are a part which is passed to the parser:
qi::rule<Iterator, qi::unused_type, ascii::space_type> commands =
+( /* other rules | */ Cmd_LNON /*| other rules */);
bool success = qi::phrase_parse(StartIterator, EndIterator, commands, ascii::space);
The problem comes when I attempt to compile, and I get the error:
<boostsource>/spirit/home/qi/detail/assign_to.hpp(152): error: no suitable constructor exists to convert form "const int" to "LineOnCommand"
attr = static_cast<Attribute>(val);
Clearly I'm doing something wrong, but I'm not sure what. If I understand the way spirit works, the 2nd argument to the template of the rule represents the attribute (i.e. the data type emitted by the rule), and the BOOST_FUSION_ADAPT_STRUCT macro will adapt my struct so that boost knows how to convert a stream that is "int, std::vector" to it.
The only difference between what I'm doing here and the boost employee example is that I'm not using an explicit grammar to do the parsing. My understanding is this is not necessary, and that a rule by itself is sufficient.
What am I doing wrong?
I'm not sure. I think I'm missing the problem. Perhaps, I "naturally" sidestep the problem because your sample is not self-contained.
So, here's my take on it: See it Live On Coliru, in the hope that just comparing things helps you:
I fixed the obvious typos in your rule declaration
I suggested something other than qi::unused_type; if there's no attribute, there's no need to state it; beyond the iterator type, the template arguments to qi::rule and qi::grammar are not positional. So
qi::rule<It, qi::unused_type(), ascii::space_type> r;
qi::rule<It, ascii::space_type, qi::unused_type()> r;
qi::rule<It, ascii::space_type> r;
are all /logically/ equivalent.
Full listing:
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Attribute of one LNON command: the leading integer and the optional hump identifier.
struct LineOnCommand
{
// The integer that starts the command.
int lineNum;
// Characters of the hump identifier ("BH" or "DH"); empty when none was given.
std::vector<char> humpType;
};
BOOST_FUSION_ADAPT_STRUCT(
LineOnCommand,
(int, lineNum)
(std::vector<char>, humpType)
)
// Grammar producing one LineOnCommand per `<int> [BH|DH] LNON` command in the input.
// It is the iterator type; Skipper defaults to ascii::space_type.
template <typename It, typename Skipper = ascii::space_type>
struct parser : qi::grammar<It, std::vector<LineOnCommand>(), Skipper>
{
parser() : parser::base_type(commands)
{
using namespace qi;
// qi::string (unlike qi::lit) exposes the matched text as the attribute.
humpIdentifier = string("BH") | string("DH");
// <int> [hump identifier] LNON
Cmd_LNON = int_ >> -humpIdentifier >> "LNON";
// One or more commands.
commands = +( /* other rules | */ Cmd_LNON /*| other rules */ );
}
private:
qi::rule<It, std::vector<char>(), Skipper> humpIdentifier;
qi::rule<It, LineOnCommand(), Skipper> Cmd_LNON;
qi::rule<It, std::vector<LineOnCommand>(), Skipper> commands;
};
int main()
{
typedef std::string::const_iterator Iterator;
parser<Iterator> p;
std::string const input =
"123 BH LNON\n"
"124 LNON\t\t\t"
"125 DH LNON\n"
"126 INVALID LNON";
auto f(input.begin()), l(input.end());
std::vector<LineOnCommand> data;
bool success = qi::phrase_parse(f, l, p, ascii::space, data);
std::cout << "success:" << std::boolalpha << success << ", "
<< "elements: " << data.size() << "\n";
if (success)
{
for (auto& el : data)
{
std::cout << "Item: " << el.lineNum << ", humpType '" << std::string(el.humpType.begin(), el.humpType.end()) << "'\n";
}
}
if (f!=l)
std::cout << "Trailing unparsed: '" << std::string(f,l) << "'\n";
return success? 0 : 1;
}
Output:
success:true, elements: 3
Item: 123, humpType 'BH'
Item: 124, humpType ''
Item: 125, humpType 'DH'
Trailing unparsed: '126 INVALID LNON'

Prevent the Boost Spirit Symbol parser from accepting a keyword too early

How can I prevent the Boost Spirit symbols parser from accepting a keyword (symbol) when the input merely starts with a valid keyword? I would like the construct to fail on 'ONEMORE' as a whole, rather than succeed in parsing 'ONE' (because that is a valid keyword) and then fail on 'MORE'.
Here is the actual output of the code below:
Keyword as a number: 1
Keyword as a number: 2
Keyword as a number: 1
Invalid keyword: MORETHREE
And this is what I would like it to be:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 3
The code is just a sample to get the point across.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Semantic-action callback: writes the numeric value of a recognised keyword to stdout
// as one line and flushes the stream.
void printNumber( unsigned u )
{
    std::cout << "Keyword as a number: " << u << '\n';
    std::cout.flush();
}
// Semantic-action callback: writes the text of an unrecognised keyword to stdout
// as one line and flushes the stream.
void printInvalidKeyword( const std::string &s )
{
    std::cout << "Invalid keyword: " << s << '\n';
    std::cout.flush();
}
// Grammar that reads a whitespace-separated sequence of words and reports each one
// either as a known keyword (by number) or as an invalid keyword.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
// Symbol table mapping keyword text to a number.
struct mySymbols_ : qi::symbols<char, unsigned>
{
mySymbols_()
{
// NOTE(review): "THREE" maps to 2, while the desired output quoted in the
// question shows 3 for it -- presumably a typo; confirm.
add
("ONE" , 1)
("TWO" , 2)
("THREE" , 2)
;
}
} mySymbols;
keyword_parser() : keyword_parser::base_type(start)
{
using qi::_1;
using qi::raw;
using ascii::char_;
// Try the symbol table first; anything else goes to `invalid`.
start %= *(
mySymbols[&printNumber]
|
invalid[&printInvalidKeyword]
);
// One or more characters; declared with the space skipper below.
invalid = +char_;
}
qi::rule<Iterator, ascii::space_type> start;
qi::rule<Iterator, std::string(), ascii::space_type> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}
Look at qi::repository::distinct or take some measures yourself:
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
invalid = +ascii::graph;
The rules being declared as
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()> invalid;
See it Live On Coliru
Prints:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 2
Full source:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Grammar that reads a whitespace-separated sequence of words and prints each one
// either as a known keyword (by number) or as an invalid keyword.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
// Symbol table mapping keyword text to a number.
struct mySymbols_ : qi::symbols<char, unsigned>
{
mySymbols_()
{
// NOTE(review): "THREE" maps to 2 here, which is why the sample run prints 2 for
// it; the question's desired output shows 3 -- presumably a typo carried over.
add
("ONE" , 1)
("TWO" , 2)
("THREE" , 2)
;
}
} mySymbols;
keyword_parser() : keyword_parser::base_type(start)
{
using qi::_1;
using ascii::char_;
using phx::val;
// Try a complete keyword first; anything else is reported as invalid.
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
// A symbol only counts as a keyword when no identifier character follows it.
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
// A run of printable, non-space characters.
invalid = +ascii::graph;
}
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()/*IMPLICIT LEXEME:, ascii::space_type*/> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}

boost::spirit: binding a function and passing spirit::qi::_val as a parameter

I need to fill a std::pair object with the values held in an object of type boost::variant. How would you implement this by other means? Is there any other way than the one shown below?
// Variant visitor that stores the visited value into the referenced pair:
// a column name goes into `first`, an unsigned aggregate code into `second`.
struct aggr_pair_visitor : public ::boost::static_visitor<void>
{
public:
// Keeps a reference to pair_; the visitor must not outlive it.
explicit aggr_pair_visitor( column_and_aggregate & pair_ ) : pair(pair_)
{
}
// Column-name alternative: stored as the pair's first element.
void operator()(column_name_t const & column)
{
pair.first = column;
}
// Aggregate-code alternative: range-checked, then stored as the pair's second element.
// Throws std::runtime_error when the code is rejected.
// NOTE(review): the check rejects values above SUM or below AVG; whether that is the
// intended range depends on the declaration order of sql_faggregate, which is not
// visible here -- confirm.
void operator()(unsigned const & faggr)
{
if ( faggr > static_cast<unsigned>(sql_faggregate::SUM) || faggr < static_cast<unsigned>(sql_faggregate::AVG) )
throw std::runtime_error("Failed to parse aggregate type : Not valid integer");
else pair.second = static_cast<sql_faggregate>(faggr);
}
private:
// Target that receives the visited values.
column_and_aggregate & pair;
};
// Stores whichever alternative `val` currently holds into col_and_aggr_pair
// by dispatching an aggr_pair_visitor on it at run time.
void apply_col_and_aggr_visitor( column_and_aggregate & col_and_aggr_pair, ::boost::variant< column_name_t, unsigned > const & val )
{
    // The visitor writes straight into the caller's pair.
    aggr_pair_visitor writer( col_and_aggr_pair );
    ::boost::apply_visitor( writer, val );
}
spirit::qi::rule< iterator, column_and_aggregate(), ascii::space_type > agg_pair =
quoted_string[::boost::bind( &apply_col_and_aggr_visitor, spirit::qi::_val, spirit::qi::_1 )]
> ':'
> spirit::int_[::boost::bind( &apply_col_and_aggr_visitor, spirit::qi::_val, spirit::qi::_1 )];
spirit::qi::rule< iterator, column_and_aggregate_container(), ascii::space_type > aggregates_parser =
'{'
> agg_pair[phoenix::push_back(spirit::qi::_val, spirit::qi::_1)] % ',' // N.B.!!! list-redux technic
> '}';
Okay, on second glance I think you just missed the ability to fusion adapt std::pair:
#include <boost/fusion/adapted/std_pair.hpp>
// Or:
#include <boost/fusion/adapted.hpp>
Using this, the whole complexity vanishes and there is no need for anything involving the variant. Let's assume the following types:
typedef std::string column_name_t;
enum sql_faggregate
{
SUM,
// ....
AVG,
};
typedef std::pair<column_name_t, sql_faggregate> column_and_aggregate;
typedef std::vector<column_and_aggregate> column_and_aggregate_container;
A simple grammar would be:
// Grammar for `{ 'column' : <int>, ... }`, producing a column_and_aggregate_container.
// It is the iterator type; Skipper is the skip-parser type (qi::space_type by default).
// All rules are declared with Skipper, so the grammar also compiles when it is
// instantiated with a skipper other than qi::space_type.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, column_and_aggregate_container(), Skipper>
{
    parser() : parser::base_type(aggregates_parser)
    {
        using namespace qi;
        // using phx::bind; using phx::ref; using phx::val;

        // Single-quoted text; lexeme keeps embedded whitespace.
        quoted_string = lexeme [ "'" >> *~qi::char_("'") >> "'" ];
        // Aggregate code, parsed as an integer.
        faggr = int_;
        // 'column' : code -- `>` makes the ':' and the code mandatory once a column was read.
        agg_pair = quoted_string > ':' > faggr;
        // { pair, pair, ... }
        aggregates_parser = '{' > agg_pair % ',' > '}';
    }
  private:
    qi::rule<It, std::string(), Skipper> quoted_string;
    qi::rule<It, sql_faggregate(), Skipper> faggr;
    qi::rule<It, column_and_aggregate(), Skipper> agg_pair;
    qi::rule<It, column_and_aggregate_container(), Skipper> aggregates_parser;
};
You could add the input validation too:
faggr %= int_ [ qi::_pass = (qi::_1 >=SUM and qi::_1<=AVG) ];
Note the %= to ensure attribute propagation.
Full Working Demonstration
Program code:
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
// #include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef std::string column_name_t;
enum sql_faggregate
{
SUM,
// ....
AVG,
};
typedef std::pair<column_name_t, sql_faggregate> column_and_aggregate;
typedef std::vector<column_and_aggregate> column_and_aggregate_container;
// Grammar for `{ 'column' : <int>, ... }`, producing a column_and_aggregate_container.
// It is the iterator type; Skipper is the skip-parser type (qi::space_type by default).
// All rules are declared with Skipper, so the grammar also compiles when it is
// instantiated with a skipper other than qi::space_type.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, column_and_aggregate_container(), Skipper>
{
    parser() : parser::base_type(aggregates_parser)
    {
        using namespace qi;
        // using phx::bind; using phx::ref; using phx::val;

        // Single-quoted text; lexeme keeps embedded whitespace.
        quoted_string = lexeme [ "'" >> *~qi::char_("'") >> "'" ];
        // Aggregate code, parsed as an integer.
        faggr = int_;
        // 'column' : code -- `>` makes the ':' and the code mandatory once a column was read.
        agg_pair = quoted_string > ':' > faggr;
        // { pair, pair, ... }
        aggregates_parser = '{' > agg_pair % ',' > '}';

        BOOST_SPIRIT_DEBUG_NODE(aggregates_parser);
    }
  private:
    qi::rule<It, std::string(), Skipper> quoted_string;
    qi::rule<It, sql_faggregate(), Skipper> faggr;
    qi::rule<It, column_and_aggregate(), Skipper> agg_pair;
    qi::rule<It, column_and_aggregate_container(), Skipper> aggregates_parser;
};
bool doParse(const std::string& input)
{
typedef std::string::const_iterator It;
auto f(begin(input)), l(end(input));
parser<It, qi::space_type> p;
column_and_aggregate_container data;
try
{
bool ok = qi::phrase_parse(f,l,p,qi::space,data);
if (ok)
{
std::cout << "parse success\n";
for (auto& pair : data)
std::cout << "result: '" << pair.first << "' : " << (int) pair.second << "\n";
}
else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
return ok;
} catch(const qi::expectation_failure<It>& e)
{
std::string frag(e.first, e.last);
std::cerr << e.what() << "'" << frag << "'\n";
}
return false;
}
// Runs doParse on a fixed sample; exit status is 0 on success and 255 otherwise.
int main()
{
    if (doParse("{ 'column 1' : 1, 'column 2' : 0, 'column 3' : 1 }"))
        return 0;
    return 255;
}
Print output:
parse success
result: 'column 1' : 1
result: 'column 2' : 0
result: 'column 3' : 1
PS: If you wanted to do the same in semantic actions, you'd probably want to write it like:
agg_pair =
quoted_string [ phx::bind(&column_and_aggregate::first, _val) = _1 ]
> ':'
> faggr [ phx::bind(&column_and_aggregate::second, _val) = _1 ];
You'll see that you can just drop it in the above sample and it works exactly the same. For this particular grammar, it's just more verbose, so I don't recommend it :)