This is a followup question from a previous question.
I can parse into vectors of strings from my grammar, but I cannot seem to parse into a vector of shared pointers to strings; i.e. std::vector<std::shared_ptr<std::string> >, and need a bit of help.
My compiling header:
#define BOOST_SPIRIT_USE_PHOENIX_V3 1
#include <boost/spirit/include/qi_core.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/phoenix/bind/bind_member_function.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
// this solution for lazy make shared comes from the SO forum, user sehe.
// https://stackoverflow.com/questions/21516201/how-to-create-boost-phoenix-make-shared
// post found using google search terms `phoenix construct shared_ptr`
// changed from boost::shared_ptr to std::shared_ptr
namespace {
    /// Function object that forwards its arguments to std::make_shared<T>.
    /// It is wrapped by make_shared_ (below) so the call can appear inside a
    /// Phoenix expression such as a semantic action.
    template <typename T>
    struct make_shared_f {
        /// Nested result-type metafunction: every call yields std::shared_ptr<T>.
        template <typename... Args>
        struct result {
            using type = std::shared_ptr<T>;
        };

        /// Perfect-forwards the arguments to T's constructor.
        template <typename... Args>
        std::shared_ptr<T> operator()(Args&&... args) const {
            return std::make_shared<T>(std::forward<Args>(args)...);
        }
    };

    /// Phoenix function wrapper around make_shared_f<T>.
    template <typename T>
    using make_shared_ = boost::phoenix::function<make_shared_f<T>>;
}
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Grammar for input of the form `variable_group a, b, c;`.
// The grammar's attribute is a vector of shared pointers to the variable names.
template<typename Iterator, typename Skipper = ascii::space_type>
struct SystemParser : qi::grammar<Iterator, std::vector<std::shared_ptr<std::string> >(), Skipper>
{
// Wires up the rules; variable_group_ is the start rule.
SystemParser() : SystemParser::base_type(variable_group_)
{
namespace phx = boost::phoenix;
using qi::_1;
using qi::_val;
using qi::eps;
using qi::lit;
var_counter = 0;
// Reserve the keyword so it cannot be used as a variable name.
declarative_symbols.add("variable_group",0);
// keyword, then the comma-separated names, then ';' (joined with the `>` operator)
variable_group_ = "variable_group" > genericvargp_ > ';';
// one or more new_variable_ separated by ','
genericvargp_ = new_variable_ % ','; //
// Semantic action: assign a freshly made shared string, built from the matched name, to _val.
new_variable_ = unencountered_symbol_ [_val = make_shared_<std::string>() (_1)];
// A valid name that is neither an already-encountered variable nor a declarative keyword.
unencountered_symbol_ = valid_variable_name_ - ( encountered_variables | declarative_symbols );
// One or more letters, then any mix of alphanumerics, '[', ']' and '_'.
valid_variable_name_ = +qi::alpha >> *(qi::alnum | qi::char_("[]_") );
// debug(variable_group_); debug(unencountered_symbol_); debug(new_variable_); debug(genericvargp_);
// BOOST_SPIRIT_DEBUG_NODES((variable_group_) (valid_variable_name_) (unencountered_symbol_) (new_variable_) (genericvargp_))
}
// rule declarations. these are member variables for the parser.
qi::rule<Iterator, std::vector<std::shared_ptr<std::string> >(), Skipper > variable_group_;
qi::rule<Iterator, std::vector<std::shared_ptr<std::string> >(), Skipper > genericvargp_;
// NOTE(review): the attribute here is spelled std::shared_ptr<std::string()>, which is
// not the same as the element type std::shared_ptr<std::string> used by the two rules
// above -- confirm this is what was intended.
qi::rule<Iterator, std::shared_ptr<std::string()> > new_variable_;
// These rules are declared without a skipper.
qi::rule<Iterator, std::string()> unencountered_symbol_;
qi::rule<Iterator, std::string()> valid_variable_name_;
// Set to 0 in the constructor; not otherwise used in this struct.
unsigned var_counter;
// Symbol tables subtracted from valid names; nothing is added to encountered_variables here.
qi::symbols<char,int> encountered_variables;
qi::symbols<char,int> declarative_symbols;
};
with driver code:
int main(int argc, char** argv)
{
std::vector<std::shared_ptr<std::string> > V;
std::string str = "variable_group x, y, z; ";
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
SystemParser<std::string::const_iterator> S;
bool s = phrase_parse(iter, end, S, boost::spirit::ascii::space, V);
if (s)
{
std::cout << "Parse succeeded: " << V.size() << " variables\n";
for (auto& s : V)
std::cout << " - '" << s << "'\n";
}
else
std::cout << "Parse failed\n";
if (iter!=end)
std::cout << "Remaining unparsed: '" << std::string(iter, end) << "'\n";
return 0;
}
The text is parsed correctly, but the resulting vector is of length 0, while it should be of length 3. Somehow, the std::shared_ptr<string> is not pushed onto the back of the vector resulting from the rule genericvargp_.
I've tried many things, including reading all the debug information from a test parse, and placement of the %= signs for rule definitions, which should be used for rules for which there is a semantic action that does not assign _val unless I am mistaken. I've also played all night and day with using phx::bind to manually push onto the back of _val, but got nowhere. I've further verified that the make_shared_ provided by sehe in another answer is in fact lazy for std::shared_ptr.
As an aside, I have also struggled with getting the result of an unencountered_symbol_ to add to encountered_variables so as to enforce uniqueness of variable names...
The problem seems to be the propagation of the result of the new_variable_ rule onto the desired vector of shared pointers in the genericvargp_ rule.
This declaration
qi::rule<Iterator, std::shared_ptr<std::string()> > new_variable_;
Doesn't match the desired type:
qi::rule<Iterator, std::shared_ptr<std::string>() > new_variable_;
Sadly, in old SpiritV2 this attribute is silently ignored and no attribute propagation is done. This also explains why it didn't error out on compile time.
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3 1
#define BOOST_SPIRIT_DEBUG 1
#include <boost/spirit/include/qi_core.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/phoenix/bind/bind_member_function.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
// this solution for lazy make shared comes from the SO forum, user sehe.
// https://stackoverflow.com/questions/21516201/how-to-create-boost-phoenix-make-shared
// post found using google search terms `phoenix construct shared_ptr`
// changed from boost::shared_ptr to std::shared_ptr
namespace {
    /// Function object that forwards its arguments to std::make_shared<T>;
    /// make_shared_ (below) wraps it for use in Phoenix expressions.
    template <typename T>
    struct make_shared_f {
        /// Nested result-type metafunction: every call yields std::shared_ptr<T>.
        template <typename... Args>
        struct result {
            using type = std::shared_ptr<T>;
        };

        /// Perfect-forwards the arguments to T's constructor.
        template <typename... Args>
        std::shared_ptr<T> operator()(Args&&... args) const {
            return std::make_shared<T>(std::forward<Args>(args)...);
        }
    };

    /// Phoenix function wrapper around make_shared_f<T>.
    template <typename T>
    using make_shared_ = boost::phoenix::function<make_shared_f<T>>;
}
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Grammar for input of the form `variable_group a, b, c;`.
// The grammar's attribute is a vector of shared pointers to the variable names.
template <typename Iterator, typename Skipper = ascii::space_type>
struct SystemParser : qi::grammar<Iterator, std::vector<std::shared_ptr<std::string> >(), Skipper> {
// Wires up the rules; variable_group_ is the start rule.
SystemParser() : SystemParser::base_type(variable_group_) {
namespace phx = boost::phoenix;
using qi::_1;
using qi::_val;
using qi::eps;
using qi::lit;
var_counter = 0;
// Reserve the keyword so it cannot be used as a variable name.
declarative_symbols.add("variable_group", 0);
// keyword, then the comma-separated names, then ';' (joined with the `>` operator)
variable_group_ = "variable_group" > genericvargp_ > ';';
// one or more new_variable_ separated by ','
genericvargp_ = new_variable_ % ','; //
// Semantic action: assign a freshly made shared string, built from the matched name, to _val.
new_variable_ = unencountered_symbol_ [_val = make_shared_<std::string>()(_1)];
// A valid name that is neither an already-encountered variable nor a declarative keyword.
unencountered_symbol_ = valid_variable_name_ - (encountered_variables | declarative_symbols);
// One or more letters, then any mix of alphanumerics, '[', ']' and '_'.
valid_variable_name_ = +qi::alpha >> *(qi::alnum | qi::char_("[]_"));
// Register the rules for debug tracing (BOOST_SPIRIT_DEBUG is defined at the top of this listing).
BOOST_SPIRIT_DEBUG_NODES((variable_group_) (valid_variable_name_) (unencountered_symbol_) (new_variable_) (genericvargp_))
}
// rule declarations. these are member variables for the parser.
qi::rule<Iterator, std::vector<std::shared_ptr<std::string> >(), Skipper> variable_group_;
qi::rule<Iterator, std::vector<std::shared_ptr<std::string> >(), Skipper> genericvargp_;
// Attribute is std::shared_ptr<std::string>, i.e. the element type of the vector above.
qi::rule<Iterator, std::shared_ptr<std::string>() > new_variable_;
// These rules are declared without a skipper.
qi::rule<Iterator, std::string()> unencountered_symbol_;
qi::rule<Iterator, std::string()> valid_variable_name_;
// Set to 0 in the constructor; not otherwise used in this struct.
unsigned var_counter;
// Symbol tables subtracted from valid names; nothing is added to encountered_variables here.
qi::symbols<char, qi::unused_type> encountered_variables;
qi::symbols<char, qi::unused_type> declarative_symbols;
};
int main()
{
std::vector<std::shared_ptr<std::string> > V;
std::string str = "variable_group x, y, z; ";
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
SystemParser<std::string::const_iterator> S;
bool s = phrase_parse(iter, end, S, boost::spirit::ascii::space, V);
if (s)
{
std::cout << "Parse succeeded: " << V.size() << " variables\n";
for (auto& s : V)
std::cout << " - '" << *s << "'\n";
}
else
std::cout << "Parse failed\n";
if (iter!=end)
std::cout << "Remaining unparsed: '" << std::string(iter, end) << "'\n";
}
Prints
Parse succeeded: 3 variables
- 'x'
- 'y'
- 'z'
As well as a lot of debug information
Related
I want to parse a function (with an arbitrary name and an arbitrary number of arguments) in this form:
function(bye, 1, 3, 4, foo)
The arguments could be generic strings comma separated.
And I want to copy the name of the function and the arguments in a vector of strings.
like this
std::vector<std::string> F;
std::string fun = "function(bye, 1, 3, 4, foo)";
// The parser must produce this vector from the example
F[0] == "function"
F[1] == "1"
F[2] == "3"
F[3] == "4"
F[4] == "foo"
I've written the following code after reading some tutorials, but it does not work (in the sense that it does not compile).
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
// Grammar intended to parse `name(arg, arg, ...)` into a vector of strings.
// The grammar is declared with an ascii::space_type skipper.
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
// Defines the rules; `start` is the start rule.
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
// function name: one or more ASCII letters
fn_name = +qi::char_("a-zA-Z");
// argument: one or more letters, digits or underscores
string = +qi::char_("a-zA-Z_0-9");
// zero or more further arguments, each preceded by ','
rec = *( lit(",") >> string );
// name '(' first-argument remaining-arguments ')'
start %= fn_name >> lit("(") >> string >> rec >> lit(")") ;
}
qi::rule<Iterator, std::string(), ascii::space_type> fn_name;
qi::rule<Iterator, std::string(), ascii::space_type> string;
// NOTE(review): `rec` repeats `string` but its attribute is a single std::string.
qi::rule<Iterator, std::string(), ascii::space_type> rec;
// NOTE(review): this attribute is written `std::vector<std::string>` while the grammar
// above uses `std::vector<std::string>()` -- the two signatures differ.
qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
// Runs command_parser over a sample command and prints each parsed element,
// or "Fail" when the parse does not succeed.
int
main()
{
    namespace qi = boost::spirit::qi;
    std::cout << "/////////////////////////////////////////////////////////\n\n";

    std::string cmd("fun(1,2,3,4 , 5, foo) ");
    std::vector<std::string> VV;
    client::command_parser<std::string::iterator> CP;

    bool result = qi::parse(cmd.begin(), cmd.end(), CP, VV);
    if (!result) {
        std::cout << "Fail" << std::endl;
        return 0 ;
    }

    for ( auto element : VV ) {
        std::cout << element << std::endl;
    }
    return 0 ;
}
Just for fun, here's my minimalist take on this grammar:
/// Result of parsing a call: a flat list of strings.
using CallList = std::vector<std::string>;

/// Exception thrown when the input cannot be parsed; what() is always "ParseError".
struct ParseError : std::runtime_error {
    ParseError()
        : std::runtime_error{"ParseError"}
    {
    }
};
// The parse implementation
/// Parses `fun` as `name(arg, arg, ...)`.
///
/// @param fun  the text to parse; the whole string must match (`eoi` is required).
/// @return     the function name followed by each argument, in order.
/// @throws ParseError  when the input does not match the grammar.
CallList parse_function_call(std::string const& fun) {
    CallList elements;
    using namespace boost::spirit::qi;
    using It = decltype(begin(fun));
    // A letter followed by any number of letters, digits or underscores.
    // `*` (not `+`) so that one-letter names such as "f" are accepted too.
    static const rule<It, std::string()> identifier = alpha >> *(alnum | char_('_'));
    // The argument list is optional; each argument is a non-empty run of
    // characters other than ',' and ')', taken verbatim inside lexeme[].
    if (!phrase_parse(begin(fun), end(fun),
                identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
                space, elements))
        throw ParseError{};
    return elements;
}
With a little bit of plumbing
// just for test output
// Outcome of one test: the parsed elements on success, the error otherwise.
using TestResult = std::variant<CallList, ParseError>;

// Any two ParseError values compare equal; this lets TestResult values be
// compared with ==.
static constexpr bool operator==(ParseError const& /*lhs*/, ParseError const& /*rhs*/) {
    return true;
}
// Streams a TestResult: "ParseError" for the error alternative, otherwise the
// list elements as "{a,b,c}".
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
    if (std::holds_alternative<ParseError>(tr))
        return os << "ParseError";

    auto const& items = std::get<CallList>(tr);
    // The opening brace is written before the joiner emits any element.
    std::copy(items.begin(), items.end(),
              std::experimental::make_ostream_joiner(os << "{", ","));
    return os << "}";
}
// Runs parse_function_call and converts a thrown ParseError into a value, so
// that success and failure can both be compared against an expected result.
TestResult try_parse(std::string const& fun) {
    TestResult outcome = ParseError{};
    try {
        outcome = parse_function_call(fun);
    } catch (ParseError const& failure) {
        outcome = failure;
    }
    return outcome;
}
Here's a test runner:
// Table-driven test loop (fragment): each case pairs an input with the expected
// TestResult. `Case` is declared outside this fragment.
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
// Parse, compare against the expectation, and report PASS/FAIL per input.
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
// On mismatch, show both sides.
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
Which prints Live On Coliru
"function(bye, 1, 3, 4, foo)": FAIL
-- expected: {function,1,3,4,foo}
-- actual: {function,bye,1,3,4,foo}
"liar(pants on fire)": PASS
"liar('pants on fire')": PASS
"nullary()": PASS
"nullary( )": PASS
"zerolength(a,,b)": PASS
"zerolength(a, ,b)": PASS
"noarglust": PASS
"": PASS
"()": PASS
"1(invalidfunctionname)": PASS
"foo(bar) BOGUS": PASS
Note that your example test-case doesn't pass, but I think that was a mistake in the test case.
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <experimental/iterator>
#include <variant>
#include <iomanip>
/// Result of parsing a call: a flat list of strings.
using CallList = std::vector<std::string>;

/// Exception thrown when the input cannot be parsed; what() is always "ParseError".
struct ParseError : std::runtime_error {
    ParseError()
        : std::runtime_error{"ParseError"}
    {
    }
};
// The parse implementation
/// Parses `fun` as `name(arg, arg, ...)`.
///
/// @param fun  the text to parse; the whole string must match (`eoi` is required).
/// @return     the function name followed by each argument, in order.
/// @throws ParseError  when the input does not match the grammar.
CallList parse_function_call(std::string const& fun) {
    CallList elements;
    using namespace boost::spirit::qi;
    using It = decltype(begin(fun));
    // A letter followed by any number of letters, digits or underscores.
    // `*` (not `+`) so that one-letter names such as "f" are accepted too.
    static const rule<It, std::string()> identifier = alpha >> *(alnum | char_('_'));
    // The argument list is optional; each argument is a non-empty run of
    // characters other than ',' and ')', taken verbatim inside lexeme[].
    if (!phrase_parse(begin(fun), end(fun),
                identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
                space, elements))
        throw ParseError{};
    return elements;
}
// just for test output
// Outcome of one test: the parsed elements on success, the error otherwise.
using TestResult = std::variant<CallList, ParseError>;

// Any two ParseError values compare equal; this lets TestResult values be
// compared with ==.
static constexpr bool operator==(ParseError const& /*lhs*/, ParseError const& /*rhs*/) {
    return true;
}
// Streams a TestResult: "ParseError" for the error alternative, otherwise the
// list elements as "{a,b,c}".
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
    if (std::holds_alternative<ParseError>(tr))
        return os << "ParseError";

    auto const& items = std::get<CallList>(tr);
    // The opening brace is written before the joiner emits any element.
    std::copy(items.begin(), items.end(),
              std::experimental::make_ostream_joiner(os << "{", ","));
    return os << "}";
}
// Runs parse_function_call and converts a thrown ParseError into a value, so
// that success and failure can both be compared against an expected result.
TestResult try_parse(std::string const& fun) {
    TestResult outcome = ParseError{};
    try {
        outcome = parse_function_call(fun);
    } catch (ParseError const& failure) {
        outcome = failure;
    }
    return outcome;
}
int main() {
using namespace std;
using Case = pair<std::string, TestResult>;
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
}
I'm correcting my answer per suggestions made by #sehe. All the credit for these corrections go to him. I am referencing your line numbers below. So the first error is from spirit and it says:
incompatible_start_rule:
// If you see the assertion below failing then the start rule
// passed to the constructor of the grammar is not compatible with
// the grammar (i.e. it uses different template parameters).
The signature of the start parser does not match that of the parser declaration.
22. struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
43. qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
I googled this and could not find an explanation but using an object rather than a type is preferable. I did it the other way in my first answer. The proper fix is at line 43:
43. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
The next spirit error is:
The rule was instantiated with a skipper type but you have not pass
any. Did you use parse instead of phrase_parse?");
So a phrase_parse is required with a skipper. Note that we need a skipper to pass along.
64. using qi::ascii::space;
65. bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, space, VV);
Now it compiles and the output is:
fun
1
2345foo
I see that won't do and you are looking to stuff the vector with each of the passed parameters. So you need a rule that is compatible with your attribute and intention. The kleene operator working with a std::string will put all the data into one string. So use your attribute:
41. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
Now as #sehe points out, the skipper with fn_name and string will just concatenate names with spaces and newlines. So don't use skippers there.
39. qi::rule<Iterator, std::string()> fn_name;
40. qi::rule<Iterator, std::string()> string;
The other error I made was to see the %= and call it a list operator. From here, it is a definition operator. I'm not sure why there are two but playing around, it seems you need to use %= with semantic action. Here is the corrected code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Grammar that parses `name(arg, arg, ...)` into a vector of strings:
// the function name first, then each argument.
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
// Defines the rules; `start` is the start rule.
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
// function name: one or more ASCII letters
fn_name = +qi::char_("a-zA-Z");
// argument: one or more letters, digits or underscores
string = +qi::char_("a-zA-Z_0-9");
// zero or more further arguments, each preceded by ','
rec = *(lit(",") >> string);
// name '(' first-argument remaining-arguments ')'
start %= fn_name >> lit("(") >> string >> rec >> lit(")");
}
// Token rules are declared without a skipper, so skipping does not happen inside a name.
qi::rule<Iterator, std::string()> fn_name;
qi::rule<Iterator, std::string()> string;
// Both list-valued rules carry the space skipper and a vector-of-strings attribute.
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
};
}
// Runs command_parser over a sample command with a space skipper and prints
// each parsed element on its own line, or "Fail" when the parse fails.
int main()
{
    namespace qi = boost::spirit::qi;

    std::string cmd("function(1,2,3,4 , 5, foo) ");
    std::vector<std::string> VV;
    client::command_parser<std::string::iterator> CP;

    const bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, qi::ascii::space, VV);
    if (!result) {
        std::cout << "Fail" << std::endl;
        return 0;
    }

    for (const auto& element : VV) {
        std::cout << element << std::endl;
    }
    return 0;
}
And here is an example using X3:
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <vector>
// Attribute produced by the parser: a list of strings. It could be a more
// complex type and could live in a namespace.
using attr = std::vector<std::string>;
// X3 parsers for `name(arg, arg, ...)`.
namespace parser {
namespace x3 = boost::spirit::x3;
// function name: one or more ASCII letters
const auto fn_name = +x3::char_("a-zA-Z");
// argument: one or more letters, digits or underscores
const auto string = +x3::char_("a-zA-Z_0-9");
// name '(' comma-separated arguments ')'; the rule's attribute type is `attr`
const auto start = x3::rule<struct _, attr>() = fn_name >> "(" >> string % ',' >> ")";
}
// Runs the X3 parser over a sample command with a space skipper and prints
// each parsed element, or a failure message.
int main()
{
    namespace x3 = boost::spirit::x3;

    std::string cmd("fun(1,.2,3,4 , 5, foo) ");
    auto it = cmd.begin();
    attr VV;

    const bool result = phrase_parse(it, cmd.end(), parser::start, x3::space, VV);
    if (!result) {
        std::cout << "Fail at" << std::endl;
        return 0;
    }

    for (const auto& element : VV) {
        std::cout << "-> " << element << std::endl;
    }
    return 0;
}
I have the following working Qi code:
// Qi grammar for a query string of `key[=value]` pairs separated by '&'.
// (Fragment: `Iterator` and `string_map` are declared outside this snippet.)
struct query_grammar
: public boost::spirit::qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
// one pair, then any number of '&'-prefixed pairs
query = pair >> *(boost::spirit::qi::lit('&') >> pair);
// key, optionally followed by '=' and a value
pair = +qchar >> -(boost::spirit::qi::lit('=') >> +qchar);
// any character except '&' and '='
qchar = ~boost::spirit::qi::char_("&=");
}
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>()> query;
// The pair rule's attribute is the map's value_type.
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>::value_type()> pair;
boost::spirit::qi::rule<Iterator, char()> qchar;
};
I tried porting it to x3:
// X3 port of the query-string grammar: rule declarations, their definitions,
// and the BOOST_SPIRIT_DEFINE calls that tie them together.
namespace x3 = boost::spirit::x3;
const x3::rule<class query_char_, char> query_char_ = "query_char";
// The pair rule's attribute is the map's value_type.
const x3::rule<class string_pair_, std::map<std::string,std::string>::value_type> string_pair_ = "string_pair";
const x3::rule<class string_map_, std::map<std::string,std::string>> string_map_ = "string_map";
// any character except '&' and '='
const auto query_char__def = ~boost::spirit::x3::char_("&=");
// key, optionally followed by '=' and a value
const auto string_pair__def = +query_char_ >> -(boost::spirit::x3::lit('=') >> +query_char_);
// one pair, then any number of '&'-prefixed pairs
const auto string_map__def = string_pair_ >> *(boost::spirit::x3::lit('&') >> string_pair_);
BOOST_SPIRIT_DEFINE(string_map_)
BOOST_SPIRIT_DEFINE(string_pair_)
BOOST_SPIRIT_DEFINE(query_char_)
but I am getting the following error when trying to parse a string with string_map_ :
/usr/include/boost/spirit/home/x3/support/traits/move_to.hpp:209: erreur : no matching function for call to move_to(const char*&, const char*&, std::pair<std::__cxx11::basic_string<char>, std::__cxx11::basic_string<char> >&, boost::mpl::identity<boost::spirit::x3::traits::plain_attribute>::type)
detail::move_to(first, last, dest, typename attribute_category<Dest>::type());
~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I saw this answer: Parsing pair of strings fails. Bad spirit x3 grammar and tried to make my string_pair raw but to no avail.
Edit:
this example code from the spirit examples does not compile either so I guess the problem is a bit deeper:
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Splits the input at the first space into a pair of strings using an inline
// X3 expression.
int main()
{
    std::pair<std::string, std::string> result;

    std::string input( "cosmic pizza " );
    auto iter = input.begin();
    auto end_iter = input.end();

    // first: everything up to the first ' '; second: everything after it
    x3::parse(
        iter, end_iter,
        *(~x3::char_(' ')) >> ' ' >> *x3::char_,
        result);
}
Qi Fixes
First off, I had to fix the rule declaration with the Qi variant before it could work:
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
For the simple reason that value_type has pair<key_type const, mapped_type> which is never assignable.
Here's a Qi SSCCE:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <map>
namespace qi = boost::spirit::qi;
// Map whose keys and values share the same type.
template <typename T> using string_map = std::map<T, T>;
// Qi grammar for a query string of `key[=value]` pairs separated by '&'.
template <typename Iterator>
struct query_grammar : public qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
// any character except '&' and '='
qchar = ~qi::char_("&=");
// key, optionally followed by '=' and a value
pair = +qchar >> -(qi::lit('=') >> +qchar);
// one pair, then any number of '&'-prefixed pairs
query = pair >> *(qi::lit('&') >> pair);
}
private:
qi::rule<Iterator, std::map<std::string,std::string>()> query;
// std::pair<std::string,std::string> rather than the map's value_type.
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
qi::rule<Iterator, char()> qchar;
};
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, query_grammar<It>{}, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Prints
======= foo=bar&baz=boo
Parsed 2 pairs
Qi Improvements
The following simpler grammar seems better:
Live On Coliru
// Simplified query-string grammar: `key=value` pairs separated by '&',
// parsed into a string_map<T>.
template <typename Iterator, typename T = std::string>
struct query_grammar : public qi::grammar<Iterator, string_map<T>()>
{
query_grammar() : query_grammar::base_type(query) {
using namespace qi;
// key: one or more characters other than '&' and '='; then '=';
// value: zero or more characters other than '&' (so it may be empty or contain '=')
pair = +~char_("&=") >> '=' >> *~char_("&");
// one or more pairs separated by '&'
query = pair % '&';
}
private:
qi::rule<Iterator, std::pair<T,T>()> pair;
qi::rule<Iterator, std::map<T,T>()> query;
};
It accepts empty values (e.g. &q=&x=) and values containing additional =: &q=7==8&rt=bool. It could be significantly more efficient (untested).
X3 version
Without looking at your code, I translated it directly into an X3 version:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <map>
namespace x3 = boost::spirit::x3;
// Map whose keys and values share the same type.
template <typename T> using string_map = std::map<T, T>;
// X3 query-string parsers, written as plain expressions without explicit rules.
namespace grammar {
using namespace x3;
// key: one or more characters other than '&' and '='; then '=';
// value: zero or more characters other than '&'
auto pair = +~char_("&=") >> '=' >> *~char_("&");
// one or more pairs separated by '&'
auto query = pair % '&';
}
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, grammar::query, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Which, obviously ( --- ) prints
======= foo=bar&baz=boo
Parsed 2 pairs
X3 Improvements
You will probably want to coerce the attribute types for the rules, because automatic attribute propagation can have surprising heuristics.
namespace grammar {
// Returns a reference to a function-local static X3 rule that parses a query
// string into std::map<T, T>. The attribute types are fixed by explicit rules.
template <typename T = std::string> auto& query() {
using namespace x3;
// pair: key, optionally followed by '=' and a (possibly empty) value
static const auto s_pair
= rule<struct pair_, std::pair<T, T> > {"pair"}
= +~char_("&=") >> -('=' >> *~char_("&"));
// query: one or more pairs separated by '&'
static const auto s_query
= rule<struct query_, std::map<T, T> > {"query"}
= s_pair % '&';
return s_query;
};
}
See it Live On Coliru
What Went wrong?
The X3 version suffered the same problem with const key type in std::map<>::value_type
How can I prevent the Boost Spirit symbol parser from accepting a keyword (symbol) when the input merely starts with a valid keyword (symbol)? I would like the construct to fail parsing 'ONEMORE' as a whole, rather than succeed in parsing 'ONE' (because that is a valid keyword) and then fail on 'MORE'.
Here is the actual output of the code below:
Keyword as a number: 1
Keyword as a number: 2
Keyword as a number: 1
Invalid keyword: MORETHREE
And this is what I would like it to be:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 3
The code is just a sample to get the point across.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Semantic-action callback: prints the numeric value attached to a matched keyword.
void printNumber( unsigned u )
{
    std::cout << "Keyword as a number: " << u << std::endl;
}
// Semantic-action callback: prints text that was not recognised as a keyword.
void printInvalidKeyword( const std::string &s )
{
    std::cout << "Invalid keyword: " << s << std::endl;
}
// Grammar that reads a sequence of words, reporting each one either as a known
// keyword (by its number) or as an invalid keyword.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
    // Symbol table mapping each keyword to its number.
    struct mySymbols_ : qi::symbols<char, unsigned>
    {
        mySymbols_()
        {
            add
                ("ONE"   , 1)
                ("TWO"   , 2)
                ("THREE" , 3)   // was 2, which duplicated TWO
            ;
        }
    } mySymbols;

    // Defines the rules; `start` is the start rule.
    keyword_parser() : keyword_parser::base_type(start)
    {
        using qi::_1;
        using qi::raw;
        using ascii::char_;

        // Zero or more items; each is tried as a symbol first, then as `invalid`.
        start %= *(
                   mySymbols[&printNumber]
                   |
                   invalid[&printInvalidKeyword]
                  );
        // One or more characters of any kind.
        invalid = +char_;
    }

    qi::rule<Iterator, ascii::space_type> start;
    qi::rule<Iterator, std::string(), ascii::space_type> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}
Look at qi::repository::distinct or take some measures yourself:
// Zero or more items; each is tried as `keyword` first, then as `invalid`,
// and the matching branch prints its attribute.
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
// A symbol that is NOT immediately followed by a letter, digit or underscore.
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
// One or more ascii::graph characters.
invalid = +ascii::graph;
The rules being declared as
// Only the start rule is declared with the space skipper.
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
// (keyword and invalid are declared without a skipper)
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()> invalid;
See it Live On Coliru
Prints:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 2
Full source:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Grammar that reads a sequence of words, reporting each one either as a known
// keyword (by its number) or as an invalid keyword.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
// Symbol table mapping each keyword to its number.
struct mySymbols_ : qi::symbols<char, unsigned>
{
mySymbols_()
{
// NOTE(review): THREE maps to 2, the same value as TWO -- looks like a typo
// for 3; confirm before changing, since it affects the printed output.
add
("ONE" , 1)
("TWO" , 2)
("THREE" , 2)
;
}
} mySymbols;
// Defines the rules; `start` is the start rule.
keyword_parser() : keyword_parser::base_type(start)
{
using qi::_1;
using ascii::char_;
using phx::val;
// Zero or more items; each is tried as `keyword` first, then as `invalid`,
// and the matching branch prints its attribute.
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
// A symbol that is NOT immediately followed by a letter, digit or underscore.
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
// One or more ascii::graph characters.
invalid = +ascii::graph;
}
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
// NOTE(review): attribute is int() while the symbol table stores unsigned.
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()/*IMPLICIT LEXEME:, ascii::space_type*/> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}
I started to learn Boost.Spirit and have finished reading the Qi - Writing Parsers section. While reading, everything is easy and understandable. But when I try to do something, I get a lot of errors, because there are so many includes and namespaces and I need to know when to include/use them. As practice, I want to write a simple INI parser.
Here is the code (includes are from one of examples inside Spirit lib as almost everything else):
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <map>
namespace client
{
    /// Key/value pairs of one INI section, ordered by key.
    using key_value_map_t = std::map<std::string, std::string>;

    /// A single INI section: its name and the key/value pairs it contains.
    struct mini_ini
    {
        std::string     name;
        key_value_map_t key_values_map;
    };
} // client
// Adapts client::mini_ini as a Fusion sequence, listing its two members
// (name, key_values_map) in declaration order.
BOOST_FUSION_ADAPT_STRUCT(
client::mini_ini,
(std::string, name)
(client::key_value_map_t, key_values_map)
)
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Grammar for one INI section (`[name]` followed by `key=value` lines) with
// mini_ini as its attribute and an ascii::space_type skipper.
template <typename Iterator>
struct ini_grammar : qi::grammar<Iterator, mini_ini(), ascii::space_type>
{
// Defines the rules, names them, and registers an error handler on section_.
ini_grammar() : ini_grammar::base_type(section_, "section")
{
using qi::char_;
using qi::on_error;
using qi::fail;
using namespace qi::labels;
using phoenix::construct;
using phoenix::val;
// key: one or more letters, digits or underscores
key_ = +char_("a-zA-Z_0-9");
// key '=' value, where the value is `*char_` (zero or more of any character)
pair_ = key_ >> '=' >> *char_;
// '[' key ']' newline, then zero or more `pair newline`.
// All sequences here use `>>`; none use the expectation operator `>`.
// NOTE(review): this rule matches literal '\n' while the grammar runs under a
// space skipper -- TODO confirm the skipper leaves newlines for it to match.
section_ = '[' >> key_ >> ']' >> '\n' >> *(pair_ >> '\n');
// Names used when reporting which rule was involved.
key_.name("key");
pair_.name("pair");
section_.name("section");
// Error handler for section_: prints what was expected and the input from
// the error position to the end.
on_error<fail>
(
section_
, std::cout
<< val("Error! Expecting ")
<< _4 // what failed?
<< val(" here: \"")
<< construct<std::string>(_3, _2) // iterators to error-pos, end
<< val("\"")
<< std::endl
);
}
qi::rule<Iterator, std::string(), ascii::space_type> key_;
qi::rule<Iterator, mini_ini(), ascii::space_type> section_;
qi::rule<Iterator, std::pair<std::string, std::string>(), ascii::space_type> pair_;
};
} // client
int
main()
{
std::string storage =
"[section]\n"
"key1=val1\n"
"key2=val2\n";
client::mini_ini ini;
typedef client::ini_grammar<std::string::const_iterator> ini_grammar;
ini_grammar grammar;
using boost::spirit::ascii::space;
std::string::const_iterator iter = storage.begin();
std::string::const_iterator end = storage.end();
bool r = phrase_parse(iter, end, grammar, space, ini);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
return 0;
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
std::cout << std::string(iter, end) << "\n";
return 1;
}
return 0;
}
As you can see, I want to parse the following text into the mini_ini struct:
"[section]"
"key1=val1"
"key2=val2";
The parse fails, and std::string(iter, end) is the full input string.
My questions:
Why does the parse fail without the on_error<fail> handler being invoked?
Do you have any recommendations on how to learn Boost.Spirit? (I understand the documentation well in theory, but in practice I keep asking myself "WHY???")
Thanks
Q. Why I see fail but don't see on_error handler
The on_error handler is only fired for the rule it is registered on (section_), and only when an expectation point fails.
Your grammar doesn't contain any expectation points (only >> is used, never >).
Q. Have you any recommendations how to learn Boost.Spirit (I have good understanding of documentation in theory, but in practice I have a lot of WHY ???) ?
Just build the parsers you need. Copy good conventions from the docs and SO answers. There are a lot of them. As you have seen, quite a number contain full examples of Ini parsers with varying levels of error reporting too.
Bonus hints:
Do more detailed status reporting:
// Report the match result and any leftover input as two separate facts.
bool ok = phrase_parse(iter, end, grammar, space, ini);
if (ok) {
std::cout << "Parse success\n";
} else {
std::cout << "Parse failure\n";
}
// iter is where parsing stopped; anything between it and end was not consumed.
if (iter != end) {
std::cout << "Remaining unparsed: '" << std::string(iter, end) << "'\n";
}
// Exit code 0 only when the grammar matched AND the whole input was consumed.
return ok && (iter==end)? 0 : 1;
Use BOOST_SPIRIT_DEBUG:
#define BOOST_SPIRIT_DEBUG
// and later
BOOST_SPIRIT_DEBUG_NODES((key_)(pair_)(section_))
Prints:
<section_>
<try>[section]\nkey1=val1\n</try>
<key_>
<try>section]\nkey1=val1\nk</try>
<success>]\nkey1=val1\nkey2=val</success>
<attributes>[[s, e, c, t, i, o, n]]</attributes>
</key_>
<fail/>
</section_>
Parse failure
Remaining unparsed: '[section]
key1=val1
key2=val2
'
You'll notice that the section header isn't parsed because the newline is never matched. Your skipper (space_type) skips newlines, so the explicit '\n' in the grammar can never match (see the related question "Boost spirit skipper issues").
Fix skipper
When using blank_type as the skipper you'll get a successful parse:
<section_>
<try>[section]\nkey1=val1\n</try>
<key_>
<try>section]\nkey1=val1\nk</try>
<success>]\nkey1=val1\nkey2=val</success>
<attributes>[[s, e, c, t, i, o, n]]</attributes>
</key_>
<pair_>
<try>key1=val1\nkey2=val2\n</try>
<key_>
<try>key1=val1\nkey2=val2\n</try>
<success>=val1\nkey2=val2\n</success>
<attributes>[[k, e, y, 1]]</attributes>
</key_>
<success></success>
<attributes>[[[k, e, y, 1], [v, a, l, 1,
, k, e, y, 2, =, v, a, l, 2,
]]]</attributes>
</pair_>
<success>key1=val1\nkey2=val2\n</success>
<attributes>[[[s, e, c, t, i, o, n], []]]</attributes>
</section_>
Parse success
Remaining unparsed: 'key1=val1
key2=val2
'
NOTE: The parse succeeds but doesn't do what you want. This is because *char_ includes newlines. So make that
pair_ = key_ >> '=' >> *(char_ - qi::eol); // or
pair_ = key_ >> '=' >> *~char_("\r\n"); // etc
Full code
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <map>
namespace client
{
    /// Ordered string-to-string map holding the key/value entries.
    using key_value_map_t = std::map<std::string, std::string>;

    /// Parse result: a name plus the key/value entries that belong to it.
    struct mini_ini
    {
        std::string     name;            ///< text parsed between '[' and ']'
        key_value_map_t key_values_map;  ///< one entry per "key=value" pair
    };
} // namespace client
// Expose client::mini_ini to Boost.Fusion as the sequence (name, key_values_map).
// The section_ rule declares mini_ini() as its attribute and has no semantic
// actions, so the members are listed here in the order the grammar produces them.
BOOST_FUSION_ADAPT_STRUCT(
client::mini_ini,
(std::string, name)
(client::key_value_map_t, key_values_map)
)
namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    /// Grammar for one "[name]" header followed by "key=value" lines, producing
    /// a mini_ini.
    ///
    /// The skipper is ascii::blank_type, so line ends are NOT skipped and can be
    /// matched explicitly as record separators.
    template <typename Iterator>
    struct ini_grammar : qi::grammar<Iterator, mini_ini(), ascii::blank_type>
    {
        ini_grammar() : ini_grammar::base_type(section_, "section")
        {
            using qi::char_;
            using qi::eol;
            using qi::on_error;
            using qi::fail;
            using namespace qi::labels;
            using phoenix::construct;
            using phoenix::val;

            // key: one or more of [a-zA-Z_0-9]
            key_ = +char_("a-zA-Z_0-9");

            // value: everything up to (but excluding) the end of the line.
            // A bare *char_ would also consume the line end and every following
            // line, so the terminator after the pair could never match and no
            // pair would ever be stored.
            pair_ = key_ >> '=' >> *(char_ - eol);

            // header line, then zero or more line-terminated pairs
            section_ = '[' >> key_ >> ']' >> eol >> *(pair_ >> eol);

            BOOST_SPIRIT_DEBUG_NODES((key_)(pair_)(section_))

            // Error handler attached to section_: prints what was expected (_4)
            // and the input from the error position (_3) up to the end (_2).
            on_error<fail>
            (
                section_
              , std::cout
                    << val("Error! Expecting ")
                    << _4 // what failed?
                    << val(" here: \"")
                    << construct<std::string>(_3, _2) // iterators to error-pos, end
                    << val("\"")
                    << std::endl
            );
        }

        qi::rule<Iterator, std::string(), ascii::blank_type> key_;
        qi::rule<Iterator, mini_ini(), ascii::blank_type> section_;
        qi::rule<Iterator, std::pair<std::string, std::string>(), ascii::blank_type> pair_;
    };
} // namespace client
int
main()
{
std::string storage =
"[section]\n"
"key1=val1\n"
"key2=val2\n";
client::mini_ini ini;
typedef client::ini_grammar<std::string::const_iterator> ini_grammar;
ini_grammar grammar;
using boost::spirit::ascii::blank;
std::string::const_iterator iter = storage.begin();
std::string::const_iterator end = storage.end();
bool ok = phrase_parse(iter, end, grammar, blank, ini);
if (ok) {
std::cout << "Parse success\n";
} else {
std::cout << "Parse failure\n";
}
if (iter != end) {
std::cout << "Remaining unparsed: '" << std::string(iter, end) << "'\n";
}
return ok && (iter==end)? 0 : 1;
}
How can I prevent the Boost Spirit symbols parser from accepting a keyword (symbol) when the input merely starts with a valid keyword? I would like parsing to reject 'ONEMORE' as a whole, rather than succeed in parsing 'ONE' (because that is a valid keyword) and then fail on 'MORE'.
Here is the actual output of the code below:
Keyword as a number: 1
Keyword as a number: 2
Keyword as a number: 1
Invalid keyword: MORETHREE
And this is what I like it to be:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 3
The code is just a sample to get the point across.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// Writes "Keyword as a number: <u>" to standard output, followed by a
/// flushed newline.
void printNumber( unsigned u )
{
    std::ostream& out = std::cout;
    out << "Keyword as a number: ";
    out << u;
    out << std::endl;
}
/// Writes "Invalid keyword: <s>" to standard output, followed by a flushed
/// newline.
void printInvalidKeyword( const string &s )
{
    std::ostream& out = std::cout;
    out << "Invalid keyword: ";
    out << s;
    out << std::endl;
}
/// Grammar that reads a whitespace-separated stream of tokens and reports each
/// one either as a known keyword (via printNumber) or as an invalid keyword
/// (via printInvalidKeyword).
///
/// Note: the symbol table is tried directly, so it matches on any input that
/// merely STARTS with a keyword ("ONEMORE" matches "ONE").
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
    /// Keyword table mapping each keyword to its numeric value.
    struct mySymbols_ : qi::symbols<char, unsigned>
    {
        mySymbols_()
        {
            add
                ("ONE"   , 1)
                ("TWO"   , 2)
                ("THREE" , 3) // the original listing repeated 2 here, making THREE indistinguishable from TWO
            ;
        }
    } mySymbols;

    keyword_parser() : keyword_parser::base_type(start)
    {
        using ascii::char_;

        // Try the keyword table first; otherwise treat the input as invalid.
        start %= *(
                    mySymbols[&printNumber]
                  |
                    invalid[&printInvalidKeyword]
                  );

        // invalid uses the space skipper, so +char_ keeps consuming across
        // whitespace and collects the remaining characters into one string.
        invalid = +char_;
    }

    qi::rule<Iterator, ascii::space_type> start;
    qi::rule<Iterator, std::string(), ascii::space_type> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}
Look at the distinct directive from the Spirit Repository (boost::spirit::repository::qi::distinct), or take some measures yourself:
// Try a whole-word keyword first; otherwise report the token as invalid.
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
// A symbol only counts as a keyword when it is NOT immediately followed by
// another identifier character, so "ONEMORE" no longer matches "ONE".
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
// An invalid token is a run of ascii::graph characters.
invalid = +ascii::graph;
The rules being declared as
// Only start is declared with the space skipper.
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
// (keyword and invalid are declared without a skipper type)
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()> invalid;
See it Live On Coliru
Prints:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 2
Full source:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
/// Grammar that reads a whitespace-separated stream of tokens and prints each
/// one either as a known keyword with its number or as an invalid keyword.
///
/// A keyword is only accepted as a whole word: a symbol immediately followed by
/// another identifier character (e.g. "ONEMORE") is reported as invalid.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
    /// Keyword table mapping each keyword to its numeric value.
    struct mySymbols_ : qi::symbols<char, unsigned>
    {
        mySymbols_()
        {
            add
                ("ONE"   , 1)
                ("TWO"   , 2)
                ("THREE" , 3) // the original listing repeated 2 here, making THREE indistinguishable from TWO
            ;
        }
    } mySymbols;

    keyword_parser() : keyword_parser::base_type(start)
    {
        using qi::_1;
        using ascii::char_;
        using phx::val;

        // Try a whole-word keyword first; otherwise report the token as invalid.
        start %= *(
                    keyword [std::cout << val("Keyword as a number: ") << _1 << std::endl]
                  | invalid [std::cout << val("Invalid keyword: ") << _1 << std::endl]
                  );

        // Negative lookahead: reject the symbol when an identifier character
        // follows it directly.
        keyword = mySymbols >> !(char_("a-zA-Z0-9_"));

        // An invalid token is a run of ascii::graph characters.
        invalid = +ascii::graph;
    }

    qi::rule<Iterator, ascii::space_type> start;
    // lexemes do not ignore embedded skippables
    qi::rule<Iterator, int()> keyword;
    qi::rule<Iterator, std::string()/*IMPLICIT LEXEME:, ascii::space_type*/> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}