I have the following working Qi code:
// Qi grammar for a URL-style query string: key[=value] pairs separated by '&'.
// NOTE(review): `Iterator` and `string_map` are not declared in this snippet —
// presumably a template parameter and an alias for std::map<T, T>; confirm.
struct query_grammar
: public boost::spirit::qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
// One pair, followed by any number of '&'-prefixed pairs.
query = pair >> *(boost::spirit::qi::lit('&') >> pair);
// A key of one or more qchars, optionally followed by '=' and a value of one or more qchars.
pair = +qchar >> -(boost::spirit::qi::lit('=') >> +qchar);
// Any single character other than '&' or '='.
qchar = ~boost::spirit::qi::char_("&=");
}
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>()> query;
// std::map<K, V>::value_type is std::pair<const K, V>, i.e. the key part is const.
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>::value_type()> pair;
boost::spirit::qi::rule<Iterator, char()> qchar;
};
I tried porting it to x3:
namespace x3 = boost::spirit::x3;
const x3::rule<class query_char_, char> query_char_ = "query_char";
const x3::rule<class string_pair_, std::map<std::string,std::string>::value_type> string_pair_ = "string_pair";
const x3::rule<class string_map_, std::map<std::string,std::string>> string_map_ = "string_map";
const auto query_char__def = ~boost::spirit::x3::char_("&=");
const auto string_pair__def = +query_char_ >> -(boost::spirit::x3::lit('=') >> +query_char_);
const auto string_map__def = string_pair_ >> *(boost::spirit::x3::lit('&') >> string_pair_);
BOOST_SPIRIT_DEFINE(string_map_)
BOOST_SPIRIT_DEFINE(string_pair_)
BOOST_SPIRIT_DEFINE(query_char_)
but I am getting the following error when trying to parse a string with string_map_ :
/usr/include/boost/spirit/home/x3/support/traits/move_to.hpp:209: erreur : no matching function for call to move_to(const char*&, const char*&, std::pair<std::__cxx11::basic_string<char>, std::__cxx11::basic_string<char> >&, boost::mpl::identity<boost::spirit::x3::traits::plain_attribute>::type)
detail::move_to(first, last, dest, typename attribute_category<Dest>::type());
~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I saw this answer: Parsing pair of strings fails. Bad spirit x3 grammar and tried to make my string_pair raw but to no avail.
Edit:
this example code from the spirit examples does not compile either so I guess the problem is a bit deeper:
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
int main()
{
std::string input( "cosmic pizza " );
auto iter = input.begin();
auto end_iter = input.end();
std::pair<std::string, std::string> result;
x3::parse( iter, end_iter, *(~x3::char_(' ')) >> ' ' >> *x3::char_, result);
}
Qi Fixes
First off, I had to fix the rule declaration with the Qi variant before it could work:
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
The reason is simple: std::map<K, V>::value_type is std::pair<K const, V>, and because of the const key such a pair can never be assigned to.
Here's a Qi SSCCE:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <map>
namespace qi = boost::spirit::qi;
template <typename T> using string_map = std::map<T, T>;
/// Qi grammar that parses a query string ("k=v&k2=v2", value optional) into a
/// string_map<std::string>.
template <typename Iterator>
struct query_grammar : qi::grammar<Iterator, string_map<std::string>()>
{
    query_grammar() : query_grammar::base_type(query)
    {
        // Top-down: a query is '&'-separated pairs ...
        query = pair >> *('&' >> pair);
        // ... a pair is a key with an optional "=value" ...
        pair = +qchar >> -('=' >> +qchar);
        // ... and keys/values are built from anything except the two delimiters.
        qchar = ~qi::char_("&=");
    }

private:
    // A plain (non-const-key) pair, so the parsed attribute can be assigned.
    using key_value = std::pair<std::string, std::string>;

    qi::rule<Iterator, string_map<std::string>()> query;
    qi::rule<Iterator, key_value()> pair;
    qi::rule<Iterator, char()> qchar;
};
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, query_grammar<It>{}, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Prints
======= foo=bar&baz=boo
Parsed 2 pairs
Qi Improvements
The following simpler grammar seems better:
Live On Coliru
// Simplified Qi query-string grammar; T is the string type used for both keys and values.
template <typename Iterator, typename T = std::string>
struct query_grammar : public qi::grammar<Iterator, string_map<T>()>
{
query_grammar() : query_grammar::base_type(query) {
using namespace qi;
// Key: one or more characters other than '&' and '='. The '=' is mandatory here.
// Value: zero or more characters other than '&', so it may be empty and may contain '='.
pair = +~char_("&=") >> '=' >> *~char_("&");
// List operator: one or more pairs separated by '&'.
query = pair % '&';
}
private:
qi::rule<Iterator, std::pair<T,T>()> pair;
qi::rule<Iterator, std::map<T,T>()> query;
};
It accepts empty values (e.g. &q=&x=) and values containing additional =: &q=7==8&rt=bool. It could be significantly more efficient (untested).
X3 version
Without looking at your code, I translated it directly into an X3 version:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <map>
namespace x3 = boost::spirit::x3;
template <typename T> using string_map = std::map<T, T>;
namespace grammar {
using namespace x3;
auto pair = +~char_("&=") >> '=' >> *~char_("&");
auto query = pair % '&';
}
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, grammar::query, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Which, unsurprisingly, prints
======= foo=bar&baz=boo
Parsed 2 pairs
X3 Improvements
You probably want to coerce the attribute types of the rules explicitly, because automatic attribute propagation relies on heuristics that can be surprising.
namespace grammar {
template <typename T = std::string> auto& query() {
using namespace x3;
static const auto s_pair
= rule<struct pair_, std::pair<T, T> > {"pair"}
= +~char_("&=") >> -('=' >> *~char_("&"));
static const auto s_query
= rule<struct query_, std::map<T, T> > {"query"}
= s_pair % '&';
return s_query;
};
}
See it Live On Coliru
What Went wrong?
The X3 version suffered the same problem with const key type in std::map<>::value_type
Related
I want to parse a function (with an arbitrary name and an arbitrary number of arguments) in this form:
function(bye, 1, 3, 4, foo)
The arguments could be generic strings comma separated.
And I want to copy the name of the function and the arguments in a vector of strings.
like this
std::vector<std::string> F;
std::string fun = "function(bye, 1, 3, 4, foo)";
// The parser must produce this vector from the example
F[0] == "function"
F[1] == "1"
F[2] == "3"
F[3] == "4"
F[4] == "foo"
I've written the following code after reading some tutorials, but it does not work (in the sense that it does not compile).
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
fn_name = +qi::char_("a-zA-Z");
string = +qi::char_("a-zA-Z_0-9");
rec = *( lit(",") >> string );
start %= fn_name >> lit("(") >> string >> rec >> lit(")") ;
}
qi::rule<Iterator, std::string(), ascii::space_type> fn_name;
qi::rule<Iterator, std::string(), ascii::space_type> string;
qi::rule<Iterator, std::string(), ascii::space_type> rec;
qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
int
main()
{
namespace qi = boost::spirit::qi;
std::cout << "/////////////////////////////////////////////////////////\n\n";
client::command_parser<std::string::iterator> CP;
std::string cmd("fun(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
bool result = qi::parse(cmd.begin(), cmd.end(), CP, VV);
if (result) {
for ( auto sss : VV ){
std::cout << sss << std::endl;
}
} else {
std::cout << "Fail" << std::endl;
}
return 0 ;
}
Just for fun, here's my minimalist take on this grammar:
using CallList = std::vector<std::string>;
/// Exception signalling that the input did not match the grammar.
/// Carries the fixed message "ParseError".
struct ParseError : std::runtime_error {
    ParseError()
        : std::runtime_error{"ParseError"}
    {
    }
};
// The parse implementation
/// Parses a call of the form `name(arg, arg, ...)`.
///
/// @param fun  the text to parse; whitespace between tokens is skipped.
/// @return     the function name followed by each argument, in order.
/// @throws ParseError if the whole input does not match the grammar
///         (the grammar ends with `eoi`, so trailing junk is rejected).
CallList parse_function_call(std::string const& fun) {
    CallList elements;
    using namespace boost::spirit::qi;
    using It = decltype(begin(fun));

    // Declared without a skipper, so the name is matched as one contiguous token.
    // A letter followed by ZERO or more alphanumerics/underscores: the previous
    // `+(...)` demanded at least two characters and rejected names like `f`.
    static const rule<It, std::string()> identifier = alpha >> *(alnum | char_('_'));

    // Arguments are optional; each is a run of characters other than ',' and ')'
    // taken verbatim (lexeme), so an argument may contain inner spaces.
    if (!phrase_parse(begin(fun), end(fun),
                      identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
                      space, elements))
        throw ParseError{};
    return elements;
}
With a little bit of plumbing
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
/// Streams a TestResult: "ParseError" for the error alternative, otherwise the
/// call list as "{a,b,c}".
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
    if (std::holds_alternative<ParseError>(tr)) {
        return os << "ParseError";
    }

    auto const& items = std::get<CallList>(tr);
    os << "{";
    // The joiner writes "," between elements only, never after the last one.
    std::copy(items.begin(), items.end(), std::experimental::make_ostream_joiner(os, ","));
    return os << "}";
}
TestResult try_parse(std::string const& fun) {
try { return parse_function_call(fun); }
catch(ParseError const& e) { return e; }
}
Here's a test runner:
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
Which prints Live On Coliru
"function(bye, 1, 3, 4, foo)": FAIL
-- expected: {function,1,3,4,foo}
-- actual: {function,bye,1,3,4,foo}
"liar(pants on fire)": PASS
"liar('pants on fire')": PASS
"nullary()": PASS
"nullary( )": PASS
"zerolength(a,,b)": PASS
"zerolength(a, ,b)": PASS
"noarglust": PASS
"": PASS
"()": PASS
"1(invalidfunctionname)": PASS
"foo(bar) BOGUS": PASS
Note that your example test-case doesn't pass, but I think that was a mistake in the test case.
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <experimental/iterator>
#include <variant>
#include <iomanip>
using CallList = std::vector<std::string>;
/// Exception signalling that the input did not match the grammar.
/// Carries the fixed message "ParseError".
struct ParseError : std::runtime_error {
    ParseError()
        : std::runtime_error{"ParseError"}
    {
    }
};
// The parse implementation
/// Parses a call of the form `name(arg, arg, ...)`.
///
/// @param fun  the text to parse; whitespace between tokens is skipped.
/// @return     the function name followed by each argument, in order.
/// @throws ParseError if the whole input does not match the grammar
///         (the grammar ends with `eoi`, so trailing junk is rejected).
CallList parse_function_call(std::string const& fun) {
    CallList elements;
    using namespace boost::spirit::qi;
    using It = decltype(begin(fun));

    // Declared without a skipper, so the name is matched as one contiguous token.
    // A letter followed by ZERO or more alphanumerics/underscores: the previous
    // `+(...)` demanded at least two characters and rejected names like `f`.
    static const rule<It, std::string()> identifier = alpha >> *(alnum | char_('_'));

    // Arguments are optional; each is a run of characters other than ',' and ')'
    // taken verbatim (lexeme), so an argument may contain inner spaces.
    if (!phrase_parse(begin(fun), end(fun),
                      identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
                      space, elements))
        throw ParseError{};
    return elements;
}
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
/// Streams a TestResult: "ParseError" for the error alternative, otherwise the
/// call list as "{a,b,c}".
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
    if (std::holds_alternative<ParseError>(tr)) {
        return os << "ParseError";
    }

    auto const& items = std::get<CallList>(tr);
    os << "{";
    // The joiner writes "," between elements only, never after the last one.
    std::copy(items.begin(), items.end(), std::experimental::make_ostream_joiner(os, ","));
    return os << "}";
}
TestResult try_parse(std::string const& fun) {
try { return parse_function_call(fun); }
catch(ParseError const& e) { return e; }
}
int main() {
using namespace std;
using Case = pair<std::string, TestResult>;
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
}
I'm correcting my answer per suggestions made by #sehe. All the credit for these corrections go to him. I am referencing your line numbers below. So the first error is from spirit and it says:
incompatible_start_rule:
// If you see the assertion below failing then the start rule
// passed to the constructor of the grammar is not compatible with
// the grammar (i.e. it uses different template parameters).
The signature of the start rule does not match the signature in the grammar's declaration.
22. struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
43. qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
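I googled this and could not find an explanation, but the attribute in a rule's template arguments should be written as a function signature (std::vector<std::string>(), with the trailing parentheses) rather than as a bare type, so that it matches the grammar's own signature. I did it the other way in my first answer. The proper fix is at line 43: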
43. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
The next spirit error is:
The rule was instantiated with a skipper type but you have not pass
any. Did you use parse instead of phrase_parse?");
So a phrase_parse is required with a skipper. Note that we need a skipper to pass along.
64. using qi::ascii::space;
65. bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, space, VV);
Now it compiles and the output is:
fun
1
2345foo
I see that won't do and you are looking to stuff the vector with each of the passed parameters. So you need a rule that is compatible with your attribute and intention. The kleene operator working with a std::string will put all the data into one string. So use your attribute:
41. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
Now as #sehe points out, the skipper with fn_name and string will just concatenate names with spaces and newlines. So don't use skippers there.
39. qi::rule<Iterator, std::string()> fn_name;
40. qi::rule<Iterator, std::string()> string;
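The other error I made was to read %= as a list operator. It is in fact a rule-definition operator: the list operator is the binary %, whereas %= assigns a parser to a rule with automatic attribute propagation. I'm not sure why there are two assignment forms, but from playing around it seems that %= is only needed when the rule's definition contains semantic actions. Here is the corrected code: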
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    /// Qi grammar for `name(arg, arg, ...)`: yields the function name followed
    /// by every argument as a std::vector<std::string>. Whitespace between
    /// tokens is skipped with ascii::space.
    template <typename Iterator>
    struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
    {
        command_parser() : command_parser::base_type(start)
        {
            // Only the names actually used are spelled out; the previous unused
            // using-declarations (int_, double_, lexeme, ascii::char_) are gone.
            fn_name = +qi::char_("a-zA-Z");
            string  = +qi::char_("a-zA-Z_0-9");
            // Zero or more further arguments, each introduced by a comma.
            rec     = *(',' >> string);
            // Plain `=` suffices: there are no semantic actions, so attribute
            // propagation is automatic anyway.
            start   = fn_name >> '(' >> string >> rec >> ')';
        }

        // Token rules carry no skipper, so a name or argument is matched as one
        // contiguous run and is never glued together across whitespace.
        qi::rule<Iterator, std::string()> fn_name;
        qi::rule<Iterator, std::string()> string;
        // A vector attribute keeps each argument as a separate element.
        qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
        qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
    };
}
// Parses a sample command and prints each element on its own line, or "Fail"
// when the input is not matched in full.
int main()
{
    namespace qi = boost::spirit::qi;

    using Iterator = std::string::iterator;
    client::command_parser<Iterator> CP;
    std::string cmd("function(1,2,3,4 , 5, foo) ");
    std::vector<std::string> VV;

    // Named iterators (instead of temporaries) let us see how far the parser
    // got, so a match that stops before the end of the input is not reported
    // as success.
    Iterator first = cmd.begin();
    Iterator const last = cmd.end();
    bool const matched = qi::phrase_parse(first, last, CP, qi::ascii::space, VV);

    if (matched && first == last) {
        // Iterate by const reference: no per-element string copy, and '\n'
        // avoids flushing the stream after every line.
        for (auto const& sss : VV) {
            std::cout << sss << '\n';
        }
    }
    else {
        std::cout << "Fail" << '\n';
    }
    return 0;
}
And here is an example using X3:
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <vector>
//your attribute, could be more complex, might use namespace
using attr = std::vector<std::string>;
namespace parser {
    namespace x3 = boost::spirit::x3;

    // Tokens are wrapped in lexeme[] so the skipper is switched off inside
    // them. Without it, a phrase_parse with a space skipper skips whitespace
    // between the individual characters and silently joins "a b" into "ab".
    const auto fn_name = x3::lexeme[+x3::char_("a-zA-Z")];
    const auto string = x3::lexeme[+x3::char_("a-zA-Z_0-9")];

    // name '(' arg (',' arg)* ')' — the rule coerces the attribute to `attr`.
    const auto start = x3::rule<struct _, attr>() = fn_name >> "(" >> string % ',' >> ")";
}
// Parses a sample command with the X3 grammar and prints each element prefixed
// by "-> ", or a failure notice when the parse does not succeed.
int main()
{
    namespace x3 = boost::spirit::x3;

    std::string cmd("fun(1,.2,3,4 , 5, foo) ");
    attr VV;
    auto it = cmd.begin();

    bool const parsed = phrase_parse(it, cmd.end(), parser::start, x3::space, VV);
    if (!parsed) {
        std::cout << "Fail at" << std::endl;
        return 0;
    }

    for (const auto& sss : VV) {
        std::cout << "-> " << sss << std::endl;
    }
    return 0;
}
Here is an example of the syntax -- two groups of items:
I_name m_name parameter1=value parameter2=value
I_name m_name parameter1=value \
parameter2=value
My question is how to define the skip-type.
It is not just space_type but space_type minus newline.
But newline followed by backslash is a skip-type.
E.g.
I define name like that:
qi::rule<Iterator, std::string(), ascii::space_type> m_sName;
m_sName %= qi::lexeme[ascii::alpha >> *ascii::alnum];
This is obviously not correct, as the space_type must include newline-backslash.
The following grammar works for me.
*("\\\n" | ~qi::char_('\n')) % '\n'
It will ignore any newline after the backslash. And the following is a simple test.
#include <vector>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
typedef std::vector<std::string> Lines;
inline auto ParseLines(std::string const& str) {
Lines lines;
namespace qi = boost::spirit::qi;
if (qi::parse(
str.begin(), str.end(),
*("\\\n" | ~qi::char_('\n')) % '\n',
lines)) {
return lines;
}
else {
throw std::invalid_argument("Parse error at ParseLines");
}
}
BOOST_AUTO_TEST_CASE(TestParseLines) {
std::string const str =
"I_name m_name parameter1=value parameter2=value\n"
"I_name m_name parameter1 = value \\\n"
"parameter2 = value";
Lines const expected{
"I_name m_name parameter1=value parameter2=value",
"I_name m_name parameter1 = value parameter2 = value"
};
BOOST_TEST(ParseLines(str) == expected);
}
You should use "-std=c++14 -lboost_unit_test_framework" for compilation. Anyway, it is easy to convert the code for c++03.
qi::blank is exactly that. It's qi::space without newlines.
You can do this too: ("\\\n" | qi::blank)
To be able to declare a rule with such a skipper, define a skipper grammar:
// Skipper grammar: skips either a backslash immediately followed by a newline
// (a line continuation) or a single qi::blank character.
// It exposes no attribute (qi::grammar<It> / qi::rule<It>).
template <typename It>
struct my_skipper : qi::grammar<It> {
my_skipper() : my_skipper::base_type(start) {}
// Defined via a default member initializer, so the constructor body stays empty.
qi::rule<It> start = ("\\\n" | qi::blank);
};
Full Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace ast {
struct record {
std::string iname, mname;
std::map<std::string, std::string> params;
};
using records = std::vector<record>;
}
BOOST_FUSION_ADAPT_STRUCT(ast::record, iname, mname, params)
// Qi grammar that parses newline-terminated records of the form
//   iname mname key=value key=value ...
// into ast::records. The grammar itself is declared without a skipper; it
// installs its own skipper internally through qi::skip (see `start`).
template <typename It>
struct my_parser : qi::grammar<It, ast::records()> {
using Skipper = qi::rule<It>;
my_parser() : my_parser::base_type(start) {
// Skip a backslash-newline pair (line continuation) or a blank character.
skipper = ("\\\n" | qi::blank);
// A name is one or more qi::graph characters.
name = +qi::graph;
// A key is like a name but stops at '='.
key = +(qi::graph - '=');
// key '=' value; the rule has no skipper, so nothing is skipped around '='.
param = key >> '=' >> name;
// Two names followed by any number of parameters.
record = name >> name >> *param;
// Every record must be followed by at least one end-of-line.
records = *(record >> +qi::eol);
// Run `records` under a copy of the skipper rule.
start = qi::skip(qi::copy(skipper)) [ records ];
}
private:
Skipper skipper;
qi::rule<It, ast::records(), Skipper> records;
qi::rule<It, ast::record(), Skipper> record;
qi::rule<It, ast::records()> start;
qi::rule<It, std::pair<std::string, std::string>()> param;
qi::rule<It, std::string()> name, key;
};
int main() {
#if 1
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
#else
using It = std::string::const_iterator;
std::string const input = "something here a=1\n";
It f = input.begin(), l = input.end();
#endif
ast::records data;
bool ok = qi::parse(f, l, my_parser<It>(), data);
if (ok) {
std::cout << "Parsed:\n";
for (auto& r : data) {
std::cout << "\t" << r.iname << " " << r.mname;
for (auto& p : r.params)
std::cout << " [" << p.first << ": " << p.second << "]";
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints (for the input in your question):
Parsed:
I_name m_name [parameter1: value] [parameter2: value]
I_name m_name [parameter1: value] [parameter2: value]
I am having trouble writing what I think should be a simple parser using Boost::Spirit. (I'm using Spirit instead of just using string functions as this is partly a learning exercise for me).
Data
The data to parse takes the form of key value pairs, where a value can itself be a key value pair. Keys are alphanumeric (with underscores and no digit as first character); values are alphanumeric plus .-_ - the values can be dates in the format DD-MMM-YYYY e.g. 01-Jan-2015 and floating point numbers like 3.1415 in addition to plain old alphanumeric strings. Keys and values are separated with =; pairs are separated with ;; structured values are delimited with {...}. At the moment I am erasing all spaces from the user input before passing it to Spirit.
Example input:
Key1 = Value1; Key2 = { NestedKey1=Alan; NestedKey2 = 43.1232; }; Key3 = 15-Jul-1974 ;
I would then strip all spaces to give
Key1=Value1;Key2={NestedKey1=Alan;NestedKey2=43.1232;};Key3=15-Jul-1974;
and then I actually pass it to Spirit.
Problem
What I currently have works just dandy when values are simply values. When I start encoding structured values in the input then Spirit stops after the first structured value. A workaround if there is only one structured value is to put it at the end of the input... but I will need two or more structured values on occasion.
The code
The below compiles in VS2013 and illustrates the errors:
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/pair.hpp>
#include <boost/fusion/adapted.hpp>
#include <map>
#include <string>
#include <iostream>
typedef std::map<std::string, std::string> ARGTYPE;
#define BOOST_SPIRIT_DEBUG
namespace qi = boost::spirit::qi;
namespace fusion = boost::fusion;
template < typename It, typename Skipper>
struct NestedGrammar : qi::grammar < It, ARGTYPE(), Skipper >
{
NestedGrammar() : NestedGrammar::base_type(Sequence)
{
using namespace qi;
KeyName = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
Value = +qi::char_("-.a-zA-Z_0-9");
Pair = KeyName >> -(
'=' >> ('{' >> raw[Sequence] >> '}' | Value)
);
Sequence = Pair >> *((qi::lit(';') | '&') >> Pair);
BOOST_SPIRIT_DEBUG_NODE(KeyName);
BOOST_SPIRIT_DEBUG_NODE(Value);
BOOST_SPIRIT_DEBUG_NODE(Pair);
BOOST_SPIRIT_DEBUG_NODE(Sequence);
}
private:
qi::rule<It, ARGTYPE(), Skipper> Sequence;
qi::rule<It, std::string()> KeyName;
qi::rule<It, std::string(), Skipper> Value;
qi::rule<It, std::pair < std::string, std::string>(), Skipper> Pair;
};
template <typename Iterator>
ARGTYPE Parse2(Iterator begin, Iterator end)
{
NestedGrammar<Iterator, qi::space_type> p;
ARGTYPE data;
qi::phrase_parse(begin, end, p, qi::space, data);
return data;
}
// ARGTYPE is std::map<std::string,std::string>
//
// Strips every whitespace character from Input and parses the remainder with
// Parse2, storing the resulting map in Output (any previous content is replaced).
// Input is taken by value because it is modified before parsing.
void NestedParse(std::string Input, ARGTYPE& Output)
{
    // isspace must not be handed a plain (possibly negative) char: that is
    // undefined behaviour for non-ASCII bytes. Going through unsigned char
    // avoids it, and the lambda also sidesteps overload ambiguity when the
    // <locale> version of isspace is visible.
    Input.erase(std::remove_if(Input.begin(), Input.end(),
                               [](unsigned char ch) { return isspace(ch) != 0; }),
                Input.end());
    Output = Parse2(Input.begin(), Input.end());
}
int main(int argc, char** argv)
{
std::string Example1, Example2, Example3;
ARGTYPE Out;
Example1 = "Key1=Value1 ; Key2 = 01-Jan-2015; Key3 = 2.7181; Key4 = Johnny";
Example2 = "Key1 = Value1; Key2 = {InnerK1 = one; IK2 = 11-Nov-2011;};";
Example3 = "K1 = V1; K2 = {IK1=IV1; IK2=IV2;}; K3=V3; K4 = {JK1=JV1; JK2=JV2;};";
NestedParse(Example1, Out);
for (ARGTYPE::iterator i = Out.begin(); i != Out.end(); i++)
std::cout << i->first << "|" << i->second << std::endl;
std::cout << "=====" << std::endl;
/* get the following, as expected:
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
*/
NestedParse(Example2, Out);
for (ARGTYPE::iterator i = Out.begin(); i != Out.end(); i++)
std::cout << i->first << "|" << i->second << std::endl;
std::cout << "=====" << std::endl;
/* get the following, as expected:
Key1|Value1
key2|InnerK1=one;IK2=11-Nov-2011
*/
NestedParse(Example3, Out);
for (ARGTYPE::iterator i = Out.begin(); i != Out.end(); i++)
std::cout << i->first << "|" << i->second << std::endl;
/* Only get the first two lines of the expected output:
K1|V1
K2|IK1=IV1;IK2=IV2
K3|V3
K4|JK1=JV1;JK2=JV2
*/
return 0;
}
I'm not sure if the problem is down to my ignorance of BNF, my ignorance of Spirit, or perhaps my ignorance of both at this point.
Any help appreciated. I've read e.g. Spirit Qi sequence parsing issues and links therein but I still can't figure out what I am doing wrong.
Indeed, this is precisely the kind of simple grammar that Spirit excels at.
Moreover there is absolutely no need to skip whitespace up front: Spirit has skippers built in for the purpose.
To your explicit question, though:
The Sequence rule is overcomplicated. You could just use the list operator (%):
Sequence = Pair % char_(";&");
Now your problem is that you end the sequence with a ; that isn't expected, so both Sequence and Value fail the parse eventually. This isn't very clear unless you #define BOOST_SPIRIT_DEBUG¹ and inspect the debug output.
So to fix it use:
Sequence = Pair % char_(";&") >> -omit[char_(";&")];
Fix Live On Coliru (or with debug info)
Prints:
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
=====
Key1|Value1
Key2|InnerK1=one;IK2=11-Nov-2011;
=====
K1|V1
K2|IK1=IV1;IK2=IV2;
K3|V3
K4|JK1=JV1;JK2=JV2;
Bonus Cleanup
Actually, that was simple. Just remove the redundant line removing whitespace. The skipper was already qi::space.
(Note though that the skipper doesn't apply to your Value rule, so values cannot contain whitespace but the parser will not silently skip it either; I suppose this is likely what you want. Just be aware of it).
Recursive AST
You would actually want to have a recursive AST, instead of parsing into a flat map.
Boost recursive variants make this a breeze:
namespace ast {
typedef boost::make_recursive_variant<std::string, std::map<std::string, boost::recursive_variant_> >::type Value;
typedef std::map<std::string, Value> Sequence;
}
To make this work you just change the declared attribute types of the rules:
qi::rule<It, ast::Sequence(), Skipper> Sequence;
qi::rule<It, std::pair<std::string, ast::Value>(), Skipper> Pair;
qi::rule<It, std::string(), Skipper> String;
qi::rule<It, std::string()> KeyName;
The rules themselves don't even have to change at all. You will need to write a little visitor to stream the AST:
static inline std::ostream& operator<<(std::ostream& os, ast::Value const& value) {
struct vis : boost::static_visitor<> {
vis(std::ostream& os, std::string indent = "") : _os(os), _indent(indent) {}
void operator()(std::map<std::string, ast::Value> const& map) const {
_os << "map {\n";
for (auto& entry : map) {
_os << _indent << " " << entry.first << '|';
boost::apply_visitor(vis(_os, _indent+" "), entry.second);
_os << "\n";
}
_os << _indent << "}\n";
}
void operator()(std::string const& s) const {
_os << s;
}
private:
std::ostream& _os;
std::string _indent;
};
boost::apply_visitor(vis(os), value);
return os;
}
Now it prints:
map {
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
}
=====
map {
Key1|Value1
Key2|InnerK1 = one; IK2 = 11-Nov-2011;
}
=====
map {
K1|V1
K2|IK1=IV1; IK2=IV2;
K3|V3
K4|JK1=JV1; JK2=JV2;
}
Of course, the clincher is when you change raw[Sequence] to just Sequence now:
map {
Key1|Value1
Key2|01-Jan-2015
Key3|2.7181
Key4|Johnny
}
=====
map {
Key1|Value1
Key2|map {
IK2|11-Nov-2011
InnerK1|one
}
}
=====
map {
K1|V1
K2|map {
IK1|IV1
IK2|IV2
}
K3|V3
K4|map {
JK1|JV1
JK2|JV2
}
}
Full Demo Code
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/variant.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <string>
#include <map>
namespace ast {
typedef boost::make_recursive_variant<std::string, std::map<std::string, boost::recursive_variant_> >::type Value;
typedef std::map<std::string, Value> Sequence;
}
namespace qi = boost::spirit::qi;
template <typename It, typename Skipper>
struct NestedGrammar : qi::grammar <It, ast::Sequence(), Skipper>
{
NestedGrammar() : NestedGrammar::base_type(Sequence)
{
using namespace qi;
KeyName = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_");
String = +qi::char_("-.a-zA-Z_0-9");
Pair = KeyName >> -(
'=' >> ('{' >> Sequence >> '}' | String)
);
Sequence = Pair % char_(";&") >> -omit[char_(";&")];
BOOST_SPIRIT_DEBUG_NODES((KeyName) (String) (Pair) (Sequence))
}
private:
qi::rule<It, ast::Sequence(), Skipper> Sequence;
qi::rule<It, std::pair<std::string, ast::Value>(), Skipper> Pair;
qi::rule<It, std::string(), Skipper> String;
qi::rule<It, std::string()> KeyName;
};
/// Parses [begin, end) with NestedGrammar, skipping whitespace via qi::space,
/// and returns the resulting ast::Sequence.
/// The success flag returned by phrase_parse is not checked.
template <typename Iterator>
ast::Sequence DoParse(Iterator begin, Iterator end)
{
    using Grammar = NestedGrammar<Iterator, qi::space_type>;

    ast::Sequence result;
    Grammar grammar;
    qi::phrase_parse(begin, end, grammar, qi::space, result);
    return result;
}
static inline std::ostream& operator<<(std::ostream& os, ast::Value const& value) {
struct vis : boost::static_visitor<> {
vis(std::ostream& os, std::string indent = "") : _os(os), _indent(indent) {}
void operator()(std::map<std::string, ast::Value> const& map) const {
_os << "map {\n";
for (auto& entry : map) {
_os << _indent << " " << entry.first << '|';
boost::apply_visitor(vis(_os, _indent+" "), entry.second);
_os << "\n";
}
_os << _indent << "}\n";
}
void operator()(std::string const& s) const {
_os << s;
}
private:
std::ostream& _os;
std::string _indent;
};
boost::apply_visitor(vis(os), value);
return os;
}
// Parses three sample inputs (flat, one nested value, two nested values) and
// prints the resulting AST for each, followed by a newline.
int main()
{
    std::string const examples[] = {
        "Key1=Value1 ; Key2 = 01-Jan-2015; Key3 = 2.7181; Key4 = Johnny",
        "Key1 = Value1; Key2 = {InnerK1 = one; IK2 = 11-Nov-2011;};",
        "K1 = V1; K2 = {IK1=IV1; IK2=IV2;}; K3=V3; K4 = {JK1=JV1; JK2=JV2;};",
    };

    for (std::string const& text : examples) {
        std::cout << DoParse(text.begin(), text.end()) << "\n";
    }
}
¹ You "had" it, but not in the right place! It should go before any Boost includes.
Writing Qi grammar with Phoenix bind I got a compilation error like
boost/spirit/home/support/context.hpp(180): error C2338: index_is_out_of_bounds
here
>> ruleHandId_[phx::bind(&parseContext::handId_, qi::_r1) = qi::_1];
I don't have much experience with Phoenix binding, but the previous bind, in the line
ruleStart_ = ruleEncoding_[phx::bind(&parseContext::encoding_, qi::_r1) = qi::_1]
works good without compilation errors
It's all under MSVC from VS2013 with boost 1.56 x86
What am I doing wrong in the code that produces the compilation error?
Source Code
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <boost/shared_ptr.hpp>
#include <sstream>
namespace sp = boost::spirit;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct parseContext {
std::string encoding_;
uint64_t handId_;
};
typedef boost::shared_ptr<parseContext> parseContextShPtr;
// Qi grammar matching "ABC", then "DEF", one whitespace character, "XYZ #" and an integer.
// The start rule receives a parseContext& as inherited attribute (qi::_r1); the
// semantic actions assign parsed attributes (qi::_1) to members of that context.
template <typename Iterator>
struct parseGrammar : qi::grammar<Iterator, void(parseContext&)> {
parseGrammar() : parseGrammar::base_type(ruleStart_)
{
// ruleStart_ is declared with a parseContext& inherited attribute, so qi::_r1 is available in this action.
ruleStart_ = ruleEncoding_[phx::bind(&parseContext::encoding_, qi::_r1) = qi::_1]
>> ruleHandHeader_;
ruleEncoding_ = qi::lit("ABC");
// NOTE(review): the action below uses qi::_r1, but ruleHandHeader_ is declared without any
// inherited attribute; this is the statement the reported index_is_out_of_bounds error points at.
ruleHandHeader_ = qi::lit("DEF") >> qi::space
>> qi::lit("XYZ #")
>> ruleHandId_[phx::bind(&parseContext::handId_, qi::_r1) = qi::_1];
ruleHandId_ = qi::long_long;
}
// Rules
qi::rule<Iterator, void(parseContext&)> ruleStart_;
qi::rule<Iterator, std::string()> ruleEncoding_;
qi::rule<Iterator> ruleHandHeader_;
// NOTE(review): the attribute is written as `uint64_t` rather than the signature form
// `uint64_t()` used by the other rules; confirm which is intended.
qi::rule<Iterator, uint64_t> ruleHandId_;
};
void test()
{
std::string s("ABCDEF XYZ #555: PQI #777");
std::stringstream sb;
sb.unsetf(std::ios::skipws);
sb << s;
const parseGrammar<sp::istream_iterator> p;
sp::istream_iterator b(sb);
sp::istream_iterator e;
parseContextShPtr ctx(new parseContext);
bool r = qi::parse(b, e, p(phx::ref(*ctx.get())));
if (r) {
std::cout << "Success" << std::endl;
}
else {
std::cout << "Failure" << std::endl;
}
std::cout << std::string(b, e).substr(0, 32) << std::endl;
}
Some of the placeholders cannot be bound.
This could be because ruleEncoding_ doesn't expose an attribute (for _1) (unlikely) or ruleStart_ doesn't have the inherited attribute (_r1).
That's all I can tell you right now.
Edit It was the latter. ruleHandHeader doesn't declare any attributes, let alone an inherited attribute to bind to _r1
Update To the comment.
Here are some suggestions. Much in the vein of my oft-repeated advice to avoid semantic actions (Boost Spirit: "Semantic actions are evil"?), I'd adapt the structure as a fusion sequence:
And use much simplified grammar rules:
ruleStart_ = ruleEncoding_ >> ruleHandHeader_;
ruleEncoding_ = "ABC";
ruleHandId_ = qi::long_long;
ruleHandHeader_ = "DEF XYZ #" >> ruleHandId_;
Now, adding in BOOST_SPIRIT_DEBUG macros and fixing uint64_t to uint64_t() in the rule definition:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/shared_ptr.hpp>
#include <sstream>
namespace qi = boost::spirit::qi;
// Parse result: the encoding tag and the numeric hand id.
struct parseContext {
    std::string encoding_;  // encoding tag
    // Default-initialised so a context that a failed parse never filled
    // does not carry an indeterminate value.
    uint64_t handId_ = 0;
};
// Adapt parseContext as a Fusion sequence with members (encoding_, handId_), in that order.
BOOST_FUSION_ADAPT_STRUCT(parseContext, (std::string, encoding_)(uint64_t,handId_))
// Shared-ownership pointer alias for parseContext.
typedef boost::shared_ptr<parseContext> parseContextShPtr;
// Qi grammar matching "ABC" followed by "DEF XYZ #" and an integer.
// Its attribute is a parseContext; no semantic actions are used, so the result
// is filled through attribute propagation (parseContext is Fusion-adapted in this listing).
template <typename Iterator>
struct parseGrammar : qi::grammar<Iterator, parseContext()> {
parseGrammar() : parseGrammar::base_type(ruleStart_)
{
// Sequence of a std::string rule and a uint64_t rule, matching the two adapted members.
ruleStart_ = ruleEncoding_ >> ruleHandHeader_;
ruleEncoding_ = "ABC";
ruleHandId_ = qi::long_long;
ruleHandHeader_ = "DEF XYZ #" >> ruleHandId_;
// Registers the rules for debug tracing (active because BOOST_SPIRIT_DEBUG is defined in this listing).
BOOST_SPIRIT_DEBUG_NODES((ruleStart_)(ruleEncoding_)(ruleHandId_)(ruleHandHeader_))
}
// Rules
qi::rule<Iterator, parseContext()> ruleStart_;
qi::rule<Iterator, std::string()> ruleEncoding_;
qi::rule<Iterator, uint64_t()> ruleHandId_, ruleHandHeader_;
};
void test()
{
std::stringstream sb("ABCDEF XYZ #555: PQI #777");
sb.unsetf(std::ios::skipws);
typedef boost::spirit::istream_iterator It;
const parseGrammar<It> p;
It b(sb), e;
parseContextShPtr ctx(new parseContext);
bool r = qi::parse(b, e, p, *ctx);
if (r) {
std::cout << "Success: " << ctx->encoding_ << ", " << ctx->handId_ << std::endl;
}
else {
std::cout << "Failure" << std::endl;
}
if (b!=e)
std::cout << "Remaining: '" << std::string(b, e).substr(0, 32) << "'...\n";
}
// Program entry point: runs the single parsing demo.
int main()
{
test();
}
Prints
Success: ABC, 555
Remaining: ': PQI #777'...
I'm trying the code sehe gave here : Boolean expression (grammar) parser in c++
I would like to create a string variable max that stores the maximum variable encountered during each parse (in lexicographic order, for example).
I tried things like :
var_ = qi::lexeme[ +alpha ] [_val = _1, if_(phx::ref(m) < _1) [phx::ref(m) = _1]];, but there is a (really long) compilation error
var_ = qi::lexeme[ +alpha [_val = _1, if_(phx::ref(m) < _1) [phx::ref(m) = _1]]]; but with this one I only get the first character of a variable, which is too restrictive.
I also tried to simplify things using integers instead of string for variables, but var_ = int_ [...] didn't work either, because int_ is already a parser (I think).
Do you have any ideas ?
Thanks in advance
I'd say that
start = *word [ if_(_1>_val) [_val=_1] ];
should be fine. However, due to a bug (?) Phoenix statements in a single-statement semantic action do not compile. You can easily work around it using a no-op statement, like e.g. _pass=true in this context:
start = *word [ if_(_1>_val) [_val=_1], _pass = true ];
Now, for this I assumed a
rule<It, std::string()> word = +alpha;
If you insist you can cram it all into one rule though:
start = *as_string[lexeme[+alpha]] [ if_(_1>_val) [_val=_1], _pass = true ];
I don't recommend that.
Demo
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Grammar whose attribute is a std::string: for every word parsed, the semantic
// action overwrites _val with that word whenever the word compares greater than _val.
template <typename It, typename Skipper>
struct max_parser : qi::grammar<It, std::string(), Skipper> {
max_parser() : max_parser::base_type(start) {
using namespace qi;
using phx::if_;
// The two branches are alternative spellings of the same parser: a separate `word`
// rule, or everything inlined into `start` via as_string.
#if 1
word = lexeme [ +alpha ];
// `_pass = true` is a no-op second statement; the accompanying text describes it as a
// workaround for single-statement Phoenix actions failing to compile.
start = *word [ if_(_1>_val) [_val=_1], _pass = true ];
#else
start = *as_string[lexeme[+alpha]] [ if_(_1>_val) [_val=_1], _pass = true ];
#endif
}
private:
qi::rule<It, std::string(), Skipper> start, word;
};
int main() {
std::string const input("beauty shall be in ze eye of the beholder");
using It = std::string::const_iterator;
max_parser<It, qi::space_type> parser;
std::string data;
It it = input.begin(), end = input.end();
bool ok = qi::phrase_parse(it, end, parser, qi::space, data);
if (ok) {
std::cout << "Parse success: " << data << "\n";
} else {
std::cout << "Parse failed\n";
}
if (it != end)
std::cout << "Remaining unparsed: '" << std::string(it,end) << "'\n";
}
Prints:
Parse success: ze
Re: comment:
Thanks for your answers. I wanted to do both usual parsing and keeping the maximum encountered string, and it worked with : var_ = *as_string[qi::lexeme[ +digit ]] [if_(phx::ref(m) < _1) [phx::ref(m) = _1], _val = _1];
For even more fun, and in the interest of complete overkill, I've come up with something that I think is close to useful:
Live On Coliru
// Exercises do_test with three element types: int (explicit start value),
// double (default start value) and std::string (explicit element parser).
int main() {
    std::string const ints = " 1 99 -1312 4 1014";
    std::string const doubles = " 1 NaN -4 7e3 7e4 -31e9";
    std::string const words = "beauty shall be in ze eye of the beholder";

    do_test<int>(ints, -9999);
    do_test<double>(doubles);
    // For strings the element parser is given explicitly: one element is a run of graph characters.
    do_test<std::string>(words, "", qi::as_string[qi::lexeme[+qi::graph]]);
}
The sample prints:
Parse success: 5 elements with maximum of 1014
values: 1 99 -1312 4 1014
Parse success: 6 elements with maximum of 70000
values: 1 nan -4 7000 70000 -3.1e+10
Parse success: 9 elements with maximum of ze
values: beauty shall be in ze eye of the beholder
As you can see, with strings we need to help Spirit a bit, because it doesn't know how you would like to "define" a single "word". The test driver is completely generic:
template <typename T, typename ElementParser = typename boost::spirit::traits::create_parser<T>::type>
void do_test(std::string const& input,
T const& start_value = std::numeric_limits<T>::lowest(),
ElementParser const& element_parser = boost::spirit::traits::create_parser<T>::call())
{
using It = std::string::const_iterator;
vector_and_max<T> data;
It it = input.begin(), end = input.end();
bool ok = qi::phrase_parse(it, end, max_parser<It, T>(start_value, element_parser), qi::space, data);
if (ok) {
std::cout << "Parse success: " << data.first.size() << " elements with maximum of " << data.second << "\n";
std::copy(data.first.begin(), data.first.end(), std::ostream_iterator<T>(std::cout << "\t values: ", " "));
std::cout << "\n";
} else {
std::cout << "Parse failed\n";
}
if (it != end)
std::cout << "Remaining unparsed: '" << std::string(it,end) << "'\n";
}
The start-element and element-parser are passed to the constructor of our grammar:
// Pair of (all elements, one single element of the same type).
template <typename Element>
using vector_and_max = std::pair<std::vector<Element>, Element>;
// Grammar producing a vector_and_max<T>: the list of parsed elements plus a running
// maximum that is tracked in a rule-local variable while the elements are parsed.
//   start_value    - value the running maximum is set to before any element is parsed
//   element_parser - parser used for each individual element
template <typename It, typename T, typename Skipper = qi::space_type>
struct max_parser : qi::grammar<It, vector_and_max<T>(), Skipper> {
template <typename ElementParser>
max_parser(T const& start_value, ElementParser const& element_parser) : max_parser::base_type(start) {
using namespace qi;
using phx::if_;
// Named placeholder of type _a_type; presumably refers to the rule's local declared via qi::locals<T>.
_a_type running_max;
// `%=` is used here although the rule carries semantic actions (presumably to keep
// automatic attribute propagation; see the Qi rule documentation).
vector_with_max %=
eps [ running_max = start_value ]
// NOTE(review): deep_copy presumably stores the element parser by value inside the rule; confirm.
>> *boost::proto::deep_copy(element_parser)
[ if_(_1>running_max) [running_max=_1], _pass = true ]
// Emits the running maximum as an attribute after the element list.
>> attr(running_max)
;
start = vector_with_max;
}
private:
qi::rule<It, vector_and_max<T>(), Skipper> start;
qi::rule<It, vector_and_max<T>(), Skipper, qi::locals<T> > vector_with_max;
};
Full Listing
For reference
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Pair of (all elements, one single element of the same type).
template <typename Element>
using vector_and_max = std::pair<std::vector<Element>, Element>;
// Grammar producing a vector_and_max<T>: the list of parsed elements plus a running
// maximum that is tracked in a rule-local variable while the elements are parsed.
//   start_value    - value the running maximum is set to before any element is parsed
//   element_parser - parser used for each individual element
template <typename It, typename T, typename Skipper = qi::space_type>
struct max_parser : qi::grammar<It, vector_and_max<T>(), Skipper> {
template <typename ElementParser>
max_parser(T const& start_value, ElementParser const& element_parser) : max_parser::base_type(start) {
using namespace qi;
using phx::if_;
// Named placeholder of type _a_type; presumably refers to the rule's local declared via qi::locals<T>.
_a_type running_max;
// `%=` is used here although the rule carries semantic actions (presumably to keep
// automatic attribute propagation; see the Qi rule documentation).
vector_with_max %=
eps [ running_max = start_value ]
// NOTE(review): deep_copy presumably stores the element parser by value inside the rule; confirm.
>> *boost::proto::deep_copy(element_parser)
[ if_(_1>running_max) [running_max=_1], _pass = true ]
// Emits the running maximum as an attribute after the element list.
>> attr(running_max)
;
start = vector_with_max;
}
private:
qi::rule<It, vector_and_max<T>(), Skipper> start;
qi::rule<It, vector_and_max<T>(), Skipper, qi::locals<T> > vector_with_max;
};
template <typename T, typename ElementParser = typename boost::spirit::traits::create_parser<T>::type>
void do_test(std::string const& input,
T const& start_value = std::numeric_limits<T>::lowest(),
ElementParser const& element_parser = boost::spirit::traits::create_parser<T>::call())
{
using It = std::string::const_iterator;
vector_and_max<T> data;
It it = input.begin(), end = input.end();
bool ok = qi::phrase_parse(it, end, max_parser<It, T>(start_value, element_parser), qi::space, data);
if (ok) {
std::cout << "Parse success: " << data.first.size() << " elements with maximum of " << data.second << "\n";
std::copy(data.first.begin(), data.first.end(), std::ostream_iterator<T>(std::cout << "\t values: ", " "));
std::cout << "\n";
} else {
std::cout << "Parse failed\n";
}
if (it != end)
std::cout << "Remaining unparsed: '" << std::string(it,end) << "'\n";
}
// Exercises do_test with three element types: int and double (default start
// value) and std::string (explicit start value and element parser).
int main() {
    std::string const ints = " 1 99 -1312 4 1014";
    std::string const doubles = " 1 NaN -4 7e3 7e4 -31e9";
    std::string const words = "beauty shall be in ze eye of the beholder";

    do_test<int>(ints);
    do_test<double>(doubles);
    // For strings the element parser is given explicitly: one element is a run of graph characters.
    do_test<std::string>(words, "", qi::as_string[qi::lexeme[+qi::graph]]);
}
Just for fun, here's how to do roughly¹ the same as in my other answer, and more, but without using boost spirit at all:
Live On Coliru
#include <algorithm>
#include <sstream>
#include <iterator>
#include <iostream>
// Prints the (up to) two greatest whitespace-separated words of a fixed sentence,
// greatest first, using std::partial_sort_copy with a descending comparator.
int main() {
    typedef std::istream_iterator<std::string> WordIt;

    std::istringstream words("beauty shall be in ze eye of the beholder");
    std::string best[2];

    // Returns one past the last slot of `best` that was actually filled.
    std::string* const filled_end = std::partial_sort_copy(
        WordIt(words), WordIt(),
        std::begin(best), std::end(best),
        std::greater<std::string>());

    for (std::string* cur = best; cur != filled_end; ++cur) {
        std::cout << "(Next) highest word: '" << *cur << "'\n";
    }
}
Output:
(Next) highest word: 'ze'
(Next) highest word: 'the'
¹ we're not nearly as specific about isalpha and isspace character types here