I want to parse a function (with an arbitrary name and an arbitrary number of arguments) in this form:
function(bye, 1, 3, 4, foo)
The arguments could be generic strings comma separated.
And I want to copy the name of the function and the arguments in a vector of strings.
like this
std::vector<std::string> F;
std::string fun = "function(bye, 1, 3, 4, foo)";
// The parser must produce this vector from the example
F[0] == "function"
F[1] == "1"
F[2] == "3"
F[3] == "4"
F[4] == "foo"
I've written the following code after reading some tutorials, but it does not work (in the sense that it does not compile).
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
// Grammar meant to parse a call such as `fun(1, 2, foo)` into a
// std::vector<std::string>, using ASCII whitespace as the skipper.
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
// Function name: one or more ASCII letters.
fn_name = +qi::char_("a-zA-Z");
// Argument: one or more letters, digits or underscores.
string = +qi::char_("a-zA-Z_0-9");
// Remaining arguments: zero or more `, argument` repetitions.
rec = *( lit(",") >> string );
// Whole call: name, '(', first argument, remaining arguments, ')'.
start %= fn_name >> lit("(") >> string >> rec >> lit(")") ;
}
// NOTE(review): fn_name and string are declared with a skipper, so whitespace
// inside a token is skipped and neighbouring pieces are concatenated.
qi::rule<Iterator, std::string(), ascii::space_type> fn_name;
qi::rule<Iterator, std::string(), ascii::space_type> string;
// NOTE(review): rec exposes a single std::string, so every repeated argument
// ends up concatenated into one element instead of one element per argument.
qi::rule<Iterator, std::string(), ascii::space_type> rec;
// NOTE(review): the attribute is spelled `std::vector<std::string>` here but
// `std::vector<std::string>()` in the grammar base; Spirit reports this
// mismatch as incompatible_start_rule.
qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
// Parses a fixed sample command with command_parser and prints each parsed
// element on its own line, or "Fail" when parsing does not succeed.
int
main()
{
namespace qi = boost::spirit::qi;
std::cout << "/////////////////////////////////////////////////////////\n\n";
client::command_parser<std::string::iterator> CP;
std::string cmd("fun(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
// NOTE(review): the grammar is declared with a skipper but qi::parse passes
// none; Spirit asks for qi::phrase_parse with a skipper instead.
bool result = qi::parse(cmd.begin(), cmd.end(), CP, VV);
if (result) {
for ( auto sss : VV ){
std::cout << sss << std::endl;
}
} else {
std::cout << "Fail" << std::endl;
}
return 0 ;
}
Just for fun, here's my minimalist take on this grammar:
/// A parsed call: the function name followed by its arguments.
using CallList = std::vector<std::string>;

/// Exception type signalling that the input is not a well-formed call.
/// Carries no state beyond the fixed message "ParseError".
struct ParseError : std::runtime_error {
    ParseError() : runtime_error{"ParseError"} {}
};
// The parse implementation
//
// Parses `name(arg, arg, ...)` and returns the name followed by the arguments.
// The argument list may be empty; each argument is taken verbatim up to the
// next ',' or ')'. The whole input must be consumed (eoi).
//
// Throws ParseError when the input does not match.
CallList parse_function_call(std::string const& fun) {
    using namespace boost::spirit::qi;
    using It = decltype(begin(fun));

    // A letter followed by any number of letters, digits or underscores.
    // `*` rather than `+` so that one-character names such as `f` are accepted.
    // The rule declares no skipper, so whitespace is not skipped inside a name.
    static const rule<It, std::string()> identifier = alpha >> *(alnum | char_('_'));

    CallList elements;
    if (!phrase_parse(begin(fun), end(fun),
            identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
            space, elements))
        throw ParseError{};
    return elements;
}
With a little bit of plumbing
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
// Streams a test result: "ParseError" for a failed parse, otherwise the
// elements of the call list as "{a,b,c}".
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
    if (std::holds_alternative<ParseError>(tr))
        return os << "ParseError";

    auto const& elements = std::get<CallList>(tr);
    os << "{";
    bool first = true;
    for (auto const& element : elements) {
        if (!first)
            os << ",";  // separator goes between elements only
        os << element;
        first = false;
    }
    return os << "}";
}
// Calls parse_function_call and turns a thrown ParseError into a returned
// value, so that callers can compare outcomes without handling exceptions.
TestResult try_parse(std::string const& fun) {
    try {
        return parse_function_call(fun);
    } catch (ParseError const& error) {
        return error;
    }
}
Here's a test runner:
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
Which prints Live On Coliru
"function(bye, 1, 3, 4, foo)": FAIL
-- expected: {function,1,3,4,foo}
-- actual: {function,bye,1,3,4,foo}
"liar(pants on fire)": PASS
"liar('pants on fire')": PASS
"nullary()": PASS
"nullary( )": PASS
"zerolength(a,,b)": PASS
"zerolength(a, ,b)": PASS
"noarglust": PASS
"": PASS
"()": PASS
"1(invalidfunctionname)": PASS
"foo(bar) BOGUS": PASS
Note that your example test-case doesn't pass, but I think that was a mistake in the test case.
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <experimental/iterator>
#include <variant>
#include <iomanip>
/// A parsed call: the function name followed by its arguments.
using CallList = std::vector<std::string>;

/// Exception type signalling that the input is not a well-formed call.
/// Carries no state beyond the fixed message "ParseError".
struct ParseError : std::runtime_error {
    ParseError() : runtime_error{"ParseError"} {}
};
// The parse implementation
//
// Parses `name(arg, arg, ...)` and returns the name followed by the arguments.
// The argument list may be empty; each argument is taken verbatim up to the
// next ',' or ')'. The whole input must be consumed (eoi).
//
// Throws ParseError when the input does not match.
CallList parse_function_call(std::string const& fun) {
    using namespace boost::spirit::qi;
    using It = decltype(begin(fun));

    // A letter followed by any number of letters, digits or underscores.
    // `*` rather than `+` so that one-character names such as `f` are accepted.
    // The rule declares no skipper, so whitespace is not skipped inside a name.
    static const rule<It, std::string()> identifier = alpha >> *(alnum | char_('_'));

    CallList elements;
    if (!phrase_parse(begin(fun), end(fun),
            identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
            space, elements))
        throw ParseError{};
    return elements;
}
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
// Streams a test result: "ParseError" for a failed parse, otherwise the
// elements of the call list as "{a,b,c}".
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
    if (std::holds_alternative<ParseError>(tr))
        return os << "ParseError";

    auto const& elements = std::get<CallList>(tr);
    os << "{";
    bool first = true;
    for (auto const& element : elements) {
        if (!first)
            os << ",";  // separator goes between elements only
        os << element;
        first = false;
    }
    return os << "}";
}
// Calls parse_function_call and turns a thrown ParseError into a returned
// value, so that callers can compare outcomes without handling exceptions.
TestResult try_parse(std::string const& fun) {
    try {
        return parse_function_call(fun);
    } catch (ParseError const& error) {
        return error;
    }
}
int main() {
using namespace std;
using Case = pair<std::string, TestResult>;
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
}
I'm correcting my answer per suggestions made by #sehe. All the credit for these corrections goes to him. I am referencing your line numbers below. So the first error is from spirit and it says:
incompatible_start_rule:
// If you see the assertion below failing then the start rule
// passed to the constructor of the grammar is not compatible with
// the grammar (i.e. it uses different template parameters).
The signature of the start parser does not match that of the parser declaration.
22. struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
43. qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
I googled this and could not find an explanation but using an object rather than a type is preferable. I did it the other way in my first answer. The proper fix is at line 43:
43. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
The next spirit error is:
The rule was instantiated with a skipper type but you have not pass
any. Did you use parse instead of phrase_parse?");
So a phrase_parse is required with a skipper. Note that we need a skipper to pass along.
64. using qi::ascii::space;
65. bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, space, VV);
Now it compiles and the output is:
fun
1
2345foo
I see that won't do and you are looking to stuff the vector with each of the passed parameters. So you need a rule that is compatible with your attribute and intention. The kleene operator working with a std::string will put all the data into one string. So use your attribute:
41. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
Now as #sehe points out, the skipper with fn_name and string will just concatenate names with spaces and newlines. So don't use skippers there.
39. qi::rule<Iterator, std::string()> fn_name;
40. qi::rule<Iterator, std::string()> string;
The other error I made was to see the %= and call it a list operator. From here, it is a definition operator. I'm not sure why there are two but playing around, it seems you need to use %= with semantic action. Here is the corrected code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    // Grammar for `name(arg, arg, ...)`: produces the function name followed
    // by each argument as separate strings, skipping ASCII whitespace between
    // tokens.
    template <typename Iterator>
    struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
    {
        command_parser() : command_parser::base_type(start)
        {
            // Tokens: letters only for the name; letters, digits and '_' for arguments.
            fn_name = +qi::char_("a-zA-Z");
            string  = +qi::char_("a-zA-Z_0-9");

            // Zero or more further arguments, each introduced by a comma.
            rec = *(',' >> string);

            // name '(' first-argument further-arguments ')'
            start %= fn_name >> '(' >> string >> rec >> ')';
        }

        // Token rules have no skipper so whitespace is not skipped inside a token.
        qi::rule<Iterator, std::string()> fn_name;
        qi::rule<Iterator, std::string()> string;
        qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
        qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
    };
}
int main()
{
namespace qi = boost::spirit::qi;
client::command_parser<std::string::iterator> CP;
std::string cmd("function(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, qi::ascii::space, VV);
if (result) {
for (auto sss : VV) {
std::cout << sss << std::endl;
}
}
else {
std::cout << "Fail" << std::endl;
}
return 0;
}
And here is an example using X3:
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <vector>
// Your attribute: the function name followed by its arguments. It could be
// more complex and might live in a namespace.
using attr = std::vector<std::string>;
namespace parser {
    namespace x3 = boost::spirit::x3;

    // lexeme[] switches the skipper off inside a token. Without it, parsing
    // with x3::space would skip whitespace between the characters of a name or
    // argument and glue the pieces together.
    const auto fn_name = x3::lexeme[+x3::char_("a-zA-Z")];
    const auto string  = x3::lexeme[+x3::char_("a-zA-Z_0-9")];

    // name '(' arg (',' arg)* ')'
    const auto start = x3::rule<struct _, attr>() = fn_name >> "(" >> string % ',' >> ")";
}
// Parses a fixed sample command with the X3 grammar. Prints each element on
// success; otherwise reports the input that was left unparsed.
int main()
{
    namespace x3 = boost::spirit::x3;
    std::string cmd("fun(1,.2,3,4 , 5, foo) ");
    attr VV;
    auto it = cmd.begin();
    const bool matched = phrase_parse(it, cmd.end(), parser::start, x3::space, VV);

    // A match that stops before the end of the input is treated as a failure too.
    if (matched && it == cmd.end()) {
        for (const auto& sss : VV) {
            std::cout << "-> " << sss << std::endl;
        }
    }
    else {
        // `it` marks where parsing stopped; show what remains from there.
        std::cout << "Fail at: '" << std::string(it, cmd.end()) << "'" << std::endl;
    }
    return 0;
}
Related
I have multiple floating point numbers lines, numbers in a line are space separated
e.g.
1.2 2.2 3.2
1.1 2.1 3.1
I want to extract the above numbers as strings and parse to a 2D vector as;
std::vector< std::vector< std::string > > { {"1.2", "2.2", "3.2"},{"1.1", "2.1", "3.1"} }
My code is like below.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// 2D container of numbers kept as strings (vector of vectors of strings).
using VecType = std::vector< std::vector< std::string >>;
// Wrapper struct holding the 2D vector in its single member.
struct employee
{
VecType name;
};
}
BOOST_FUSION_ADAPT_STRUCT(
client::employee,
(client::VecType, name)
)
//]
namespace client
{
// Grammar that parses whitespace-separated numbers into a VecType.
template <typename Iterator>
struct employee_parser : qi::grammar<Iterator, VecType(), ascii::space_type>
{
employee_parser() : employee_parser::base_type(start)
{
using qi::lexeme;
using ascii::char_;
// One number: digits and dots, followed by a whitespace character.
number %= lexeme[+char_( "0-9." ) >> qi::space ];
// NOTE(review): start repeats `number` directly, so every number ends up in
// its own inner vector; nothing here groups numbers by line.
start %= +number;
}
qi::rule<Iterator, std::string(), ascii::space_type> number;
qi::rule<Iterator, VecType(), ascii::space_type> start;
};
}
But this produces a 2D vector whose outer vector has size 6 and whose inner vectors each have size 1.
I don't understand how to split the input at newlines only, so that there are just 2 inner vectors.
You have to split up the rules. Let's start with the types:
using VecType = std::vector<std::string>;
using VecVecType = std::vector<VecType>;
Now, let's make a rule to parse a number, a row of numbers and multiple rows:
qi::rule<Iterator, std::string()> number;
qi::rule<Iterator, VecType(), qi::blank_type> row;
qi::rule<Iterator, VecVecType()> start;
Implementing them (note that I moved the skipper into the grammar because it's not a good idea to leak that into the interface):
number = raw [ double_ ]; // raw[] to get string value
row = +number;
start = qi::skip(blank) [ row % eol ];
Note: I used blank instead of space because we do NOT want to skip the eol which is significant for the grammar.
DEMO
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iomanip>
namespace client {
namespace qi = boost::spirit::qi;
using VecType = std::vector<std::string>;
using VecVecType = std::vector<VecType>;
} // namespace client
namespace client {
// Grammar that parses lines of blank-separated numbers into a VecVecType,
// keeping every number as the string that was matched.
template <typename Iterator>
struct my_parser : qi::grammar<Iterator, VecVecType()> {
my_parser() : my_parser::base_type(start) {
using namespace qi;
number = raw [ double_ ]; // raw[] to get string value
// A row is zero or more numbers, so an empty line yields an empty row.
row = *number;
// Rows are separated by eol; the skipper is `blank` so that eol itself is
// not skipped.
start = qi::skip(blank) [ row % eol ];
}
qi::rule<Iterator, std::string()> number;
qi::rule<Iterator, VecType(), qi::blank_type> row;
// No skipper in the public rule: skipping is set up inside via qi::skip.
qi::rule<Iterator, VecVecType()> start;
};
} // namespace client
int main() {
client::my_parser<std::string::const_iterator> const p;
for (std::string const& input: {
"",
"1.2 2.2 3.2\n1.1 2.1 3.1",
})
{
std::cout << "--- " << std::quoted(input) << " -----\n";
auto f = begin(input), l = end(input);
client::VecVecType output;
if (parse(f, l, p, output)) {
std::cout << "Parsed:\n";
for (auto& row : output) {
for (auto& v : row) {
std::cout << "\t" << v;
}
std::cout << "\n";
}
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
--- "" -----
Parsed:
--- "1.2 2.2 3.2
1.1 2.1 3.1" -----
Parsed:
1.2 2.2 3.2
1.1 2.1 3.1
BONUS
Strong typing makes everything more fun: why parse into strings if you can parse into doubles?
Also showing how to enable rule debugging:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iomanip>
namespace client {
namespace qi = boost::spirit::qi;
using VecType = std::vector<double>;
using VecVecType = std::vector<VecType>;
} // namespace client
namespace client {
// Grammar that parses lines of blank-separated numbers into a VecVecType of
// doubles, with rule debugging enabled for both rules.
template <typename Iterator>
struct my_parser : qi::grammar<Iterator, VecVecType()> {
my_parser() : my_parser::base_type(start) {
using namespace qi;
// A row is zero or more doubles, so an empty line yields an empty row.
row = *double_;
// Rows are separated by eol; `blank` is the skipper so eol is not skipped.
start = qi::skip(blank) [ row % eol ];
// Registers both rules for debug tracing (active with BOOST_SPIRIT_DEBUG).
BOOST_SPIRIT_DEBUG_NODES((start)(row))
}
private:
qi::rule<Iterator, VecType(), qi::blank_type> row;
qi::rule<Iterator, VecVecType()> start;
};
} // namespace client
int main() {
client::my_parser<std::string::const_iterator> const p;
for (std::string const& input: {
"",
"1.2 2.2 3.2\n1.1 2.1 3.1",
})
{
std::cout << "--- " << std::quoted(input) << " -----\n";
auto f = begin(input), l = end(input);
client::VecVecType output;
if (parse(f, l, p, output)) {
std::cout << "Parsed:\n";
for (auto& row : output) {
for (auto& v : row) {
std::cout << "\t" << v;
}
std::cout << "\n";
}
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
--- "" -----
<start>
<try></try>
<row>
<try></try>
<success></success>
<attributes>[[]]</attributes>
</row>
<success></success>
<attributes>[[[]]]</attributes>
</start>
Parsed:
--- "1.2 2.2 3.2
1.1 2.1 3.1" -----
<start>
<try>1.2 2.2 3.2\n1.1 2.1 </try>
<row>
<try>1.2 2.2 3.2\n1.1 2.1 </try>
<success>\n1.1 2.1 3.1</success>
<attributes>[[1.2, 2.2, 3.2]]</attributes>
</row>
<row>
<try>1.1 2.1 3.1</try>
<success></success>
<attributes>[[1.1, 2.1, 3.1]]</attributes>
</row>
<success></success>
<attributes>[[[1.2, 2.2, 3.2], [1.1, 2.1, 3.1]]]</attributes>
</start>
Parsed:
1.2 2.2 3.2
1.1 2.1 3.1
I have the following working Qi code:
// Qi grammar for a query string of `key[=value]` pairs separated by '&',
// producing a string-to-string map.
struct query_grammar
: public boost::spirit::qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
// One pair, then any number of '&'-separated further pairs.
query = pair >> *(boost::spirit::qi::lit('&') >> pair);
// Key, optionally followed by '=' and a value.
pair = +qchar >> -(boost::spirit::qi::lit('=') >> +qchar);
// Any character except the two separators.
qchar = ~boost::spirit::qi::char_("&=");
}
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>()> query;
// NOTE(review): map::value_type is pair<key_type const, mapped_type>; the
// const key makes it non-assignable, which is a problem for attribute handling.
boost::spirit::qi::rule<Iterator, std::map<std::string,std::string>::value_type()> pair;
boost::spirit::qi::rule<Iterator, char()> qchar;
};
I tried porting it to x3:
// X3 port of the query grammar: rule declarations, their definitions, and the
// macros that tie each definition to its rule.
namespace x3 = boost::spirit::x3;
const x3::rule<class query_char_, char> query_char_ = "query_char";
// NOTE(review): map::value_type has a const key (pair<key_type const, ...>),
// the same issue as in the Qi version.
const x3::rule<class string_pair_, std::map<std::string,std::string>::value_type> string_pair_ = "string_pair";
const x3::rule<class string_map_, std::map<std::string,std::string>> string_map_ = "string_map";
// Any character except the separators '&' and '='.
const auto query_char__def = ~boost::spirit::x3::char_("&=");
// Key, optionally followed by '=' and a value.
const auto string_pair__def = +query_char_ >> -(boost::spirit::x3::lit('=') >> +query_char_);
// One pair, then any number of '&'-separated further pairs.
const auto string_map__def = string_pair_ >> *(boost::spirit::x3::lit('&') >> string_pair_);
BOOST_SPIRIT_DEFINE(string_map_)
BOOST_SPIRIT_DEFINE(string_pair_)
BOOST_SPIRIT_DEFINE(query_char_)
but I am getting the following error when trying to parse a string with string_map_ :
/usr/include/boost/spirit/home/x3/support/traits/move_to.hpp:209: erreur : no matching function for call to move_to(const char*&, const char*&, std::pair<std::__cxx11::basic_string<char>, std::__cxx11::basic_string<char> >&, boost::mpl::identity<boost::spirit::x3::traits::plain_attribute>::type)
detail::move_to(first, last, dest, typename attribute_category<Dest>::type());
~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I saw this answer: Parsing pair of strings fails. Bad spirit x3 grammar and tried to make my string_pair raw but to no avail.
Edit:
this example code from the spirit examples does not compile either so I guess the problem is a bit deeper:
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Minimal reproduction: parse "word rest-of-input" into a pair of strings.
int main()
{
std::string input( "cosmic pizza " );
auto iter = input.begin();
auto end_iter = input.end();
std::pair<std::string, std::string> result;
// First element: everything up to the first space; second: everything after.
// NOTE(review): std::pair as an attribute presumably needs the Fusion
// std_pair adapter header, which is not included here - confirm.
x3::parse( iter, end_iter, *(~x3::char_(' ')) >> ' ' >> *x3::char_, result);
}
Qi Fixes
First off, I had to fix the rule declaration with the Qi variant before it could work:
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
For the simple reason that value_type is pair<key_type const, mapped_type>, which is never assignable.
Here's a Qi SSCCE:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <map>
namespace qi = boost::spirit::qi;
template <typename T> using string_map = std::map<T, T>;
// Qi grammar for a query string of `key[=value]` pairs separated by '&',
// producing a string-to-string map.
template <typename Iterator>
struct query_grammar : public qi::grammar<Iterator, string_map<std::string>()>
{
query_grammar() : query_grammar::base_type(query)
{
// Any character except the separators '&' and '='.
qchar = ~qi::char_("&=");
// Key, optionally followed by '=' and a value.
pair = +qchar >> -(qi::lit('=') >> +qchar);
// One pair, then any number of '&'-separated further pairs.
query = pair >> *(qi::lit('&') >> pair);
}
private:
qi::rule<Iterator, std::map<std::string,std::string>()> query;
// Plain std::pair (non-const key) rather than map::value_type, so the
// attribute is assignable.
qi::rule<Iterator, std::pair<std::string,std::string>()> pair;
qi::rule<Iterator, char()> qchar;
};
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, query_grammar<It>{}, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Prints
======= foo=bar&baz=boo
Parsed 2 pairs
Qi Improvements
The following simpler grammar seems better:
Live On Coliru
// Simplified Qi grammar for `key=value` pairs separated by '&', producing a
// map<T, T>.
template <typename Iterator, typename T = std::string>
struct query_grammar : public qi::grammar<Iterator, string_map<T>()>
{
query_grammar() : query_grammar::base_type(query) {
using namespace qi;
// Key: one or more characters other than '&' and '='. Value: anything up to
// the next '&', so it may be empty and may itself contain '='.
pair = +~char_("&=") >> '=' >> *~char_("&");
// Pairs separated by '&'.
query = pair % '&';
}
private:
qi::rule<Iterator, std::pair<T,T>()> pair;
qi::rule<Iterator, std::map<T,T>()> query;
};
It accepts empty values (e.g. &q=&x=) and values containing additional =: &q=7==8&rt=bool. It could be significantly more efficient (untested).
X3 version
Without looking at your code, I translated it directly into an X3 version:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <map>
namespace x3 = boost::spirit::x3;
template <typename T> using string_map = std::map<T, T>;
// X3 translation of the simplified query grammar.
namespace grammar {
using namespace x3;
// Key: one or more characters other than '&' and '='; value: anything up to
// the next '&'.
auto pair = +~char_("&=") >> '=' >> *~char_("&");
// Pairs separated by '&'.
auto query = pair % '&';
}
int main() {
using It = std::string::const_iterator;
for (std::string const input : { "foo=bar&baz=boo" })
{
std::cout << "======= " << input << "\n";
It f = input.begin(), l = input.end();
string_map<std::string> sm;
if (parse(f, l, grammar::query, sm)) {
std::cout << "Parsed " << sm.size() << " pairs\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
Which, obviously ( --- ) prints
======= foo=bar&baz=boo
Parsed 2 pairs
X3 Improvements
You probably want to coerce the attribute types for the rules, because automatic attribute propagation can have surprising heuristics.
namespace grammar {
    // Returns a reference to a function-local static parser for query strings
    // of `key[=value]` pairs separated by '&'. The rules coerce the attribute
    // types: std::pair<T, T> per pair and std::map<T, T> for the whole query.
    template <typename T = std::string> auto& query() {
        using namespace x3;

        // Key: one or more characters other than '&' and '='. The value part
        // is optional and runs up to the next '&'.
        static const auto s_pair
            = rule<struct pair_, std::pair<T, T> > {"pair"}
            = +~char_("&=") >> -('=' >> *~char_("&"));

        // Pairs separated by '&'.
        static const auto s_query
            = rule<struct query_, std::map<T, T> > {"query"}
            = s_pair % '&';

        return s_query;
    }
}
See it Live On Coliru
What Went wrong?
The X3 version suffered the same problem with const key type in std::map<>::value_type
I'm trying to implement a very specific grammar, which requires me at a certain point to parse a list of comma separated integers. The qi rule looks like the following:
qi::rule<Iterator, ascii::space_type> ident;
qi::rule<Iterator, ascii::space_type> nlist;
...
ident = char_ >> nlist;
nlist = ("(" >> int_ % "," >> ")");
...
I need to pass the values up to the ident rule (The expression ident has to create a syntax tree node, where the parsed values from nlist are required for the constructor). I thought about creating and filling a std::vector and use the semantic action like _val = vector<int>.... What is now unclear to me is how do I create a vector of arbitrary length from this rule, since I do not make any assumptions on how long the input will be or using a predefined vector like the examples.
Is this even possible, or is there a better way to do it?
This is the bread and butter of Spirit Qi.
Just use any compatible attribute type and profit:
using nlist_t = std::vector<int>;
using ident_t = std::pair<char, nlist_t>;
qi::rule<Iterator, ident_t(), qi::ascii::space_type> ident;
qi::rule<Iterator, nlist_t(), qi::ascii::space_type> nlist;
Note: For std::pair attribute compatibility, include the relevant fusion header:
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Parses inputs of the form `<char> ( int, int, ... )` into a pair of the
// character and the list of integers, and prints what was parsed.
int main()
{
    using Iterator = std::string::const_iterator;
    using nlist_t  = std::vector<int>;
    using ident_t  = std::pair<char, nlist_t>;

    qi::rule<Iterator, ident_t(), qi::ascii::space_type> ident;
    qi::rule<Iterator, nlist_t(), qi::ascii::space_type> nlist;

    // Parenthesised, comma-separated integers.
    nlist = '(' >> qi::int_ % ',' >> ')';
    // Any single character followed by the integer list.
    ident = qi::char_ >> nlist;

    for (std::string const input : { "a (1,2,3)", "+(881,-2,42) \n", "?(0)" }) {
        ident_t data;
        bool const matched =
            qi::phrase_parse(input.begin(), input.end(), ident, qi::ascii::space, data);

        if (!matched) {
            std::cout << "Parse failed: '" << input << "'\n";
            continue;
        }

        std::cout << "Parsed: " << data.first << "(";
        for (auto value : data.second)
            std::cout << value << ",";
        std::cout << ")\n";
    }
}
Prints
Parsed: a(1,2,3,)
Parsed: +(881,-2,42,)
Parsed: ?(0,)
BONUS
Version with imagined Ast type using phoenix::construct:
Also Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace OoShinyAst {
    using MyName         = char;
    using MyArgument     = int;
    using MyArgumentList = std::vector<MyArgument>;

    // AST node: a name together with its list of arguments.
    struct MyIdent {
        MyName name;
        MyArgumentList args;

        MyIdent() = default;

        // Takes both parts by value and moves them into the members.
        MyIdent(MyName ident, MyArgumentList arguments)
            : name(std::move(ident)), args(std::move(arguments)) {}
    };
}
// Same parser as before, but the result is built as an OoShinyAst::MyIdent
// via a semantic action that constructs the node from the two sub-attributes.
int main()
{
    using Iterator = std::string::const_iterator;

    qi::rule<Iterator, OoShinyAst::MyIdent(), qi::ascii::space_type> ident;
    qi::rule<Iterator, OoShinyAst::MyArgumentList(), qi::ascii::space_type> nlist;

    nlist = '(' >> qi::int_ % ',' >> ')';
    ident = (qi::char_ >> nlist) [ qi::_val = px::construct<OoShinyAst::MyIdent>(qi::_1, qi::_2) ];

    for (std::string const input : { "a (1,2,3)", "+(881,-2,42) \n", "?(0)" }) {
        OoShinyAst::MyIdent data;
        bool const matched =
            qi::phrase_parse(input.begin(), input.end(), ident, qi::ascii::space, data);

        if (!matched) {
            std::cout << "Parse failed: '" << input << "'\n";
            continue;
        }

        std::cout << "Parsed: " << data.name << "(";
        for (auto argument : data.args)
            std::cout << argument << ",";
        std::cout << ")\n";
    }
}
Here is an example of the syntax -- two groups of items:
I_name m_name parameter1=value parameter2=value
I_name m_name parameter1=value \
parameter2=value
My question is how to define the skip-type.
It is not just space_type but space_type minus newline.
But newline followed by backslash is a skip-type.
E.g.
I define name like that:
qi::rule<Iterator, std::string(), ascii::space_type> m_sName;
m_sName %= qi::lexeme[ascii::alpha >> *ascii::alnum];
This is obviously not correct, as the space_type must include newline-backslash.
The following grammar works for me.
*("\\\n" | ~qi::char_('\n')) % '\n'
It will ignore any newline after the backslash. And the following is a simple test.
#include <vector>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
typedef std::vector<std::string> Lines;
inline auto ParseLines(std::string const& str) {
Lines lines;
namespace qi = boost::spirit::qi;
if (qi::parse(
str.begin(), str.end(),
*("\\\n" | ~qi::char_('\n')) % '\n',
lines)) {
return lines;
}
else {
throw std::invalid_argument("Parse error at ParseLines");
}
}
// Checks that ParseLines joins a backslash-continued line with the next one
// and otherwise splits the input at newlines.
BOOST_AUTO_TEST_CASE(TestParseLines) {
// Two logical lines; the second is spread over two physical lines.
std::string const str =
"I_name m_name parameter1=value parameter2=value\n"
"I_name m_name parameter1 = value \\\n"
"parameter2 = value";
// The backslash-newline is removed; the space before the backslash stays.
Lines const expected{
"I_name m_name parameter1=value parameter2=value",
"I_name m_name parameter1 = value parameter2 = value"
};
BOOST_TEST(ParseLines(str) == expected);
}
You should use "-std=c++14 -lboost_unit_test_framework" for compilation. Anyway, it is easy to convert the code for c++03.
qi::blank is exactly that. It's qi::space without newlines.
You can do this too: ("\\\n" | qi::blank)
To be able to declare a rule with such a skipper, define a skipper grammar:
// Skipper grammar: skips blanks (whitespace other than newlines) and also a
// backslash immediately followed by a newline (line continuation).
template <typename It>
struct my_skipper : qi::grammar<It> {
my_skipper() : my_skipper::base_type(start) {}
qi::rule<It> start = ("\\\n" | qi::blank);
};
Full Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace ast {
    // One record: two names followed by a set of key/value parameters.
    struct record {
        std::string iname;
        std::string mname;
        std::map<std::string, std::string> params;
    };

    // A sequence of records.
    using records = std::vector<record>;
}
BOOST_FUSION_ADAPT_STRUCT(ast::record, iname, mname, params)
// Grammar that parses records of the form `iname mname key=value ...`, one per
// line, where a backslash-newline continues a record on the next line.
template <typename It>
struct my_parser : qi::grammar<It, ast::records()> {
using Skipper = qi::rule<It>;
my_parser() : my_parser::base_type(start) {
// Skip blanks and backslash-newline; plain newlines stay significant.
skipper = ("\\\n" | qi::blank);
// A name is a run of graphic characters; a key additionally stops at '='.
name = +qi::graph;
key = +(qi::graph - '=');
param = key >> '=' >> name;
record = name >> name >> *param;
// Each record is terminated by one or more end-of-line markers.
records = *(record >> +qi::eol);
// The skipper is applied here, inside the grammar, so callers use qi::parse.
start = qi::skip(qi::copy(skipper)) [ records ];
}
private:
Skipper skipper;
qi::rule<It, ast::records(), Skipper> records;
qi::rule<It, ast::record(), Skipper> record;
qi::rule<It, ast::records()> start;
// Declared without a skipper, so nothing is skipped inside these tokens.
qi::rule<It, std::pair<std::string, std::string>()> param;
qi::rule<It, std::string()> name, key;
};
// Parses records from the input and prints each one with its parameters,
// followed by any input that was left unparsed.
int main() {
// Active branch: read from standard input with whitespace skipping turned off
// on the stream. The #else branch parses a fixed string instead.
#if 1
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
#else
using It = std::string::const_iterator;
std::string const input = "something here a=1\n";
It f = input.begin(), l = input.end();
#endif
ast::records data;
bool ok = qi::parse(f, l, my_parser<It>(), data);
if (ok) {
std::cout << "Parsed:\n";
for (auto& r : data) {
std::cout << "\t" << r.iname << " " << r.mname;
for (auto& p : r.params)
std::cout << " [" << p.first << ": " << p.second << "]";
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
// f was advanced by the parser; anything between f and l was not consumed.
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints (for the input in your question):
Parsed:
I_name m_name [parameter1: value] [parameter2: value]
I_name m_name [parameter1: value] [parameter2: value]
Writing Qi grammar with Phoenix bind I got a compilation error like
boost/spirit/home/support/context.hpp(180): error C2338: index_is_out_of_bounds
here
>> ruleHandId_[phx::bind(&parseContext::handId_, qi::_r1) = qi::_1];
I just don't have much experience with Phoenix binding, but the previous bind in the line
ruleStart_ = ruleEncoding_[phx::bind(&parseContext::encoding_, qi::_r1) = qi::_1]
works good without compilation errors
It's all under MSVC from VS2013 with boost 1.56 x86
What am I doing wrong in the code below that causes the compilation error?
Source Code
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <boost/shared_ptr.hpp>
#include <sstream>
namespace sp = boost::spirit;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Values the grammar extracts from the input.
struct parseContext {
    std::string encoding_;
    // Initialized so that the id is never read as an indeterminate value
    // when no hand id has been parsed.
    uint64_t handId_ = 0;
};
typedef boost::shared_ptr<parseContext> parseContextShPtr;
// Grammar that takes a parseContext& as inherited attribute and stores parsed
// values into it through Phoenix-bound semantic actions.
template <typename Iterator>
struct parseGrammar : qi::grammar<Iterator, void(parseContext&)> {
parseGrammar() : parseGrammar::base_type(ruleStart_)
{
// Stores the matched encoding in the context passed as _r1.
ruleStart_ = ruleEncoding_[phx::bind(&parseContext::encoding_, qi::_r1) = qi::_1]
>> ruleHandHeader_;
ruleEncoding_ = qi::lit("ABC");
// NOTE(review): this action uses qi::_r1, but ruleHandHeader_ is declared
// without any inherited attribute, so _r1 cannot be bound here
// (index_is_out_of_bounds).
ruleHandHeader_ = qi::lit("DEF") >> qi::space
>> qi::lit("XYZ #")
>> ruleHandId_[phx::bind(&parseContext::handId_, qi::_r1) = qi::_1];
ruleHandId_ = qi::long_long;
}
// Rules
qi::rule<Iterator, void(parseContext&)> ruleStart_;
qi::rule<Iterator, std::string()> ruleEncoding_;
qi::rule<Iterator> ruleHandHeader_;
// NOTE(review): attribute is written `uint64_t` rather than the signature
// form `uint64_t()`.
qi::rule<Iterator, uint64_t> ruleHandId_;
};
// Feeds a fixed sample string through the grammar via a stream iterator and
// prints "Success" or "Failure", then up to 32 characters of remaining input.
void test()
{
std::string s("ABCDEF XYZ #555: PQI #777");
std::stringstream sb;
// Whitespace skipping is turned off on the stream before it is read.
sb.unsetf(std::ios::skipws);
sb << s;
const parseGrammar<sp::istream_iterator> p;
sp::istream_iterator b(sb);
sp::istream_iterator e;
parseContextShPtr ctx(new parseContext);
// The context is handed to the grammar as its inherited attribute.
bool r = qi::parse(b, e, p(phx::ref(*ctx.get())));
if (r) {
std::cout << "Success" << std::endl;
}
else {
std::cout << "Failure" << std::endl;
}
std::cout << std::string(b, e).substr(0, 32) << std::endl;
}
Some of the placeholders cannot be bound.
This could be because ruleEncoding_ doesn't expose an attribute (for _1) (unlikely) or ruleStart_ doesn't have the inherited attribute (_r1).
That's all I can tell you right now.
Edit It was the latter. ruleHandHeader doesn't declare any attributes, let alone an inherited attribute to bind to _r1
Update To the comment.
Here are some suggestions. Much in the vein of my oft-repeated advice to avoid semantic actions (Boost Spirit: "Semantic actions are evil"?), I'd adapt the structure as a fusion sequence:
BOOST_FUSION_ADAPT_STRUCT(parseContext, (std::string, encoding_)(uint64_t,handId_))
And use much simplified grammar rules:
ruleStart_ = ruleEncoding_ >> ruleHandHeader_;
ruleEncoding_ = "ABC";
ruleHandId_ = qi::long_long;
ruleHandHeader_ = "DEF XYZ #" >> ruleHandId_;
Now, adding in BOOST_SPIRIT_DEBUG macros and fixing uint64_t to uint64_t() in the rule definition:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/shared_ptr.hpp>
#include <sstream>
namespace qi = boost::spirit::qi;
// Values the grammar extracts from the input.
struct parseContext {
    std::string encoding_;
    // Initialized so that the id is never read as an indeterminate value
    // when no hand id has been parsed.
    uint64_t handId_ = 0;
};
BOOST_FUSION_ADAPT_STRUCT(parseContext, (std::string, encoding_)(uint64_t,handId_))
typedef boost::shared_ptr<parseContext> parseContextShPtr;
// Grammar that parses `ABC` followed by `DEF XYZ #<id>` directly into a
// parseContext (adapted as a Fusion sequence), without semantic actions.
template <typename Iterator>
struct parseGrammar : qi::grammar<Iterator, parseContext()> {
    parseGrammar() : parseGrammar::base_type(ruleStart_)
    {
        ruleStart_      = ruleEncoding_ >> ruleHandHeader_;
        ruleEncoding_   = "ABC";
        // The attribute is uint64_t, so use the unsigned parser: the signed
        // qi::long_long would accept a leading '-' and store a wrapped value.
        ruleHandId_     = qi::ulong_long;
        ruleHandHeader_ = "DEF XYZ #" >> ruleHandId_;
        BOOST_SPIRIT_DEBUG_NODES((ruleStart_)(ruleEncoding_)(ruleHandId_)(ruleHandHeader_))
    }
    // Rules
    qi::rule<Iterator, parseContext()> ruleStart_;
    qi::rule<Iterator, std::string()> ruleEncoding_;
    qi::rule<Iterator, uint64_t()> ruleHandId_, ruleHandHeader_;
};
void test()
{
std::stringstream sb("ABCDEF XYZ #555: PQI #777");
sb.unsetf(std::ios::skipws);
typedef boost::spirit::istream_iterator It;
const parseGrammar<It> p;
It b(sb), e;
parseContextShPtr ctx(new parseContext);
bool r = qi::parse(b, e, p, *ctx);
if (r) {
std::cout << "Success: " << ctx->encoding_ << ", " << ctx->handId_ << std::endl;
}
else {
std::cout << "Failure" << std::endl;
}
if (b!=e)
std::cout << "Remaining: '" << std::string(b, e).substr(0, 32) << "'...\n";
}
int main()
{
test();
}
Prints
Success: ABC, 555
Remaining: ': PQI #777'...