Related
What's the proper way to indicate a parse fail in a boost::spirit::traits::transform_attribute? Can I throw any old exception, or is there a specific thing it wants me to do?
namespace boost
{
namespace spirit
{
namespace traits
{
template <>
struct transform_attribute<TwoNums, std::vector<char>, qi::domain>
{
typedef std::vector<char> type;
static type pre(TwoWords&) { return{}; }
static void post(TwoWords& val, type const& attr) {
std::string stringed(attr.begin(), attr.end());
//https://stackoverflow.com/questions/236129/the-most-elegant-way-to-iterate-the-words-of-a-string
std::vector<std::string> strs;
boost::split(strs, stringed, ",");
if(strs.size()!=2)
{
//What do I do here?
}
val = TwoWords(strs[0],strs[1]);
}
static void fail(FDate&) { }
};
}
}
}
Yes, raising an exception seems the only out-of-band way.
You could use qi::on_error to trap and respond to it.
However, it's a bit unclear what you need this for. It seems a bit upside down to use split inside a parser. Splitting is basically a poor version of parsing.
Why not have a rule for the sub-parsing?
1. Simple Throw...
Live On Coliru
#include <boost/algorithm/string.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
struct Invalid {};
struct TwoWords {
std::string one, two;
};
namespace boost { namespace spirit { namespace traits {
template <> struct transform_attribute<TwoWords, std::vector<char>, qi::domain> {
typedef std::vector<char> type;
static type pre(TwoWords &) { return {}; }
static void post(TwoWords &val, type const &attr) {
std::string stringed(attr.begin(), attr.end());
std::vector<std::string> strs;
boost::split(strs, stringed, boost::is_any_of(","));
if (strs.size() != 2) {
throw Invalid{};
}
val = TwoWords{ strs.at(0), strs.at(1) };
}
static void fail(TwoWords &) {}
};
} } }
template <typename It>
struct Demo1 : qi::grammar<It, TwoWords()> {
Demo1() : Demo1::base_type(start) {
start = qi::attr_cast<TwoWords>(+qi::char_);
}
private:
qi::rule<It, TwoWords()> start;
};
int main() {
Demo1<std::string::const_iterator> parser;
for (std::string const input : { ",", "a,b", "a,b,c" }) {
std::cout << "Parsing " << std::quoted(input) << " -> ";
TwoWords tw;
try {
if (parse(input.begin(), input.end(), parser, tw)) {
std::cout << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
} else {
std::cout << "Failed\n";
}
} catch(Invalid) {
std::cout << "Input invalid\n";
}
}
}
Prints
Parsing "," -> "", ""
Parsing "a,b" -> "a", "b"
Parsing "a,b,c" -> Input invalid
2. Handling Errors Inside The Parser
This feels a bit hacky because it will require you to throw a expectation_failure.
This is not optimal since it assumes you know the iterator the parser is going to be instantiated with.
on_error was designed for use with expectation points
*Live On Coliru
#include <boost/algorithm/string.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
struct Invalid {};
struct TwoWords {
std::string one, two;
};
namespace boost { namespace spirit { namespace traits {
template <> struct transform_attribute<TwoWords, std::vector<char>, qi::domain> {
typedef std::vector<char> type;
static type pre(TwoWords &) { return {}; }
static void post(TwoWords &val, type const &attr) {
std::string stringed(attr.begin(), attr.end());
std::vector<std::string> strs;
boost::split(strs, stringed, boost::is_any_of(","));
if (strs.size() != 2) {
throw qi::expectation_failure<std::string::const_iterator>({}, {}, info("test"));
}
val = TwoWords{ strs.at(0), strs.at(1) };
}
static void fail(TwoWords &) {}
};
} } }
template <typename It>
struct Demo2 : qi::grammar<It, TwoWords()> {
Demo2() : Demo2::base_type(start) {
start = qi::attr_cast<TwoWords>(+qi::char_);
qi::on_error(start, [](auto&&...){});
// more verbose spelling:
// qi::on_error<qi::error_handler_result::fail> (start, [](auto&&...){[>no-op<]});
}
private:
qi::rule<It, TwoWords()> start;
};
int main() {
Demo2<std::string::const_iterator> parser;
for (std::string const input : { ",", "a,b", "a,b,c" }) {
std::cout << "Parsing " << std::quoted(input) << " -> ";
TwoWords tw;
try {
if (parse(input.begin(), input.end(), parser, tw)) {
std::cout << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
} else {
std::cout << "Failed\n";
}
} catch(Invalid) {
std::cout << "Input invalid\n";
}
}
}
Prints
Parsing "," -> "", ""
Parsing "a,b" -> "a", "b"
Parsing "a,b,c" -> Failed
3. Finally: Sub-rules Rule!
Let's assume a slightly more interesting grammar in which you have a ; separated list of TwoWords:
"foo,bar;a,b"
We parse into a vector of TwoWords:
using Word = std::string;
struct TwoWords { std::string one, two; };
using TwoWordses = std::vector<TwoWords>;
Instead of using traits to "coerce" attributes, we just adapt the struct and rely on automatic attribute propagation:
BOOST_FUSION_ADAPT_STRUCT(TwoWords, one, two)
The parser mimics the data-types:
template <typename It>
struct Demo3 : qi::grammar<It, TwoWordses()> {
Demo3() : Demo3::base_type(start) {
using namespace qi;
word = *(graph - ',' - ';');
twowords = word >> ',' >> word;
start = twowords % ';';
}
private:
qi::rule<It, Word()> word;
qi::rule<It, TwoWords()> twowords;
qi::rule<It, TwoWordses()> start;
};
And the full test is Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
using Word = std::string;
struct TwoWords { std::string one, two; };
using TwoWordses = std::vector<TwoWords>;
BOOST_FUSION_ADAPT_STRUCT(TwoWords, one, two);
template <typename It>
struct Demo3 : qi::grammar<It, TwoWordses()> {
Demo3() : Demo3::base_type(start) {
using namespace qi;
word = *(graph - ',' - ';');
twowords = word >> ',' >> word;
start = twowords % ';';
}
private:
qi::rule<It, Word()> word;
qi::rule<It, TwoWords()> twowords;
qi::rule<It, TwoWordses()> start;
};
int main() {
using It = std::string::const_iterator;
Demo3<It> parser;
for (std::string const input : {
",",
"foo,bar",
"foo,bar;qux,bax",
"foo,bar;qux,bax;err,;,ful",
// failing cases or cases with trailing input:
"",
"foo,bar;",
"foo,bar,qux",
})
{
std::cout << "Parsing " << std::quoted(input) << " ->\n";
TwoWordses tws;
It f = input.begin(), l = input.end();
if (parse(f, l, parser, tws)) {
for(auto& tw : tws) {
std::cout << " - " << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
}
} else {
std::cout << "Failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
Parsing "," ->
- "", ""
Parsing "foo,bar" ->
- "foo", "bar"
Parsing "foo,bar;qux,bax" ->
- "foo", "bar"
- "qux", "bax"
Parsing "foo,bar;qux,bax;err,;,ful" ->
- "foo", "bar"
- "qux", "bax"
- "err", ""
- "", "ful"
Parsing "" ->
Failed
Parsing "foo,bar;" ->
- "foo", "bar"
Remaining unparsed input: ";"
Parsing "foo,bar,qux" ->
- "foo", "bar"
Remaining unparsed input: ",qux"
Here is an example of the syntax -- two groups of items:
I_name m_name parameter1=value parameter2=value
I_name m_name parameter1=value \
parameter2=value
My question is how to define the skip-type.
It is not just space_type but space_type minus newline.
But newline followed by backslash is a skip-type.
E.g.
I define name like that:
qi::rule<Iterator, std::string(), ascii::space_type> m_sName;
m_sName %= qi::lexeme[ascii::alpha >> *ascii::alnum];
This is obviously not correct, as the space_type must include newline-backslash.
The following grammar works for me.
*("\\\n" | ~qi::char_('\n')) % '\n'
It will ignore any newline after the backslash. And the following is a simple test.
#include <vector>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
typedef std::vector<std::string> Lines;
inline auto ParseLines(std::string const& str) {
Lines lines;
namespace qi = boost::spirit::qi;
if (qi::parse(
str.begin(), str.end(),
*("\\\n" | ~qi::char_('\n')) % '\n',
lines)) {
return lines;
}
else {
throw std::invalid_argument("Parse error at ParseLines");
}
}
BOOST_AUTO_TEST_CASE(TestParseLines) {
std::string const str =
"I_name m_name parameter1=value parameter2=value\n"
"I_name m_name parameter1 = value \\\n"
"parameter2 = value";
Lines const expected{
"I_name m_name parameter1=value parameter2=value",
"I_name m_name parameter1 = value parameter2 = value"
};
BOOST_TEST(ParseLines(str) == expected);
}
You should use "-std=c++14 -lboost_unit_test_framework" for compilation. Anyway, it is easy to convert the code for c++03.
qi::blank is exactly that. It's qi::space without newlines.
You can do this too: ("\\\n" | qi::blank)
To be able to declare a rule with such a skipper, define a skipper grammar:
template <typename It>
struct my_skipper : qi::grammar<It> {
my_skipper() : my_skipper::base_type(start) {}
qi::rule<It> start = ("\\\n" | qi::blank);
};
Full Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace ast {
struct record {
std::string iname, mname;
std::map<std::string, std::string> params;
};
using records = std::vector<record>;
}
BOOST_FUSION_ADAPT_STRUCT(ast::record, iname, mname, params)
template <typename It>
struct my_parser : qi::grammar<It, ast::records()> {
using Skipper = qi::rule<It>;
my_parser() : my_parser::base_type(start) {
skipper = ("\\\n" | qi::blank);
name = +qi::graph;
key = +(qi::graph - '=');
param = key >> '=' >> name;
record = name >> name >> *param;
records = *(record >> +qi::eol);
start = qi::skip(qi::copy(skipper)) [ records ];
}
private:
Skipper skipper;
qi::rule<It, ast::records(), Skipper> records;
qi::rule<It, ast::record(), Skipper> record;
qi::rule<It, ast::records()> start;
qi::rule<It, std::pair<std::string, std::string>()> param;
qi::rule<It, std::string()> name, key;
};
int main() {
#if 1
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
#else
using It = std::string::const_iterator;
std::string const input = "something here a=1\n";
It f = input.begin(), l = input.end();
#endif
ast::records data;
bool ok = qi::parse(f, l, my_parser<It>(), data);
if (ok) {
std::cout << "Parsed:\n";
for (auto& r : data) {
std::cout << "\t" << r.iname << " " << r.mname;
for (auto& p : r.params)
std::cout << " [" << p.first << ": " << p.second << "]";
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints (for the input in your question):
Parsed:
I_name m_name [parameter1: value] [parameter2: value]
I_name m_name [parameter1: value] [parameter2: value]
This is a greatly reduced case of something I'm trying to do in the best way possible. (Certainly though, the question is also about, me trying to understand how best to use spirit though.)
I need to parse data into a struct with several members. The members are simply listed as key-value pairs, so this is straightforward -- however, if some of the keys are different, then in the data that I'm parsing, different values may appear later, or some keys may be omitted. Nevertheless, the data structure that I ultimately parse has a fixed form.
In the example code, my_struct is a struct like this:
struct my_struct {
std::string a;
std::string b;
std::string c;
std::string d;
};
and grammar1 is a grammar which parses strings like this
"a: x b: y c: z d: w"
into structs like this
my_struct{ "x", "y", "z", "w" }
I would like to additionally parse strings like this:
"a: x b: y d-no-c: w"
into structs like this
my_struct{ "x", "y", "", "w" }
and I would ideally like to do this in as simple a way as I can without making unnecessary copies of strings along the way.
My first thought was, the main rule should be rewritten so that it parses "a" and "b", and then selects between two alternatives depending on whether "c" is present or not. This is easy to work out as a grammar, but when we try to get the data-types right for the attributed grammar part of it, I can't seem to get it to work. I tried using std::pair<std::string, std::string> and also fusion::vector for the alternative types, but this cannot apparently be streamed into my struct using qi operator <<. (grammar2 tests are commented out because it doesn't compile.)
My next thought was, we can simply have two alternative forms of the main rule, which are attributed with type my_struct to make sure the attributed parsing works out. Surprisingly though, this implementation is actually broken -- it seems that when the grammar backtracks, it duplicates the a and b fields inside the resulting structure. I did not expect this and I don't know why it happens, do you know? (This is grammar3).
grammar3 has the problem that, even if it worked like I think it should (the tests passed), when the alternative part backtracks, it will have to reparse a and b which is some inefficiency. If we are willing to change our target struct from my_struct to a different struct, then we can use grammar4, which has the same plan as grammar2, but targets a struct in which one of the elements is a std::pair. Then we have move all the strings out of this temporary struct into the format we really wanted.
So, the questions are:
grammar4 works, but is there a way to do something along the lines of grammar2 which is presumably more efficient?
Why does grammar3 fail the tests?
Complete listing:
#define SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/struct/define_struct.hpp>
#include <boost/fusion/include/define_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <utility>
namespace qi = boost::spirit::qi;
BOOST_FUSION_DEFINE_STRUCT(
/**/
,
my_struct,
(std::string, a)
(std::string, b)
(std::string, c)
(std::string, d))
template<typename Iterator>
class grammar1 : public qi::grammar<Iterator, my_struct()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, my_struct()> main;
grammar1() : grammar1::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
main = lit("a:") >> id >> lit("b:") >> id >> lit("c:") >> id >> lit("d:") >> id;
}
};
//typedef std::pair<std::string, std::string> second_part_type;
typedef boost::fusion::vector<std::string, std::string> second_part_type;
template<typename Iterator>
class grammar2 : public qi::grammar<Iterator, my_struct()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, second_part_type()> with_c;
qi::rule<Iterator, second_part_type()> without_c;
qi::rule<Iterator, my_struct()> main;
grammar2() : grammar2::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
using qi::attr;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
with_c = lit("c:") >> id >> lit("d:") >> id;
without_c = attr("") >> lit("d-no-c:") >> id;
main = lit("a:") >> id >> lit("b:") >> id >> (with_c | without_c);
}
};
template<typename Iterator>
class grammar3 : public qi::grammar<Iterator, my_struct()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, my_struct()> with_c;
qi::rule<Iterator, my_struct()> without_c;
qi::rule<Iterator, my_struct()> main;
grammar3() : grammar3::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
using qi::attr;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
with_c = lit("a:") >> id >> lit("b:") >> id >> lit("c:") >> id >> lit("d:") >> id;
without_c = lit("a:") >> id >> lit("b:") >> id >> attr("") >> lit("d-no-c:") >> id;
main = with_c | without_c;
}
};
/***
* Alternate approach
*/
typedef std::pair<std::string, std::string> spair;
BOOST_FUSION_DEFINE_STRUCT(
/**/
,
my_struct2,
(std::string, a)
(std::string, b)
(spair, cd))
template<typename Iterator>
class grammar4 : public qi::grammar<Iterator, my_struct2()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, spair()> with_c;
qi::rule<Iterator, spair()> without_c;
qi::rule<Iterator, my_struct2()> main;
grammar4() : grammar4::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
using qi::attr;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
with_c = lit("c:") >> id >> lit("d:") >> id;
without_c = attr("") >> lit("d-no-c:") >> id;
main = lit("a:") >> id >> lit("b:") >> id >> (with_c | without_c);
}
};
my_struct convert_struct(my_struct2 && s) {
return { std::move(s.a), std::move(s.b), std::move(s.cd.first), std::move(s.cd.second) };
}
/***
* Testing
*/
void check_strings_eq(const std::string & a, const std::string & b, const char * label, int line = 0) {
if (a != b) {
std::cerr << "Mismatch '" << label << "' ";
if (line) { std::cerr << "at line " << line << " "; }
std::cerr << "\"" << a << "\" != \"" << b << "\"\n";
}
}
void check_eq(const my_struct & s, const my_struct & t, int line = 0) {
check_strings_eq(s.a, t.a, "a", line);
check_strings_eq(s.b, t.b, "b", line);
check_strings_eq(s.c, t.c, "c", line);
check_strings_eq(s.d, t.d, "d", line);
}
template<template<typename> class Grammar>
void test_grammar(const std::string & input, const my_struct & expected, int line = 0) {
auto it = input.begin();
auto end = input.end();
Grammar<decltype(it)> grammar;
my_struct result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse! ";
if (line) { std::cerr << "line = " << line; }
std::cerr << "\n";
std::cerr << "Stopped at:\n" << input << "\n";
for (auto temp = input.begin(); temp != it; ++temp) { std::cerr << " "; }
std::cerr << "^\n";
} else {
check_eq(result, expected, line);
}
}
int main() {
test_grammar<grammar1> ( "a: x b: y c: z d: w", my_struct{ "x", "y", "z", "w" }, __LINE__);
test_grammar<grammar1> ( "a: asdf b: jkl c: foo d: bar", my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__ );
//test_grammar<grammar2> ( "a: asdf b: jkl c: foo d: bar", my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__ );
//test_grammar<grammar2> ( "a: asdf b: jkl d-no-c: bar", my_struct{ "asdf", "jkl", "", "bar" }, __LINE__ );
test_grammar<grammar3> ( "a: asdf b: jkl c: foo d: bar", my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__);
test_grammar<grammar3> ( "a: asdf b: jkl d-no-c: bar", my_struct{ "asdf", "jkl", "", "bar" }, __LINE__ );
// Test 4th grammar
{
std::string input = "a: asdf b: jkl c: foo d: bar";
auto it = input.begin();
auto end = input.end();
grammar4<decltype(it)> grammar;
my_struct2 result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse! Line = " << __LINE__ << std::endl;
} else {
check_eq(convert_struct(std::move(result)), my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__);
}
}
{
std::string input = "a: asdf b: jkl d-no-c: bar";
auto it = input.begin();
auto end = input.end();
grammar4<decltype(it)> grammar;
my_struct2 result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse! Line = " << __LINE__ << std::endl;
} else {
check_eq(convert_struct(std::move(result)), my_struct{ "asdf", "jkl", "", "bar" }, __LINE__);
}
}
}
My suggestion here would be to use a permutation parser indeed.
It is quite a bit more flexible though, so you might wish to add a validation constraint in a semantic action:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/tuple/tuple_comparison.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
struct my_struct {
std::string a,b,c,d;
};
BOOST_FUSION_ADAPT_STRUCT(my_struct, a, b, c, d)
template<typename Iterator>
class grammar : public qi::grammar<Iterator, my_struct()> {
public:
grammar() : grammar::base_type(start) {
using namespace qi;
id = +char_("A-Za-z_");
part = lexeme[lit(_r1) >> ':'] >> id;
main = part(+"a")
^ part(+"b")
^ part(+"c")
^ (part(+"d") | part(+"d-no-c"));
;
start = skip(space) [ main ];
BOOST_SPIRIT_DEBUG_NODES((main)(part))
}
private:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, std::string(const char*), qi::space_type> part;
qi::rule<Iterator, my_struct(), qi::space_type> main;
//
qi::rule<Iterator, my_struct()> start;
};
/***
* Testing
*/
void check_strings_eq(const std::string & a, const std::string & b, const char * label) {
if (a != b) {
std::cerr << "Mismatch '" << label << "' \"" << a << "\" != \"" << b << "\"\n";
}
}
void check_eq(const my_struct & s, const my_struct & t) {
check_strings_eq(s.a, t.a, "a");
check_strings_eq(s.b, t.b, "b");
check_strings_eq(s.c, t.c, "c");
check_strings_eq(s.d, t.d, "d");
if (boost::tie(s.a,s.b,s.c,s.d) == boost::tie(t.a,t.b,t.c,t.d))
std::cerr << "struct data matches\n";
}
template<template<typename> class Grammar>
void test_grammar(const std::string &input, const my_struct &expected) {
auto it = input.begin();
auto end = input.end();
Grammar<decltype(it)> grammar;
my_struct result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse!\n";
std::cerr << "Stopped at:\n" << input << "\n";
for (auto temp = input.begin(); temp != it; ++temp) {
std::cerr << " ";
}
std::cerr << "^\n";
} else {
check_eq(result, expected);
}
}
int main() {
for (auto&& p : std::vector<std::pair<std::string, my_struct> > {
{"a: x b: y c: z d: w", my_struct{ "x", "y", "z", "w" }},
{"a: x c: z d: w", my_struct{ "x", "" , "z", "w" }},
{"a: x c: z" , my_struct{ "x", "" , "z", "" }},
{" b: y c: z d: w", my_struct{ "" , "y", "z", "w" }},
{"b: y c: z a: x d: w", my_struct{ "x", "y", "z", "w" }},
// if you really need:
{"a: x b: y d-no-c: w", my_struct{ "x", "y", "" , "w" }},
})
{
auto const& input = p.first;
auto const& expected = p.second;
std::cout << "----\nParsing '" << input << "'\n";
test_grammar<grammar> (input, expected);
}
}
Prints
----
Parsing 'a: x b: y c: z d: w'
struct data matches
----
Parsing 'a: x c: z d: w'
struct data matches
----
Parsing 'a: x c: z'
struct data matches
----
Parsing ' b: y c: z d: w'
struct data matches
----
Parsing 'b: y c: z a: x d: w'
struct data matches
----
Parsing 'a: x b: y d-no-c: w'
struct data matches
I want to use boost spirit to parse an expression like
function1(arg1, arg2, function2(arg1, arg2, arg3),
function3(arg1,arg2))
and call corresponding c++ functions. What should be the grammar to parse above expression and call the corresponding c++ function by phoneix::bind()?
I have 2 types of functions to call
1) string functions;
wstring GetSubString(wstring stringToCut, int position, int length);
wstring GetStringToken(wstring stringToTokenize, wstring seperators,
int tokenNumber );
2) Functions that return integer;
int GetCount();
int GetId(wstring srcId, wstring srcType);
Second Answer (more pragmatic)
Here's a second take, for comparison:
Just in case you really didn't want to parse into an abstract syntax tree representation, but rather evaluate the functions on-the-fly during parsing, you can simplify the grammar.
It comes in at 92 lines as opposed to 209 lines in the first answer. It really depends on what you're implementing which approach is more suitable.
This shorter approach has some downsides:
less flexible (not reusable)
less robust (if functions have side effects, they will happen even if parsing fails halfway)
less extensible (the supported functions are hardwired into the grammar1)
Full code:
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/phoenix/function.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef boost::variant<int, std::string> value;
//////////////////////////////////////////////////
// Demo functions:
value AnswerToLTUAE() {
return 42;
}
value ReverseString(value const& input) {
auto& as_string = boost::get<std::string>(input);
return std::string(as_string.rbegin(), as_string.rend());
}
value Concatenate(value const& a, value const& b) {
std::ostringstream oss;
oss << a << b;
return oss.str();
}
BOOST_PHOENIX_ADAPT_FUNCTION_NULLARY(value, AnswerToLTUAE_, AnswerToLTUAE)
BOOST_PHOENIX_ADAPT_FUNCTION(value, ReverseString_, ReverseString, 1)
BOOST_PHOENIX_ADAPT_FUNCTION(value, Concatenate_, Concatenate, 2)
//////////////////////////////////////////////////
// Parser grammar
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, value(), Skipper>
{
parser() : parser::base_type(expr_)
{
using namespace qi;
function_call_ =
(lit("AnswerToLTUAE") > '(' > ')')
[ _val = AnswerToLTUAE_() ]
| (lit("ReverseString") > '(' > expr_ > ')')
[ _val = ReverseString_(_1) ]
| (lit("Concatenate") > '(' > expr_ > ',' > expr_ > ')')
[ _val = Concatenate_(_1, _2) ]
;
string_ = as_string [
lexeme [ "'" >> *~char_("'") >> "'" ]
];
value_ = int_ | string_;
expr_ = function_call_ | value_;
on_error<fail> ( expr_, std::cout
<< phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
<< phx::construct<std::string>(_3, _2) << phx::val("\"\n"));
BOOST_SPIRIT_DEBUG_NODES((expr_)(function_call_)(value_)(string_))
}
private:
qi::rule<It, value(), Skipper> value_, function_call_, expr_, string_;
};
int main()
{
for (const std::string input: std::vector<std::string> {
"-99",
"'string'",
"AnswerToLTUAE()",
"ReverseString('string')",
"Concatenate('string', 987)",
"Concatenate('The Answer Is ', AnswerToLTUAE())",
})
{
auto f(std::begin(input)), l(std::end(input));
const static parser<decltype(f)> p;
value direct_eval;
bool ok = qi::phrase_parse(f,l,p,qi::space,direct_eval);
if (!ok)
std::cout << "invalid input\n";
else
{
std::cout << "input:\t" << input << "\n";
std::cout << "eval:\t" << direct_eval << "\n\n";
}
if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
}
}
Note how, instead of using BOOST_PHOENIX_ADAPT_FUNCTION* we could have directly used boost::phoenix::bind.
The output is still the same:
input: -99
eval: -99
input: 'string'
eval: string
input: AnswerToLTUAE()
eval: 42
input: ReverseString('string')
eval: gnirts
input: Concatenate('string', 987)
eval: string987
input: Concatenate('The Answer Is ', AnswerToLTUAE())
eval: The Answer Is 42
1 This last downside is easily remedied by using the 'Nabialek Trick'
First Answer (complete)
I've gone and implemented a simple recursive expression grammar for functions having up-to-three parameters:
for (const std::string input: std::vector<std::string> {
"-99",
"'string'",
"AnswerToLTUAE()",
"ReverseString('string')",
"Concatenate('string', 987)",
"Concatenate('The Answer Is ', AnswerToLTUAE())",
})
{
auto f(std::begin(input)), l(std::end(input));
const static parser<decltype(f)> p;
expr parsed_script;
bool ok = qi::phrase_parse(f,l,p,qi::space,parsed_script);
if (!ok)
std::cout << "invalid input\n";
else
{
const static generator<boost::spirit::ostream_iterator> g;
std::cout << "input:\t" << input << "\n";
std::cout << "tree:\t" << karma::format(g, parsed_script) << "\n";
std::cout << "eval:\t" << evaluate(parsed_script) << "\n";
}
if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
}
Which prints:
input: -99
tree: -99
eval: -99
input: 'string'
tree: 'string'
eval: string
input: AnswerToLTUAE()
tree: nullary_function_call()
eval: 42
input: ReverseString('string')
tree: unary_function_call('string')
eval: gnirts
input: Concatenate('string', 987)
tree: binary_function_call('string',987)
eval: string987
input: Concatenate('The Answer Is ', AnswerToLTUAE())
tree: binary_function_call('The Answer Is ',nullary_function_call())
eval: The Answer Is 42
Some notes:
I separated parsing from execution (which is always a good idea IMO)
I implemented function evaluation for zero, one or two parameters (this should be easy to extend)
Values are assumed to be integers or strings (should be easy to extend)
I added a karma generator to display the parsed expression (with a TODO marked in the comment)
I hope this helps:
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/variant/recursive_wrapper.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
typedef boost::variant<int, std::string> value;
typedef boost::variant<value, boost::recursive_wrapper<struct function_call> > expr;
typedef std::function<value() > nullary_function_impl;
typedef std::function<value(value const&) > unary_function_impl;
typedef std::function<value(value const&, value const&)> binary_function_impl;
typedef boost::variant<nullary_function_impl, unary_function_impl, binary_function_impl> function_impl;
typedef qi::symbols<char, function_impl> function_table;
struct function_call
{
typedef std::vector<expr> arguments_t;
function_call() = default;
function_call(function_impl f, arguments_t const& arguments)
: f(f), arguments(arguments) { }
function_impl f;
arguments_t arguments;
};
BOOST_FUSION_ADAPT_STRUCT(function_call, (function_impl, f)(function_call::arguments_t, arguments))
#ifdef BOOST_SPIRIT_DEBUG
namespace std
{
static inline std::ostream& operator<<(std::ostream& os, nullary_function_impl const& f) { return os << "<nullary_function_impl>"; }
static inline std::ostream& operator<<(std::ostream& os, unary_function_impl const& f) { return os << "<unary_function_impl>"; }
static inline std::ostream& operator<<(std::ostream& os, binary_function_impl const& f) { return os << "<binary_function_impl>"; }
}
static inline std::ostream& operator<<(std::ostream& os, function_call const& call) { return os << call.f << "(" << call.arguments.size() << ")"; }
#endif
//////////////////////////////////////////////////
// Evaluation
value evaluate(const expr& e);
struct eval : boost::static_visitor<value>
{
eval() {}
value operator()(const value& v) const
{
return v;
}
value operator()(const function_call& call) const
{
return boost::apply_visitor(invoke(call.arguments), call.f);
}
private:
struct invoke : boost::static_visitor<value>
{
function_call::arguments_t const& _args;
invoke(function_call::arguments_t const& args) : _args(args) {}
value operator()(nullary_function_impl const& f) const {
return f();
}
value operator()(unary_function_impl const& f) const {
auto a = evaluate(_args.at(0));
return f(a);
}
value operator()(binary_function_impl const& f) const {
auto a = evaluate(_args.at(0));
auto b = evaluate(_args.at(1));
return f(a, b);
}
};
};
value evaluate(const expr& e)
{
return boost::apply_visitor(eval(), e);
}
//////////////////////////////////////////////////
// Demo functions:
value AnswerToLTUAE() {
return 42;
}
value ReverseString(value const& input) {
auto& as_string = boost::get<std::string>(input);
return std::string(as_string.rbegin(), as_string.rend());
}
value Concatenate(value const& a, value const& b) {
std::ostringstream oss;
oss << a << b;
return oss.str();
}
//////////////////////////////////////////////////
// Parser grammar
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, expr(), Skipper>
{
parser() : parser::base_type(expr_)
{
using namespace qi;
n_ary_ops.add
("AnswerToLTUAE", nullary_function_impl{ &::AnswerToLTUAE })
("ReverseString", unary_function_impl { &::ReverseString })
("Concatenate" , binary_function_impl { &::Concatenate });
function_call_ = n_ary_ops > '(' > expr_list > ')';
string_ = qi::lexeme [ "'" >> *~qi::char_("'") >> "'" ];
value_ = qi::int_ | string_;
expr_list = -expr_ % ',';
expr_ = function_call_ | value_;
on_error<fail> ( expr_, std::cout
<< phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
<< phx::construct<std::string>(_3, _2) << phx::val("\"\n"));
BOOST_SPIRIT_DEBUG_NODES((expr_)(expr_list)(function_call_)(value_)(string_))
}
private:
function_table n_ary_ops;
template <typename Attr> using Rule = qi::rule<It, Attr(), Skipper>;
Rule<std::string> string_;
Rule<value> value_;
Rule<function_call> function_call_;
Rule<std::vector<expr>> expr_list;
Rule<expr> expr_;
};
//////////////////////////////////////////////////
// Output generator
template <typename It>
struct generator : karma::grammar<It, expr()>
{
generator() : generator::base_type(expr_)
{
using namespace karma;
nullary_ = eps << "nullary_function_call"; // TODO reverse lookup :)
unary_ = eps << "unary_function_call";
binary_ = eps << "binary_function_call";
function_ = nullary_ | unary_ | binary_;
function_call_ = function_ << expr_list;
expr_list = '(' << -(expr_ % ',') << ')';
value_ = karma::int_ | ("'" << karma::string << "'");
expr_ = function_call_ | value_;
}
private:
template <typename Attr> using Rule = karma::rule<It, Attr()>;
Rule<nullary_function_impl> nullary_;
Rule<unary_function_impl> unary_;
Rule<binary_function_impl> binary_;
Rule<function_impl> function_;
Rule<function_call> function_call_;
Rule<value> value_;
Rule<std::vector<expr>> expr_list;
Rule<expr> expr_;
};
int main()
{
for (const std::string input: std::vector<std::string> {
"-99",
"'string'",
"AnswerToLTUAE()",
"ReverseString('string')",
"Concatenate('string', 987)",
"Concatenate('The Answer Is ', AnswerToLTUAE())",
})
{
auto f(std::begin(input)), l(std::end(input));
const static parser<decltype(f)> p;
expr parsed_script;
bool ok = qi::phrase_parse(f,l,p,qi::space,parsed_script);
if (!ok)
std::cout << "invalid input\n";
else
{
const static generator<boost::spirit::ostream_iterator> g;
std::cout << "input:\t" << input << "\n";
std::cout << "tree:\t" << karma::format(g, parsed_script) << "\n";
std::cout << "eval:\t" << evaluate(parsed_script) << "\n\n";
}
if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
}
}
Given a grammar that synthesizes a user-defined type, how can I write another grammar that:
Reuses the first grammar.
Synthesizes a second, distinct type, using the values underlying the first type?
In the example below, I've followed the Boost Spirit documentation to create a parser, foo_parser, that synthesizes a value of type foo_struct. I'd like to write a second parser, bar_parser, that reuses foo_parser but synthesizes a different (but obviously similar) value of type bar_struct. My naive example, commented out, causes spectacular g++ fireworks. :)
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace s {
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
}
struct foo_struct {
int i;
char c;
};
BOOST_FUSION_ADAPT_STRUCT(
foo_struct,
(int, i)
(char, c)
)
struct bar_struct {
int i;
char c;
int j;
};
template <typename Iterator>
struct foo_parser : s::grammar<Iterator, foo_struct()>
{
foo_parser() : foo_parser::base_type(start) {
start %= s::int_ >> s::char_ ;
}
s::rule<Iterator, foo_struct()> start;
};
/*
BOOST_FUSION_ADAPT_STRUCT(
bar_struct,
(int, i)
(char, c)
(int, j)
)
template <typename Iterator>
struct bar_parser : s::grammar<Iterator, bar_struct()>
{
bar_parser() : bar_parser::base_type(start) {
// reuse a foo_parser
start %= foo >> s::int_ ;
}
foo_parser<Iterator> foo;
s::rule<Iterator, bar_struct()> start;
};
*/
I've previously devised this - somewhat - hacky way to address things
struct mybase { int a,b; };
struct myderived : mybase
{
mybase& base;
int c,d;
myderived() : base(*this) { }
};
BOOST_FUSION_ADAPT_STRUCT(mybase, (int,a)(int,b));
BOOST_FUSION_ADAPT_STRUCT(myderived, (mybase,base)(int,c)(int,d));
I'd much prefer a solution based on BOOST_FUSION_ADAPT_ADT, but I wasn't able to get it to work, see also the [spirit-general] mailing list:
http://boost.2283326.n4.nabble.com/struct-derived-struct-fusion-adapted-and-the-sequence-operator-td4259090.html
or search the list for derived base
Here's a full sample of that:
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
struct mybase { int a,b; };
struct myderived : mybase {
myderived() : base(*this) { }
mybase& base;
int c,d;
};
BOOST_FUSION_ADAPT_STRUCT(mybase, (int,a)(int,b));
BOOST_FUSION_ADAPT_STRUCT(myderived, (mybase,base)(int,c)(int,d));
int main()
{
using namespace boost::spirit::qi;
const char input[] = "1 2 3 4";
const char *f(input), *l(f+strlen(input));
rule<const char*, mybase() , space_type> base_ = int_ >> int_;
rule<const char*, myderived(), space_type> derived_ = base_ >> int_ >> int_;
myderived data;
bool ok = phrase_parse(f,l,derived_,space,data);
if (ok) std::cout << "data: " << data.a << ", " << data.b << ", " << data.c << ", " << data.d << "\n";
else std::cerr << "whoops\n";
if (f!=l)
std::cerr << "left: '" << std::string(f,l) << std::endl;
return 0;
}
Output:
data: 1, 2, 3, 4