confusing output when parsing a struct in alternate ways using spirit - c++

This is a greatly reduced case of something I'm trying to do in the best way possible. (Certainly though, the question is also about, me trying to understand how best to use spirit though.)
I need to parse data into a struct with several members. The members are simply listed as key-value pairs, so this is straightforward -- however, if some of the keys are different, then in the data that I'm parsing, different values may appear later, or some keys may be omitted. Nevertheless, the data structure that I ultimately parse has a fixed form.
In the example code, my_struct is a struct like this:
struct my_struct {
std::string a;
std::string b;
std::string c;
std::string d;
};
and grammar1 is a grammar which parses strings like this
"a: x b: y c: z d: w"
into structs like this
my_struct{ "x", "y", "z", "w" }
I would like to additionally parse strings like this:
"a: x b: y d-no-c: w"
into structs like this
my_struct{ "x", "y", "", "w" }
and I would ideally like to do this in as simple a way as I can without making unnecessary copies of strings along the way.
My first thought was, the main rule should be rewritten so that it parses "a" and "b", and then selects between two alternatives depending on whether "c" is present or not. This is easy to work out as a grammar, but when we try to get the data-types right for the attributed grammar part of it, I can't seem to get it to work. I tried using std::pair<std::string, std::string> and also fusion::vector for the alternative types, but this cannot apparently be streamed into my struct using qi operator <<. (grammar2 tests are commented out because it doesn't compile.)
My next thought was, we can simply have two alternative forms of the main rule, which are attributed with type my_struct to make sure the attributed parsing works out. Surprisingly though, this implementation is actually broken -- it seems that when the grammar backtracks, it duplicates the a and b fields inside the resulting structure. I did not expect this and I don't know why it happens, do you know? (This is grammar3).
grammar3 has the problem that, even if it worked like I think it should (the tests passed), when the alternative part backtracks, it will have to reparse a and b which is some inefficiency. If we are willing to change our target struct from my_struct to a different struct, then we can use grammar4, which has the same plan as grammar2, but targets a struct in which one of the elements is a std::pair. Then we have move all the strings out of this temporary struct into the format we really wanted.
So, the questions are:
grammar4 works, but is there a way to do something along the lines of grammar2 which is presumably more efficient?
Why does grammar3 fail the tests?
Complete listing:
#define SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/struct/define_struct.hpp>
#include <boost/fusion/include/define_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <utility>
namespace qi = boost::spirit::qi;
BOOST_FUSION_DEFINE_STRUCT(
/**/
,
my_struct,
(std::string, a)
(std::string, b)
(std::string, c)
(std::string, d))
template<typename Iterator>
class grammar1 : public qi::grammar<Iterator, my_struct()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, my_struct()> main;
grammar1() : grammar1::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
main = lit("a:") >> id >> lit("b:") >> id >> lit("c:") >> id >> lit("d:") >> id;
}
};
//typedef std::pair<std::string, std::string> second_part_type;
typedef boost::fusion::vector<std::string, std::string> second_part_type;
template<typename Iterator>
class grammar2 : public qi::grammar<Iterator, my_struct()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, second_part_type()> with_c;
qi::rule<Iterator, second_part_type()> without_c;
qi::rule<Iterator, my_struct()> main;
grammar2() : grammar2::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
using qi::attr;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
with_c = lit("c:") >> id >> lit("d:") >> id;
without_c = attr("") >> lit("d-no-c:") >> id;
main = lit("a:") >> id >> lit("b:") >> id >> (with_c | without_c);
}
};
template<typename Iterator>
class grammar3 : public qi::grammar<Iterator, my_struct()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, my_struct()> with_c;
qi::rule<Iterator, my_struct()> without_c;
qi::rule<Iterator, my_struct()> main;
grammar3() : grammar3::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
using qi::attr;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
with_c = lit("a:") >> id >> lit("b:") >> id >> lit("c:") >> id >> lit("d:") >> id;
without_c = lit("a:") >> id >> lit("b:") >> id >> attr("") >> lit("d-no-c:") >> id;
main = with_c | without_c;
}
};
/***
* Alternate approach
*/
typedef std::pair<std::string, std::string> spair;
BOOST_FUSION_DEFINE_STRUCT(
/**/
,
my_struct2,
(std::string, a)
(std::string, b)
(spair, cd))
template<typename Iterator>
class grammar4 : public qi::grammar<Iterator, my_struct2()> {
public:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, spair()> with_c;
qi::rule<Iterator, spair()> without_c;
qi::rule<Iterator, my_struct2()> main;
grammar4() : grammar4::base_type(main) {
using qi::lit;
using qi::char_;
using qi::omit;
using qi::space;
using qi::attr;
id = omit[ *space ] >> *char_("A-Za-z_") >> omit [ *space ];
with_c = lit("c:") >> id >> lit("d:") >> id;
without_c = attr("") >> lit("d-no-c:") >> id;
main = lit("a:") >> id >> lit("b:") >> id >> (with_c | without_c);
}
};
my_struct convert_struct(my_struct2 && s) {
return { std::move(s.a), std::move(s.b), std::move(s.cd.first), std::move(s.cd.second) };
}
/***
* Testing
*/
void check_strings_eq(const std::string & a, const std::string & b, const char * label, int line = 0) {
if (a != b) {
std::cerr << "Mismatch '" << label << "' ";
if (line) { std::cerr << "at line " << line << " "; }
std::cerr << "\"" << a << "\" != \"" << b << "\"\n";
}
}
void check_eq(const my_struct & s, const my_struct & t, int line = 0) {
check_strings_eq(s.a, t.a, "a", line);
check_strings_eq(s.b, t.b, "b", line);
check_strings_eq(s.c, t.c, "c", line);
check_strings_eq(s.d, t.d, "d", line);
}
template<template<typename> class Grammar>
void test_grammar(const std::string & input, const my_struct & expected, int line = 0) {
auto it = input.begin();
auto end = input.end();
Grammar<decltype(it)> grammar;
my_struct result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse! ";
if (line) { std::cerr << "line = " << line; }
std::cerr << "\n";
std::cerr << "Stopped at:\n" << input << "\n";
for (auto temp = input.begin(); temp != it; ++temp) { std::cerr << " "; }
std::cerr << "^\n";
} else {
check_eq(result, expected, line);
}
}
int main() {
test_grammar<grammar1> ( "a: x b: y c: z d: w", my_struct{ "x", "y", "z", "w" }, __LINE__);
test_grammar<grammar1> ( "a: asdf b: jkl c: foo d: bar", my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__ );
//test_grammar<grammar2> ( "a: asdf b: jkl c: foo d: bar", my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__ );
//test_grammar<grammar2> ( "a: asdf b: jkl d-no-c: bar", my_struct{ "asdf", "jkl", "", "bar" }, __LINE__ );
test_grammar<grammar3> ( "a: asdf b: jkl c: foo d: bar", my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__);
test_grammar<grammar3> ( "a: asdf b: jkl d-no-c: bar", my_struct{ "asdf", "jkl", "", "bar" }, __LINE__ );
// Test 4th grammar
{
std::string input = "a: asdf b: jkl c: foo d: bar";
auto it = input.begin();
auto end = input.end();
grammar4<decltype(it)> grammar;
my_struct2 result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse! Line = " << __LINE__ << std::endl;
} else {
check_eq(convert_struct(std::move(result)), my_struct{ "asdf", "jkl", "foo", "bar" }, __LINE__);
}
}
{
std::string input = "a: asdf b: jkl d-no-c: bar";
auto it = input.begin();
auto end = input.end();
grammar4<decltype(it)> grammar;
my_struct2 result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse! Line = " << __LINE__ << std::endl;
} else {
check_eq(convert_struct(std::move(result)), my_struct{ "asdf", "jkl", "", "bar" }, __LINE__);
}
}
}

My suggestion here would be to use a permutation parser indeed.
It is quite a bit more flexible though, so you might wish to add a validation constraint in a semantic action:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/tuple/tuple_comparison.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
struct my_struct {
std::string a,b,c,d;
};
BOOST_FUSION_ADAPT_STRUCT(my_struct, a, b, c, d)
template<typename Iterator>
class grammar : public qi::grammar<Iterator, my_struct()> {
public:
grammar() : grammar::base_type(start) {
using namespace qi;
id = +char_("A-Za-z_");
part = lexeme[lit(_r1) >> ':'] >> id;
main = part(+"a")
^ part(+"b")
^ part(+"c")
^ (part(+"d") | part(+"d-no-c"));
;
start = skip(space) [ main ];
BOOST_SPIRIT_DEBUG_NODES((main)(part))
}
private:
qi::rule<Iterator, std::string()> id;
qi::rule<Iterator, std::string(const char*), qi::space_type> part;
qi::rule<Iterator, my_struct(), qi::space_type> main;
//
qi::rule<Iterator, my_struct()> start;
};
/***
* Testing
*/
void check_strings_eq(const std::string & a, const std::string & b, const char * label) {
if (a != b) {
std::cerr << "Mismatch '" << label << "' \"" << a << "\" != \"" << b << "\"\n";
}
}
void check_eq(const my_struct & s, const my_struct & t) {
check_strings_eq(s.a, t.a, "a");
check_strings_eq(s.b, t.b, "b");
check_strings_eq(s.c, t.c, "c");
check_strings_eq(s.d, t.d, "d");
if (boost::tie(s.a,s.b,s.c,s.d) == boost::tie(t.a,t.b,t.c,t.d))
std::cerr << "struct data matches\n";
}
template<template<typename> class Grammar>
void test_grammar(const std::string &input, const my_struct &expected) {
auto it = input.begin();
auto end = input.end();
Grammar<decltype(it)> grammar;
my_struct result;
if (!qi::parse(it, end, grammar, result)) {
std::cerr << "Failed to parse!\n";
std::cerr << "Stopped at:\n" << input << "\n";
for (auto temp = input.begin(); temp != it; ++temp) {
std::cerr << " ";
}
std::cerr << "^\n";
} else {
check_eq(result, expected);
}
}
int main() {
for (auto&& p : std::vector<std::pair<std::string, my_struct> > {
{"a: x b: y c: z d: w", my_struct{ "x", "y", "z", "w" }},
{"a: x c: z d: w", my_struct{ "x", "" , "z", "w" }},
{"a: x c: z" , my_struct{ "x", "" , "z", "" }},
{" b: y c: z d: w", my_struct{ "" , "y", "z", "w" }},
{"b: y c: z a: x d: w", my_struct{ "x", "y", "z", "w" }},
// if you really need:
{"a: x b: y d-no-c: w", my_struct{ "x", "y", "" , "w" }},
})
{
auto const& input = p.first;
auto const& expected = p.second;
std::cout << "----\nParsing '" << input << "'\n";
test_grammar<grammar> (input, expected);
}
}
Prints
----
Parsing 'a: x b: y c: z d: w'
struct data matches
----
Parsing 'a: x c: z d: w'
struct data matches
----
Parsing 'a: x c: z'
struct data matches
----
Parsing ' b: y c: z d: w'
struct data matches
----
Parsing 'b: y c: z a: x d: w'
struct data matches
----
Parsing 'a: x b: y d-no-c: w'
struct data matches

Related

Boost X3: Can a variant member be avoided in disjunctions?

I'd like to parse string | (string, int) and store it in a structure that defaults the int component to some value. The attribute of such a construction in X3 is a variant<string, tuple<string, int>>. I was thinking I could have a struct that takes either a string or a (string, int) to automagically be populated:
struct bar
{
bar (std::string x = "", int y = 0) : baz1 {x}, baz2 {y} {}
std::string baz1;
int baz2;
};
BOOST_FUSION_ADAPT_STRUCT (disj::ast::bar, baz1, baz2)
and then simply have:
const x3::rule<class bar, ast::bar> bar = "bar";
using x3::int_;
using x3::ascii::alnum;
auto const bar_def = (+(alnum) | ('(' >> +(alnum) >> ',' >> int_ >> ')')) >> ';';
BOOST_SPIRIT_DEFINE(bar);
However this does not work:
/usr/include/boost/spirit/home/x3/core/detail/parse_into_container.hpp:139:59: error: static assertion failed: Expecting a single element fusion sequence
139 | static_assert(traits::has_size<Attribute, 1>::value,
Setting baz2 to an optional does not help. One way to solve this is to have a variant field or inherit from that type:
struct string_int {
std::string s;
int i;
};
struct foo {
boost::variant<std::string, string_int> var;
};
BOOST_FUSION_ADAPT_STRUCT (disj::ast::string_int, s, i)
BOOST_FUSION_ADAPT_STRUCT (disj::ast::foo, var)
(For some reason, I have to use boost::variant instead of x3::variant for operator<< to work; also, using std::pair or tuple for string_int does not work, but boost::fusion::deque does.) One can then equip foo somehow to get the string and integer.
Question: What is the proper, clean way to do this in X3? Is there a more natural way than this second option and equipping foo with accessors?
Live On Coliru
Sadly the wording in the x3 section is exceedingly sparse and allows it (contrast the Qi section). A quick test confirms it:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
template <typename Expr>
std::string inspect(Expr const& expr) {
using A = typename x3::traits::attribute_of<Expr, x3::unused_type>::type;
return boost::core::demangle(typeid(A).name());
}
int main()
{
std::cout << inspect(x3::double_ | x3::int_) << "\n"; // variant expected
std::cout << inspect(x3::int_ | "bla" >> x3::int_) << "\n"; // variant "understandable"
std::cout << inspect(x3::int_ | x3::int_) << "\n"; // variant suprising:
}
Prints
boost::variant<double, int>
boost::variant<int, int>
boost::variant<int, int>
All Hope Is Not Lost
In your specific case you could trick the system:
auto const bar_def = //
(+x3::alnum >> x3::attr(-1) //
| '(' >> +x3::alnum >> ',' >> x3::int_ >> ')' //
) >> ';';
Note how we "inject" an int value for the first branch. That satisfies the attribute propagation gods:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iomanip>
namespace x3 = boost::spirit::x3;
namespace disj::ast {
struct bar {
std::string x;
int y;
};
using boost::fusion::operator<<;
} // namespace disj::ast
BOOST_FUSION_ADAPT_STRUCT(disj::ast::bar, x, y)
namespace disj::parser {
const x3::rule<class bar, ast::bar> bar = "bar";
auto const bar_def = //
(+x3::alnum >> x3::attr(-1) //
| '(' >> +x3::alnum >> ',' >> x3::int_ >> ')' //
) >> ';';
BOOST_SPIRIT_DEFINE(bar)
}
namespace disj {
void run_tests() {
for (std::string const input : {
"",
";",
"bla;",
"bla, 42;",
"(bla, 42);",
}) {
ast::bar val;
auto f = begin(input), l = end(input);
std::cout << "\n" << quoted(input) << " -> ";
if (phrase_parse(f, l, parser::bar, x3::space, val)) {
std::cout << "Parsed: " << val << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << " -- Remaining " << quoted(std::string_view(f, l)) << "\n";
}
}
}
}
int main()
{
disj::run_tests();
}
Prints
"" -> Failed
";" -> Failed
-- Remaining ";"
"bla;" -> Parsed: (bla -1)
"bla, 42;" -> Failed
-- Remaining "bla, 42;"
"(bla, 42);" -> Parsed: (bla 42)
ยน just today

boost spirit: copy the result in a vector of strings

I want to parse a function (with an arbitrary name and an arbitrary numbers af arguments) in this form:
function(bye, 1, 3, 4, foo)
The arguments could be generic strings comma separated.
And I want to copy the name of the function and the arguments in a vector of strings.
like this
std::vector<std::string> F;
std::string fun = "function(bye, 1, 3, 4, foo)";
// The parser must produce this vector from the example
F[0] == "function"
F[1] == "1"
F[2] == "3"
F[3] == "4"
F[4] == "foo"
I've written the following code by after reading some tutorial but it does not work (In the sense that it not compile).
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
fn_name = +qi::char_("a-zA-Z");
string = +qi::char_("a-zA-Z_0-9");
rec = *( lit(",") >> string );
start %= fn_name >> lit("(") >> string >> rec >> lit(")") ;
}
qi::rule<Iterator, std::string(), ascii::space_type> fn_name;
qi::rule<Iterator, std::string(), ascii::space_type> string;
qi::rule<Iterator, std::string(), ascii::space_type> rec;
qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
int
main()
{
namespace qi = boost::spirit::qi;
std::cout << "/////////////////////////////////////////////////////////\n\n";
client::command_parser<std::string::iterator> CP;
std::string cmd("fun(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
bool result = qi::parse(cmd.begin(), cmd.end(), CP, VV);
if (result) {
for ( auto sss : VV ){
std::cout << sss << std::endl;
}
} else {
std::cout << "Fail" << std::endl;
}
return 0 ;
}
Just for fun, here's my minimalist take on this grammar:
using CallList = std::vector<std::string>;
struct ParseError : std::runtime_error {
ParseError() : std::runtime_error("ParseError") {}
};
// The parse implementation
CallList parse_function_call(std::string const& fun) {
CallList elements;
using namespace boost::spirit::qi;
using It = decltype(begin(fun));
static const rule<It, std::string()> identifier = alpha >> +(alnum | char_('_'));
if (!phrase_parse(begin(fun), end(fun),
identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
space, elements))
throw ParseError{};
return elements;
}
With a little bit of plumbing
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
using namespace std;
if (holds_alternative<ParseError>(tr)) {
return os << "ParseError";
} else {
auto& list = get<CallList>(tr);
copy(begin(list), end(list), std::experimental::make_ostream_joiner(os << "{", ","));
return os << "}";
}
}
TestResult try_parse(std::string const& fun) {
try { return parse_function_call(fun); }
catch(ParseError const& e) { return e; }
}
Here's a test runner:
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
Which prints Live On Coliru
"function(bye, 1, 3, 4, foo)": FAIL
-- expected: {function,1,3,4,foo}
-- actual: {function,bye,1,3,4,foo}
"liar(pants on fire)": PASS
"liar('pants on fire')": PASS
"nullary()": PASS
"nullary( )": PASS
"zerolength(a,,b)": PASS
"zerolength(a, ,b)": PASS
"noarglust": PASS
"": PASS
"()": PASS
"1(invalidfunctionname)": PASS
"foo(bar) BOGUS": PASS
Note that your example test-case doesn't pass, but I think that was a mistake in the test case.
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <experimental/iterator>
#include <variant>
#include <iomanip>
using CallList = std::vector<std::string>;
struct ParseError : std::runtime_error {
ParseError() : std::runtime_error("ParseError") {}
};
// The parse implementation
CallList parse_function_call(std::string const& fun) {
CallList elements;
using namespace boost::spirit::qi;
using It = decltype(begin(fun));
static const rule<It, std::string()> identifier = alpha >> +(alnum | char_('_'));
if (!phrase_parse(begin(fun), end(fun),
identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
space, elements))
throw ParseError{};
return elements;
}
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
using namespace std;
if (holds_alternative<ParseError>(tr)) {
return os << "ParseError";
} else {
auto& list = get<CallList>(tr);
copy(begin(list), end(list), std::experimental::make_ostream_joiner(os << "{", ","));
return os << "}";
}
}
TestResult try_parse(std::string const& fun) {
try { return parse_function_call(fun); }
catch(ParseError const& e) { return e; }
}
int main() {
using namespace std;
using Case = pair<std::string, TestResult>;
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
}
I'm correcting my answer per suggestions made by #sehe. All the credit for these corrections go to him. I am referencing your line numbers below. So the first error is from spirit and it says:
incompatible_start_rule:
// If you see the assertion below failing then the start rule
// passed to the constructor of the grammar is not compatible with
// the grammar (i.e. it uses different template parameters).
The signature of the start parser does not match that of the parser deceleration.
22. struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
43. qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
I googled this and could not find an explanation but using an object rather than a type is preferable. I did it the other way in my first answer. The proper fix is at line 43:
43. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
The next spirit error is:
The rule was instantiated with a skipper type but you have not pass
any. Did you use parse instead of phrase_parse?");
So a phrase_parse is required with a skipper. Note that we need a skipper to pass along.
64. using qi::ascii::space;
65. bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, space, VV);
Now it compiles and the output is:
fun
1
2345foo
I see that won't do and you are looking to stuff the vector with each of the passed parameters. So you need a rule that is compatible with your attribute and intention. The kleene operator working with a std::string will put all the data into one string. So use your attribute:
41. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;``
Now as #sehe points out, the skipper with fn_name and string will just concatenate names with spaces and newlines. So don't use skippers there.
39. qi::rule<Iterator, std::string()> fn_name;
40. qi::rule<Iterator, std::string()> string;
The other error I made was to see the %= and call it a list operator. From here, it is a definition operator. I'm not sure why there are two but playing around, it seems you need to use %= with semantic action. Here is the corrected code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
fn_name = +qi::char_("a-zA-Z");
string = +qi::char_("a-zA-Z_0-9");
rec = *(lit(",") >> string);
start %= fn_name >> lit("(") >> string >> rec >> lit(")");
}
qi::rule<Iterator, std::string()> fn_name;
qi::rule<Iterator, std::string()> string;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
};
}
int main()
{
namespace qi = boost::spirit::qi;
client::command_parser<std::string::iterator> CP;
std::string cmd("function(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, qi::ascii::space, VV);
if (result) {
for (auto sss : VV) {
std::cout << sss << std::endl;
}
}
else {
std::cout << "Fail" << std::endl;
}
return 0;
}
And here is an example using X3:
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <vector>
//your attribute, could be more complex, might use namespace
using attr = std::vector<std::string>;
namespace parser {
namespace x3 = boost::spirit::x3;
const auto fn_name = +x3::char_("a-zA-Z");
const auto string = +x3::char_("a-zA-Z_0-9");
const auto start = x3::rule<struct _, attr>() = fn_name >> "(" >> string % ',' >> ")";
}
int main()
{
namespace x3 = boost::spirit::x3;
std::string cmd("fun(1,.2,3,4 , 5, foo) ");
attr VV;
auto it = cmd.begin();
bool result = phrase_parse(it, cmd.end(), parser::start, x3::space, VV);
if (result) {
for (auto sss : VV) {
std::cout << "-> " << sss << std::endl;
}
}
else
std::cout << "Fail at" << std::endl;
return 0;
}

Parsing inherited struct with boost spirit

I have an abstract struct Base with no fields (only abstract methods) and struct A, struct B inheriting from Base with different fields in both.
Is it possible to have a rule for parsing either a A or a B and store the result in a shared_ptr<Base>?
I would like to do it in order to parse some A or B and store it in a container of shared_ptr<Base>.
Here is a definition for the structs:
#include <iostream>
using namespace std;
struct Base
{
virtual void operator() const = 0;
};
struct A : Base
{
int value;
void operator() const override { cout << "Hello A: " << x << endl; };
};
struct B : Base
{
float value;
void operator() const override { cout << "Hello B: " << x << endl; };
};
struct BaseContainer
{
multimap<string, shared_ptr<Base>> container;
}
Let's say a BaseContainer is define by some input formatted like:
name: A value
name: B value
name: A value
where name is a placeholder for a string used as a key for the multimap in BaseContainer, then A or B is a keyword for generating a struct A or struct B, and value is the value stored in the container.
How would I write a parser BaseContainer?
The real example I want to apply it to is more complicated, struct A and struct B does not have same number of fields in it so please don't answer with something too specific to that example.
Thank you.
So there's two questions here, I feel:
How to adapt derived classes Can I use BOOST_FUSION_ADAPT_STRUCT with inherited stuff?
How can I use polymorphic attributes with boost::spirit::qi parsers?
Update
Small demo of approach 2 in the context of this question:
Live On Wandbox
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <map>
#include <iomanip> // std::quoted
struct Base {}; // dynamic polymorphism not required for us now, no problem if you want it
struct A : Base {
int value;
void operator()() const { std::cout << "Hello A: " << value << std::endl; };
};
struct B : Base {
float value;
void operator()() const { std::cout << "Hello B: " << value << std::endl; };
};
using Node = boost::variant<A, B>;
struct BaseContainer {
using Map = std::multimap<std::string, Node>;
Map container;
};
BOOST_FUSION_ADAPT_STRUCT(A, value)
BOOST_FUSION_ADAPT_STRUCT(B, value)
BOOST_FUSION_ADAPT_STRUCT(BaseContainer, container)
namespace qi = boost::spirit::qi;
template <typename It>
struct Parser : qi::grammar<It, BaseContainer()> {
Parser() : Parser::base_type(start) {
using namespace qi;
_key = lexeme['"' >> *('\\' >> char_ | ~char_('"')) >> '"'];
_a_node = "A(" >> int_ >> ")";
_b_node = "B(" >> float_ >> ")";
_node = _a_node | _b_node;
_pair = '{' >> _key >> ',' >> _node >> '}';
_container = '{' >> -(_pair % ',') >> '}';
start = skip(space) [ _container ];
BOOST_SPIRIT_DEBUG_NODES((start)(_container)(_pair)(_key)(_node)(_a_node)(_b_node))
}
private:
qi::rule<It, BaseContainer()> start;
// lexeme
qi::rule<It, std::string()> _key;
using Skipper = qi::space_type;
using Pair = std::pair<std::string, Node>;
qi::rule<It, BaseContainer::Map(), Skipper> _container;
qi::rule<It, Pair(), Skipper> _pair;
qi::rule<It, Node(), Skipper> _node;
qi::rule<It, A(), Skipper> _a_node;
qi::rule<It, B(), Skipper> _b_node;
};
int main() {
Parser<std::string::const_iterator> const p;
for (std::string const input : {
R"({})",
R"({ { "one", A(42) } })",
R"({ { "two", B(3.14) } })",
R"({ { "three", A( -42 ) }, { "four", B( -3.14 ) } })",
})
{
std::cout << "-------\n";
std::cout << "Parsing " << input << "\n";
auto f = begin(input), l = end(input);
BaseContainer result;
if (qi::parse(f, l, p, result)) {
for (auto const& [k,v] : result.container) {
std::cout << " Key " << std::quoted(k) << ": ";
boost::apply_visitor([](auto const& node) { node(); }, v);
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
-------
Parsing {}
-------
Parsing { { "one", A(42) } }
Key "one": Hello A: 42
-------
Parsing { { "two", B(3.14) } }
Key "two": Hello B: 3.14
-------
Parsing { { "three", A( -42 ) }, { "four", B( -3.14 ) } }
Key "four": Hello B: -3.14
Key "three": Hello A: -42

What's the appropriate way to indicate a Qi transform attribute fail?

What's the proper way to indicate a parse fail in a boost::spirit::traits::transform_attribute? Can I throw any old exception, or is there a specific thing it wants me to do?
namespace boost
{
namespace spirit
{
namespace traits
{
template <>
struct transform_attribute<TwoNums, std::vector<char>, qi::domain>
{
typedef std::vector<char> type;
static type pre(TwoWords&) { return{}; }
static void post(TwoWords& val, type const& attr) {
std::string stringed(attr.begin(), attr.end());
//https://stackoverflow.com/questions/236129/the-most-elegant-way-to-iterate-the-words-of-a-string
std::vector<std::string> strs;
boost::split(strs, stringed, ",");
if(strs.size()!=2)
{
//What do I do here?
}
val = TwoWords(strs[0],strs[1]);
}
static void fail(FDate&) { }
};
}
}
}
Yes, raising an exception seems the only out-of-band way.
You could use qi::on_error to trap and respond to it.
However, it's a bit unclear what you need this for. It seems a bit upside down to use split inside a parser. Splitting is basically a poor version of parsing.
Why not have a rule for the sub-parsing?
1. Simple Throw...
Live On Coliru
#include <boost/algorithm/string.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
struct Invalid {};
struct TwoWords {
std::string one, two;
};
namespace boost { namespace spirit { namespace traits {
template <> struct transform_attribute<TwoWords, std::vector<char>, qi::domain> {
typedef std::vector<char> type;
static type pre(TwoWords &) { return {}; }
static void post(TwoWords &val, type const &attr) {
std::string stringed(attr.begin(), attr.end());
std::vector<std::string> strs;
boost::split(strs, stringed, boost::is_any_of(","));
if (strs.size() != 2) {
throw Invalid{};
}
val = TwoWords{ strs.at(0), strs.at(1) };
}
static void fail(TwoWords &) {}
};
} } }
template <typename It>
struct Demo1 : qi::grammar<It, TwoWords()> {
Demo1() : Demo1::base_type(start) {
start = qi::attr_cast<TwoWords>(+qi::char_);
}
private:
qi::rule<It, TwoWords()> start;
};
int main() {
Demo1<std::string::const_iterator> parser;
for (std::string const input : { ",", "a,b", "a,b,c" }) {
std::cout << "Parsing " << std::quoted(input) << " -> ";
TwoWords tw;
try {
if (parse(input.begin(), input.end(), parser, tw)) {
std::cout << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
} else {
std::cout << "Failed\n";
}
} catch(Invalid) {
std::cout << "Input invalid\n";
}
}
}
Prints
Parsing "," -> "", ""
Parsing "a,b" -> "a", "b"
Parsing "a,b,c" -> Input invalid
2. Handling Errors Inside The Parser
This feels a bit hacky because it will require you to throw a expectation_failure.
This is not optimal since it assumes you know the iterator the parser is going to be instantiated with.
on_error was designed for use with expectation points
*Live On Coliru
#include <boost/algorithm/string.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
struct Invalid {};
struct TwoWords {
std::string one, two;
};
namespace boost { namespace spirit { namespace traits {
template <> struct transform_attribute<TwoWords, std::vector<char>, qi::domain> {
typedef std::vector<char> type;
static type pre(TwoWords &) { return {}; }
static void post(TwoWords &val, type const &attr) {
std::string stringed(attr.begin(), attr.end());
std::vector<std::string> strs;
boost::split(strs, stringed, boost::is_any_of(","));
if (strs.size() != 2) {
throw qi::expectation_failure<std::string::const_iterator>({}, {}, info("test"));
}
val = TwoWords{ strs.at(0), strs.at(1) };
}
static void fail(TwoWords &) {}
};
} } }
template <typename It>
struct Demo2 : qi::grammar<It, TwoWords()> {
Demo2() : Demo2::base_type(start) {
start = qi::attr_cast<TwoWords>(+qi::char_);
qi::on_error(start, [](auto&&...){});
// more verbose spelling:
// qi::on_error<qi::error_handler_result::fail> (start, [](auto&&...){[>no-op<]});
}
private:
qi::rule<It, TwoWords()> start;
};
int main() {
Demo2<std::string::const_iterator> parser;
for (std::string const input : { ",", "a,b", "a,b,c" }) {
std::cout << "Parsing " << std::quoted(input) << " -> ";
TwoWords tw;
try {
if (parse(input.begin(), input.end(), parser, tw)) {
std::cout << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
} else {
std::cout << "Failed\n";
}
} catch(Invalid) {
std::cout << "Input invalid\n";
}
}
}
Prints
Parsing "," -> "", ""
Parsing "a,b" -> "a", "b"
Parsing "a,b,c" -> Failed
3. Finally: Sub-rules Rule!
Let's assume a slightly more interesting grammar in which you have a ; separated list of TwoWords:
"foo,bar;a,b"
We parse into a vector of TwoWords:
using Word = std::string;
struct TwoWords { std::string one, two; };
using TwoWordses = std::vector<TwoWords>;
Instead of using traits to "coerce" attributes, we just adapt the struct and rely on automatic attribute propagation:
BOOST_FUSION_ADAPT_STRUCT(TwoWords, one, two)
The parser mimics the data-types:
template <typename It>
struct Demo3 : qi::grammar<It, TwoWordses()> {
Demo3() : Demo3::base_type(start) {
using namespace qi;
word = *(graph - ',' - ';');
twowords = word >> ',' >> word;
start = twowords % ';';
}
private:
qi::rule<It, Word()> word;
qi::rule<It, TwoWords()> twowords;
qi::rule<It, TwoWordses()> start;
};
And the full test is Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
using Word = std::string;
struct TwoWords { std::string one, two; };
using TwoWordses = std::vector<TwoWords>;
BOOST_FUSION_ADAPT_STRUCT(TwoWords, one, two);
template <typename It>
struct Demo3 : qi::grammar<It, TwoWordses()> {
Demo3() : Demo3::base_type(start) {
using namespace qi;
word = *(graph - ',' - ';');
twowords = word >> ',' >> word;
start = twowords % ';';
}
private:
qi::rule<It, Word()> word;
qi::rule<It, TwoWords()> twowords;
qi::rule<It, TwoWordses()> start;
};
int main() {
using It = std::string::const_iterator;
Demo3<It> parser;
for (std::string const input : {
",",
"foo,bar",
"foo,bar;qux,bax",
"foo,bar;qux,bax;err,;,ful",
// failing cases or cases with trailing input:
"",
"foo,bar;",
"foo,bar,qux",
})
{
std::cout << "Parsing " << std::quoted(input) << " ->\n";
TwoWordses tws;
It f = input.begin(), l = input.end();
if (parse(f, l, parser, tws)) {
for(auto& tw : tws) {
std::cout << " - " << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
}
} else {
std::cout << "Failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
Parsing "," ->
- "", ""
Parsing "foo,bar" ->
- "foo", "bar"
Parsing "foo,bar;qux,bax" ->
- "foo", "bar"
- "qux", "bax"
Parsing "foo,bar;qux,bax;err,;,ful" ->
- "foo", "bar"
- "qux", "bax"
- "err", ""
- "", "ful"
Parsing "" ->
Failed
Parsing "foo,bar;" ->
- "foo", "bar"
Remaining unparsed input: ";"
Parsing "foo,bar,qux" ->
- "foo", "bar"
Remaining unparsed input: ",qux"

Treat escaped newline as line continuation

Here is an example of the syntax -- two groups of items:
I_name m_name parameter1=value parameter2=value
I_name m_name parameter1=value \
parameter2=value
My question is how to define the skip-type.
It is not just space_type but space_type minus newline.
But newline followed by backslash is a skip-type.
E.g.
I define name like that:
qi::rule<Iterator, std::string(), ascii::space_type> m_sName;
m_sName %= qi::lexeme[ascii::alpha >> *ascii::alnum];
This is obviously not correct, as the space_type must include newline-backslash.
The following grammar works for me.
*("\\\n" | ~qi::char_('\n')) % '\n'
It will ignore any newline after the backslash. And the following is a simple test.
#include <vector>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
typedef std::vector<std::string> Lines;
inline auto ParseLines(std::string const& str) {
Lines lines;
namespace qi = boost::spirit::qi;
if (qi::parse(
str.begin(), str.end(),
*("\\\n" | ~qi::char_('\n')) % '\n',
lines)) {
return lines;
}
else {
throw std::invalid_argument("Parse error at ParseLines");
}
}
BOOST_AUTO_TEST_CASE(TestParseLines) {
std::string const str =
"I_name m_name parameter1=value parameter2=value\n"
"I_name m_name parameter1 = value \\\n"
"parameter2 = value";
Lines const expected{
"I_name m_name parameter1=value parameter2=value",
"I_name m_name parameter1 = value parameter2 = value"
};
BOOST_TEST(ParseLines(str) == expected);
}
You should use "-std=c++14 -lboost_unit_test_framework" for compilation. Anyway, it is easy to convert the code for c++03.
qi::blank is exactly that. It's qi::space without newlines.
You can do this too: ("\\\n" | qi::blank)
To be able to declare a rule with such a skipper, define a skipper grammar:
template <typename It>
struct my_skipper : qi::grammar<It> {
my_skipper() : my_skipper::base_type(start) {}
qi::rule<It> start = ("\\\n" | qi::blank);
};
Full Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace ast {
struct record {
std::string iname, mname;
std::map<std::string, std::string> params;
};
using records = std::vector<record>;
}
BOOST_FUSION_ADAPT_STRUCT(ast::record, iname, mname, params)
template <typename It>
struct my_parser : qi::grammar<It, ast::records()> {
using Skipper = qi::rule<It>;
my_parser() : my_parser::base_type(start) {
skipper = ("\\\n" | qi::blank);
name = +qi::graph;
key = +(qi::graph - '=');
param = key >> '=' >> name;
record = name >> name >> *param;
records = *(record >> +qi::eol);
start = qi::skip(qi::copy(skipper)) [ records ];
}
private:
Skipper skipper;
qi::rule<It, ast::records(), Skipper> records;
qi::rule<It, ast::record(), Skipper> record;
qi::rule<It, ast::records()> start;
qi::rule<It, std::pair<std::string, std::string>()> param;
qi::rule<It, std::string()> name, key;
};
int main() {
#if 1
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
#else
using It = std::string::const_iterator;
std::string const input = "something here a=1\n";
It f = input.begin(), l = input.end();
#endif
ast::records data;
bool ok = qi::parse(f, l, my_parser<It>(), data);
if (ok) {
std::cout << "Parsed:\n";
for (auto& r : data) {
std::cout << "\t" << r.iname << " " << r.mname;
for (auto& p : r.params)
std::cout << " [" << p.first << ": " << p.second << "]";
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints (for the input in your question):
Parsed:
I_name m_name [parameter1: value] [parameter2: value]
I_name m_name [parameter1: value] [parameter2: value]