So I want to write a... well... not-so-simple parser with boost::spirit::qi. I know the bare basics of boost spirit, having gotten acquainted with it for the first time in the past couple of hours.
Basically I need to parse this:
# comment
# other comment
set "Myset A"
{
figure "AF 1"
{
i 0 0 0
i 1 2 5
i 1 1 1
f 3.1 45.11 5.3
i 3 1 5
f 1.1 2.33 5.166
}
figure "AF 2"
{
i 25 5 1
i 3 1 3
}
}
# comment
set "Myset B"
{
figure "BF 1"
{
f 23.1 4.3 5.11
}
}
set "Myset C"
{
include "Myset A" # includes all figures from Myset A
figure "CF"
{
i 1 1 1
f 3.11 5.33 3
}
}
Into this:
// Data model the input text should be parsed into.

/// A point with integer coordinates (an `i x y z` line of the input).
struct int_point { int x, y, z; };

/// A point with floating-point coordinates (an `f x y z` line of the input).
struct float_point { float x, y, z; };

/// A named figure holding the int and float points listed in its block.
struct figure
{
    std::string name;
    std::vector<int_point> int_points;
    std::vector<float_point> float_points;
};

/// A named set of figures.
struct figure_set
{
    std::string name;
    std::vector<figure> figures;  // the terminating ';' was missing here
};
vector<figure_set> figure_sets; // fill with the data of the input
Now, obviously having somebody write it for me would be too much, but can you please provide some tips on what to read and how to structure the grammar and parsers for this task?
And also... it may be the case that boost::spirit is not the best library I could use for the task. If so, which one is?
EDIT:
Here's where I've gotten so far. But I'm not yet sure how to go on: http://liveworkspace.org/code/212c31dfc0b6fbdf6c462d8d931c0e9f
I am able to read a single figure but, I don't yet have an idea how to parse a set of figures.
Here's my take on it
I believe the rule that will have been the blocker for you would be
figure = eps >> "figure"
>> name [ at_c<0>(_val) = _1 ] >> '{' >>
*(
ipoints [ push_back(at_c<1>(_val), _1) ]
| fpoints [ push_back(at_c<2>(_val), _1) ]
) >> '}';
This is actually a symptom of the fact that you parse inter-mixed i and f lines into separate containers.
See below for an alternative.
Here's my full code: test.cpp
//#define BOOST_SPIRIT_DEBUG // before including Spirit
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <fstream>
namespace Format
{
    /// Integer point, as read from an `i x y z` line.
    struct int_point
    {
        int x;
        int y;
        int z;
    };

    /// Floating-point point, as read from an `f x y z` line.
    struct float_point
    {
        float x;
        float y;
        float z;
    };

    /// A named figure; int points and float points are kept in separate containers.
    struct figure
    {
        std::string              name;
        std::vector<int_point>   int_points;
        std::vector<float_point> float_points;

        // Text output; defined out of line.
        friend std::ostream& operator<<(std::ostream& out, figure const& fig);
    };

    /// A named set: the names of the sets it includes plus its own figures.
    struct figure_set
    {
        std::string           name;
        std::set<std::string> includes;
        std::vector<figure>   figures;

        // Text output; defined out of line.
        friend std::ostream& operator<<(std::ostream& out, figure_set const& fs);
    };

    /// Everything parsed from one input file.
    typedef std::vector<figure_set> file_data;
}
// Adapt the Format structs as Fusion sequences, listing each member's type and
// name in declaration order, so the Spirit rules and generators in this file
// can use them as attributes.
BOOST_FUSION_ADAPT_STRUCT(Format::int_point,
(int, x)(int, y)(int, z))
BOOST_FUSION_ADAPT_STRUCT(Format::float_point,
(float, x)(float, y)(float, z))
// Member indices here (0 = name, 1 = int_points, 2 = float_points) are the ones
// the parser's at_c<N> semantic actions refer to.
BOOST_FUSION_ADAPT_STRUCT(Format::figure,
(std::string, name)
(std::vector<Format::int_point>, int_points)
(std::vector<Format::float_point>, float_points))
BOOST_FUSION_ADAPT_STRUCT(Format::figure_set,
(std::string, name)
(std::set<std::string>, includes)
(std::vector<Format::figure>, figures))
namespace Format
{
// Writes a figure as text with a Karma generator, using ' ' as delimiter:
// the quoted name, then every int point as an "i" line, then every float point
// as an "f" line. All int points are therefore emitted before all float points.
std::ostream& operator<<(std::ostream& os, figure const& o)
{
using namespace boost::spirit::karma;
return os << format_delimited(
// no_delimit keeps the delimiter out of the quoted name
"\n figure" << no_delimit [ '"' << string << '"' ] << "\n {"
<< *("\n i" << int_ << int_ << int_)
<< *("\n f" << float_ << float_ << float_)
<< "\n }"
, ' ', o);
}
// Writes a figure_set as text with a Karma generator, using ' ' as delimiter:
// the quoted set name, one include line per entry of `includes`, then each
// figure through its own operator<< (via `stream`).
std::ostream& operator<<(std::ostream& os, figure_set const& o)
{
using namespace boost::spirit::karma;
return os << format_delimited(
"\nset" << no_delimit [ '"' << string << '"' ] << "\n{"
<< *("\n include " << no_delimit [ '"' << string << '"' ])
<< *stream
<< "\n}"
, ' ', o);
}
}
namespace /*anon*/
{
namespace phx=boost::phoenix;
namespace qi =boost::spirit::qi;
// Skipper grammar: describes what the main parser ignores between tokens.
// It matches either a '#' comment or a single whitespace character.
template <typename Iterator> struct skipper
: public qi::grammar<Iterator>
{
skipper() : skipper::base_type(start, "skipper")
{
using namespace qi;
// '#', then everything up to the line end, then the line end itself (or end of input)
comment = '#' >> *(char_ - eol) >> (eol|eoi);
start = comment | qi::space;
// Debug tracing hooks for the two rules (see the commented-out BOOST_SPIRIT_DEBUG define)
BOOST_SPIRIT_DEBUG_NODE(start);
BOOST_SPIRIT_DEBUG_NODE(comment);
}
private:
qi::rule<Iterator> start, comment;
};
// Main grammar: parses a whole input into Format::file_data (a vector of
// figure_set), using skipper<Iterator> to skip comments and whitespace.
template <typename Iterator> struct parser
: public qi::grammar<Iterator, Format::file_data(), skipper<Iterator> >
{
parser() : parser::base_type(start, "parser")
{
using namespace qi;
using phx::push_back;
using phx::at_c;
// A double-quoted string; lexeme[] suspends skipping so blanks inside the quotes are kept
name = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];
// include "<set name>"
include = eps >> "include" >> name;
// i <int> <int> <int>
ipoints = eps >> "i" >> int_ >> int_ >> int_;
// f <float> <float> <float>
fpoints = eps >> "f" >> float_ >> float_ >> float_;
// figure "<name>" { ... }: i-lines and f-lines may be inter-mixed. The semantic
// actions store the name in member 0 and append each point to member 1
// (int_points) or member 2 (float_points), so the relative order between the
// two kinds of line is not preserved.
figure = eps >> "figure"
>> name [ at_c<0>(_val) = _1 ] >> '{' >>
*(
ipoints [ push_back(at_c<1>(_val), _1) ]
| fpoints [ push_back(at_c<2>(_val), _1) ]
) >> '}';
// set "<name>" { include* figure* } -- all includes must come before the figures
set = eps >> "set" >> name >> '{' >> *include >> *figure >> '}';
// The input is any number of sets
start = *set;
}
private:
qi::rule<Iterator, std::string() , skipper<Iterator> > name, include;
qi::rule<Iterator, Format::int_point() , skipper<Iterator> > ipoints;
qi::rule<Iterator, Format::float_point(), skipper<Iterator> > fpoints;
qi::rule<Iterator, Format::figure() , skipper<Iterator> > figure;
qi::rule<Iterator, Format::figure_set() , skipper<Iterator> > set;
qi::rule<Iterator, Format::file_data() , skipper<Iterator> > start;
};
}
namespace Parser {
bool parsefile(const std::string& spec, Format::file_data& data)
{
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
std::string v;
v.reserve(4096);
v.insert(v.end(), std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());
if (!in)
return false;
typedef char const * iterator_type;
iterator_type first = &v[0];
iterator_type last = first+v.size();
try
{
parser<iterator_type> p;
skipper<iterator_type> s;
bool r = qi::phrase_parse(first, last, p, s, data);
r = r && (first == last);
if (!r)
std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
return r;
}
catch (const qi::expectation_failure<char const *>& e)
{
std::cerr << "FIXME: expected " << e.what_ << ", got '" << std::string(e.first, e.last) << "'" << std::endl;
return false;
}
}
}
int main()
{
Format::file_data data;
bool ok = Parser::parsefile("input.txt", data);
std::cerr << "Parse " << (ok?"success":"failed") << std::endl;
std::cout << "# figure sets exported automatically by karma\n\n";
for (auto& set : data)
std::cout << set;
}
It outputs the parsed data as a verification: output.txt
Parse success
# figure sets exported automatically by karma
set "Myset A"
{
figure "AF 1"
{
i 0 0 0
i 1 2 5
i 1 1 1
i 3 1 5
f 3.1 45.11 5.3
f 1.1 2.33 5.166
}
figure "AF 2"
{
i 25 5 1
i 3 1 3
}
}
set "Myset B"
{
figure "BF 1"
{
f 23.1 4.3 5.11
}
}
set "Myset C"
{
include "Myset A"
figure "CF"
{
i 1 1 1
f 3.11 5.33 3.0
}
}
You will note that
the order of the point lines is changed (all int_points precede all float_points)
also, non-significant digits are added, e.g. in the last line 3.0 instead of 3, to show that the type is float.
you had 'forgotten' (?) about the includes in your question
Alternative
Have something that keeps the actual point lines in original order:
// A point line is either an int point or a float point. Storing the variants in
// a single container keeps the lines in their original input order.
typedef boost::variant<int_point, float_point> if_point;
struct figure
{
    std::string name;
    std::vector<if_point> if_points;
};  // the terminating ';' was missing here
Now the rules become simply:
name = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];
include = eps >> "include" >> name;
ipoints = eps >> "i" >> int_ >> int_ >> int_;
fpoints = eps >> "f" >> float_ >> float_ >> float_;
figure = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';
set = eps >> "set" >> name >> '{' >> *include >> *figure >> '}';
start = *set;
Note the elegance in
figure = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';
And the output stays in the exact order of the input: output.txt
Once again, full demo code (on github only): test.cpp
Bonus update
Finally, I made my first proper Karma grammar to output the results:
name = no_delimit ['"' << string << '"'];
include = "include" << name;
ipoints = "\n i" << int_ << int_ << int_;
fpoints = "\n f" << float_ << float_ << float_;
figure = "figure" << name << "\n {" << *(ipoints | fpoints) << "\n }";
set = "set" << name << "\n{"
<< *("\n " << include)
<< *("\n " << figure) << "\n}";
start = "# figure sets exported automatically by karma\n\n"
<< set % eol;
That was actually considerably more comfortable than I had expected. See it in the latest version of the fully updated gist: test.hpp
Related
Think about a preprocessor which will read the raw text (no significant white space or tokens).
There are 3 rules.
resolve_para_entry should solve the Argument inside a call. The top-level text is returned as string.
resolve_para should resolve the whole Parameter list and put all the top-level Parameter in a string list.
resolve is the entry
On the way I track the iterator and get the text portion
Samples:
sometext(para) → expect para in the string list
sometext(para1,para2) → expect para1 and para2 in string list
sometext(call(a)) → expect call(a) in the string list
sometext(call(a,b)) ← here it fails; it seems that the "!lit(',')" won't make the parser step outside ...
Rules:
resolve_para_entry = +(
(iter_pos >> lit('(') >> (resolve_para_entry | eps) >> lit(')') >> iter_pos) [_val= phoenix::bind(&appendString, _val, _1,_3)]
| (!lit(',') >> !lit(')') >> !lit('(') >> (wide::char_ | wide::space)) [_val = phoenix::bind(&appendChar, _val, _1)]
);
resolve_para = (lit('(') >> lit(')'))[_val = std::vector<std::wstring>()] // empty para -> old style
| (lit('(') >> resolve_para_entry >> *(lit(',') >> resolve_para_entry) > lit(')'))[_val = phoenix::bind(&appendStringList, _val, _1, _2)]
| eps;
;
resolve = (iter_pos >> name_valid >> iter_pos >> resolve_para >> iter_pos);
In the end doesn't seem very elegant. Maybe there is a better way to parse such stuff without skipper
Indeed this should be a lot simpler.
First off, I fail to see why the absence of a skipper is at all relevant.
Second, exposing the raw input is best done using qi::raw[] instead of dancing with iter_pos and clumsy semantic actions¹.
Among the other observations I see:
negating a charset is done with ~, so e.g. ~char_(",()")
(p|eps) would be better spelled -p
(lit('(') >> lit(')')) could be just "()" (after all, there's no skipper, right)
p >> *(',' >> p) is equivalent to p % ','
With the above, resolve_para simplifies to this:
resolve_para = '(' >> -(resolve_para_entry % ',') >> ')';
resolve_para_entry seems weird, to me. It appears that any nested parentheses are simply swallowed. Why not actually parse a recursive grammar so you detect syntax errors?
Here's my take on it:
Define An AST
I prefer to make this the first step because it helps me think about the parser productions:
namespace Ast {
    /// The raw text of each top-level argument of one call.
    using ArgList = std::list<std::string>;

    /// One extracted call: the identifier and its argument list.
    struct Resolve {
        std::string name;
        ArgList     arglist;
    };

    /// Every call found in the input.
    using Resolves = std::vector<Resolve>;
}
Creating The Grammar Rules
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, std::string()> arg, identifier;
And their definitions:
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
start = *qr::seek[hold[resolve]];
Notes:
No more semantic actions
No more eps
No more iter_pos
I've opted to make arglist not-optional. If you really wanted that, change it back:
resolve = identifier >> -arglist;
But in our sample it will generate a lot of noisy output.
Of course your entry point (start) will be different. I just did the simplest thing that could possibly work, using another handy parser directive from the Spirit Repository (like iter_pos that you were already using): seek[]
The hold is there for this reason: boost::spirit::qi duplicate parsing on the output - You might not need it in your actual parser.
Live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace Ast {
    /// The raw text of each top-level argument of one call.
    using ArgList = std::list<std::string>;

    /// One extracted call: the identifier and its argument list.
    struct Resolve {
        std::string name;
        ArgList     arglist;
    };

    /// Every call found in the input.
    using Resolves = std::vector<Resolve>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Resolve, name, arglist)
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
// Skipper-less grammar that extracts every `identifier(arg, ...)` call found
// anywhere in the input into Ast::Resolves.
template <typename It>
struct Parser : qi::grammar<It, Ast::Resolves()>
{
Parser() : Parser::base_type(start) {
using namespace qi;
// A letter or underscore followed by letters, digits or underscores
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
// One argument, exposed as the raw matched text: a run of parenthesised groups
// (recursively containing an optional arg) and/or characters other than , ) (
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
// '(' optional comma-separated args ')'
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
// seek[] advances through the input until a resolve matches; hold[] keeps the
// attribute of a failed attempt from leaking into the result
start = *qr::seek[hold[resolve]];
}
private:
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, std::string()> arg, identifier;
};
#include <iostream>
// Runs the grammar over the sample text and lists every extracted call
// together with its arguments.
int main() {
    using It = std::string::const_iterator;

    std::string const samples = R"--(
Samples:
sometext(para) → expect para in the string list
sometext(para1,para2) → expect para1 and para2 in string list
sometext(call(a)) → expect call(a) in the string list
sometext(call(a,b)) ← here it fails; it seams that the "!lit(',')" wont make the parser step outside
)--";

    Ast::Resolves data;
    It f = samples.begin(), l = samples.end();

    bool const ok = parse(f, l, Parser<It>{}, data);
    if (!ok) {
        std::cout << "Parsing failed\n";
    } else {
        std::cout << "Parsed " << data.size() << " resolves\n";
    }

    // Print whatever was collected, one call per entry.
    for (auto const& call : data) {
        std::cout << " - " << call.name << "\n (\n";
        for (auto const& argument : call.arglist)
            std::cout << " " << argument << "\n";
        std::cout << " )\n";
    }
}
Prints
Parsed 6 resolves
- sometext
(
para
)
- sometext
(
para1
para2
)
- sometext
(
call(a)
)
- call
(
a
)
- call
(
a
b
)
- lit
(
'
'
)
More Ideas
That last output shows you a problem with your current grammar: lit(',') should obviously not be seen as a call with two parameters.
I recently did an answer on extracting (nested) function calls with parameters which does things more neatly:
Boost spirit parse rule is not applied
or this one boost spirit reporting semantic error
BONUS
Bonus version that uses string_view and also shows exact line/column information of all extracted words.
Note that it still doesn't require any phoenix or semantic actions. Instead it simply defines the necessary trait to assign to boost::string_view from an iterator range.
Live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/utility/string_view.hpp>
namespace Ast {
    /// A non-owning view of a piece of the parsed input.
    using Source = boost::string_view;

    /// The views of each top-level argument of one call.
    using ArgList = std::list<Source>;

    /// One extracted call: views of the identifier and of its arguments.
    struct Resolve {
        Source  name;
        ArgList arglist;
    };

    /// Every call found in the input.
    using Resolves = std::vector<Resolve>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Resolve, name, arglist)
namespace boost { namespace spirit { namespace traits {
    /// Customization point that lets Qi assign a matched iterator range (such as
    /// the one produced by raw[]) to a boost::string_view attribute.
    ///
    /// The view refers to the characters in the parsed input; nothing is copied.
    /// NOTE(review): assumes It iterates over contiguous char storage
    /// (std::string iterators or char pointers) -- confirm for other iterators.
    template <typename It>
    struct assign_to_attribute_from_iterators<boost::string_view, It, void> {
        static void call(It f, It l, boost::string_view& attr) {
            if (f == l) {
                // An empty range must not be dereferenced; expose an empty view.
                attr = boost::string_view {};
                return;
            }
            // &*f yields the address of the first matched character. The previous
            // f.base() relied on an iterator member the standard does not
            // guarantee, and did not compile for plain pointer iterators.
            attr = boost::string_view { &*f, size_t(std::distance(f, l)) };
        }
    };
} } }
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
// Skipper-less grammar that extracts every `identifier(arg, ...)` call found
// anywhere in the input. Names and arguments are exposed as Ast::Source views
// into the input rather than as copied strings.
template <typename It>
struct Parser : qi::grammar<It, Ast::Resolves()>
{
Parser() : Parser::base_type(start) {
using namespace qi;
// raw[] exposes the matched iterator range, which is assigned to Ast::Source
identifier = raw [ char_("a-zA-Z_") >> *char_("a-zA-Z0-9_") ];
// One argument: a run of parenthesised groups (recursively containing an
// optional arg) and/or characters other than , ) (
arg = raw [ +('(' >> -arg >> ')' | +~char_(",)(")) ];
// '(' optional comma-separated args ')'
arglist = '(' >> -(arg % ',') >> ')';
resolve = identifier >> arglist;
// seek[] advances through the input until a resolve matches
start = *qr::seek[hold[resolve]];
}
private:
qi::rule<It, Ast::Resolves()> start;
qi::rule<It, Ast::Resolve()> resolve;
qi::rule<It, Ast::ArgList()> arglist;
qi::rule<It, Ast::Source()> arg, identifier;
};
#include <iostream>
struct Annotator {
using Ref = boost::string_view;
struct Manip {
Ref fragment, context;
friend std::ostream& operator<<(std::ostream& os, Manip const& m) {
return os << "[" << m.fragment << " at line:" << m.line() << " col:" << m.column() << "]";
}
size_t line() const {
return 1 + std::count(context.begin(), fragment.begin(), '\n');
}
size_t column() const {
return 1 + (fragment.begin() - start_of_line().begin());
}
Ref start_of_line() const {
return context.substr(context.substr(0, fragment.begin()-context.begin()).find_last_of('\n') + 1);
}
};
Ref context;
Manip operator()(Ref what) const { return {what, context}; }
};
// Runs the grammar over the sample text and lists every extracted call and
// argument, annotated with its line and column in the sample.
int main() {
    using It = std::string::const_iterator;

    std::string const samples = R"--(Samples:
sometext(para) → expect para in the string list
sometext(para1,para2) → expect para1 and para2 in string list
sometext(call(a)) → expect call(a) in the string list
sometext(call(a,b)) ← here it fails; it seams that the "!lit(',')" wont make the parser step outside
)--";

    Ast::Resolves data;
    It f = samples.begin(), l = samples.end();

    bool const ok = parse(f, l, Parser<It>{}, data);
    if (!ok) {
        std::cout << "Parsing failed\n";
    } else {
        std::cout << "Parsed " << data.size() << " resolves\n";
    }

    // Positions are computed relative to the complete sample text.
    Annotator annotate{samples};
    for (auto const& call : data) {
        std::cout << " - " << annotate(call.name) << "\n (\n";
        for (auto const& argument : call.arglist)
            std::cout << " " << annotate(argument) << "\n";
        std::cout << " )\n";
    }
}
Prints
Parsed 6 resolves
- [sometext at line:3 col:1]
(
[para at line:3 col:10]
)
- [sometext at line:4 col:1]
(
[para1 at line:4 col:10]
[para2 at line:4 col:16]
)
- [sometext at line:5 col:1]
(
[call(a) at line:5 col:10]
)
- [call at line:5 col:34]
(
[a at line:5 col:39]
)
- [call at line:6 col:10]
(
[a at line:6 col:15]
[b at line:6 col:17]
)
- [lit at line:6 col:62]
(
[' at line:6 col:66]
[' at line:6 col:68]
)
¹ Boost Spirit: "Semantic actions are evil"?
Can you help me understand the difference between the a % b parser and its expanded a >> *(b >> a) form in Boost.Spirit? Even though the reference manual states that they are equivalent,
The list operator, a % b, is a binary operator that matches a list of one or more repetitions of a separated by occurrences of b. This is equivalent to a >> *(b >> a).
the following program produces different results depending on which is used:
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
/// Parse target: an id followed by a list of integer values.
struct Record {
    int              id;      // the number before ':'
    std::vector<int> values;  // the comma-separated numbers after ':'
};
BOOST_FUSION_ADAPT_STRUCT(Record,
(int, id)
(std::vector<int>, values)
)
// Parses "1: 2, 3, 4" twice -- once with the list operator (rule1) and once
// with its hand-expanded form (rule2) -- and prints the Record each produces.
int main() {
namespace qi = boost::spirit::qi;
const auto str = std::string{"1: 2, 3, 4"};
// NOTE(review): the answer that follows states that Qi expressions must not be
// held in `auto` variables like this (dangling references, undefined behaviour)
// and recommends qi::copy or qi::rule<>. Kept as written because this program
// is the reproduction being discussed.
const auto rule1 = qi::int_ >> ':' >> (qi::int_ % ',') >> qi::eoi;
const auto rule2 = qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_)) >> qi::eoi;
Record record1;
if (qi::phrase_parse(str.begin(), str.end(), rule1, qi::space, record1)) {
std::cout << record1.id << ": ";
for (const auto& value : record1.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
Record record2;
if (qi::phrase_parse(str.begin(), str.end(), rule2, qi::space, record2)) {
std::cout << record2.id << ": ";
for (const auto& value : record2.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
Live on Coliru
1: 2, 3, 4,
1: 2,
rule1 and rule2 are different only in that rule1 uses the list operator ((qi::int_ % ',')) and rule2 uses its expanded form ((qi::int_ >> *(',' >> qi::int_))). However, rule1 produced 1: 2, 3, 4, (as expected) and rule2 produced 1: 2,. I cannot understand the result of rule2: 1) why is it different from that of rule1 and 2) why were 3 and 4 not included in record2.values even though phrase_parse returned true somehow?
Update X3 version added
First off, you have fallen into a deep trap here:
Qi rules don't work with auto. Use qi::copy or just use qi::rule<>. Your program has undefined behaviour and indeed it crashed for me (valgrind pointed out where the dangling references originated).
So, first off:
const auto rule = qi::copy(qi::int_ >> ':' >> (qi::int_ % ',') >> qi::eoi);
Now, when you delete the redundancy in the program, you get:
Reproducing the problem
Live On Coliru
int main() {
test(qi::copy(qi::int_ >> ':' >> (qi::int_ % ',')));
test(qi::copy(qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_))));
}
Printing
1: 2, 3, 4,
1: 2,
The cause and the fix
What happened to 3, 4 which was successfully parsed?
Well, the attribute propagation rules indicate that qi::int_ >> *(',' >> qi::int_) exposes a tuple<int, vector<int> >. In a bid to magically DoTheRightThing(TM), Spirit accidentally misfires and "assigns" the int into the attribute reference, ignoring the remaining vector<int>.
If you want to make container attributes parse as "an atomic group", use qi::as<>:
test(qi::copy(qi::int_ >> ':' >> qi::as<Record::values_t>() [ qi::int_ >> *(',' >> qi::int_)]));
Here as<> acts as a barrier for the attribute compatibility heuristics and the grammar knows what you meant:
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
/// Parse target: an id followed by a list of integer values.
struct Record {
    /// Container type of `values`, named so that qi::as<> can refer to it.
    using values_t = std::vector<int>;

    int      id;
    values_t values;
};
BOOST_FUSION_ADAPT_STRUCT(Record, id, values)
namespace qi = boost::spirit::qi;
template <typename T>
void test(T const& rule) {
const std::string str = "1: 2, 3, 4";
Record record;
if (qi::phrase_parse(str.begin(), str.end(), rule >> qi::eoi, qi::space, record)) {
std::cout << record.id << ": ";
for (const auto& value : record.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
// Runs the same input through three spellings of the value list.
int main() {
    // list operator
    auto const with_list_operator = qi::copy(qi::int_ >> ':' >> (qi::int_ % ','));
    // hand-expanded form of the list operator
    auto const expanded = qi::copy(qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_)));
    // expanded form wrapped in as<> so it is treated as one container attribute
    auto const expanded_as_container =
        qi::copy(qi::int_ >> ':' >> qi::as<Record::values_t>() [ qi::int_ >> *(',' >> qi::int_)]);

    test(with_list_operator);
    test(expanded);
    test(expanded_as_container);
}
Prints
1: 2, 3, 4,
1: 2,
1: 2, 3, 4,
Because it's time to get people started with X3 (the new version of Spirit), and because I like to challenge myself to do the corresponding tasks in Spirit X3, here is the Spirit X3 version.
There's no problem with auto in X3.
The "broken" case also behaves much better, triggering this static assertion:
// If you got an error here, then you are trying to pass
// a fusion sequence with the wrong number of elements
// as that expected by the (sequence) parser.
static_assert(
fusion::result_of::size<Attribute>::value == (l_size + r_size)
, "Attribute does not have the expected size."
);
That's nice, right?
The workaround seems a bit less readable:
test(int_ >> ':' >> (rule<struct _, Record::values_t>{} = (int_ >> *(',' >> int_))));
But it would be trivial to write your own as<> "directive" (or just a function), if you wanted:
namespace {
// Ad-hoc `as<T>[expr]` helper for X3: wraps `expr` in an anonymous rule named
// "as" whose attribute type is T.
template <typename T>
struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
// as_parser turns the expression (which may be a plain literal) into an X3 parser
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
// Variable template, so the helper is spelled as<T>[...] at the call site
template <typename T> static const as_type<T> as = {};
}
DEMO
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
/// Parse target: an id followed by a list of integer values.
struct Record {
    /// Container type of `values`, named so that rules can refer to it.
    using values_t = std::vector<int>;

    int      id;
    values_t values;
};
namespace x3 = boost::spirit::x3;
template <typename T>
void test(T const& rule) {
const std::string str = "1: 2, 3, 4";
Record record;
auto attr = std::tie(record.id, record.values);
if (x3::phrase_parse(str.begin(), str.end(), rule >> x3::eoi, x3::space, attr)) {
std::cout << record.id << ": ";
for (const auto& value : record.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
namespace {
// Ad-hoc `as<T>[expr]` helper for X3: wraps `expr` in an anonymous rule named
// "as" whose attribute type is T.
template <typename T>
struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
// as_parser turns the expression (which may be a plain literal) into an X3 parser
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
// Variable template, so the helper is spelled as<T>[...] at the call site
template <typename T> static const as_type<T> as = {};
}
int main() {
using namespace x3;
test(int_ >> ':' >> (int_ % ','));
//test(int_ >> ':' >> (int_ >> *(',' >> int_))); // COMPILER asserts "Attribute does not have the expected size."
// "clumsy" x3 style workaround
test(int_ >> ':' >> (rule<struct _, Record::values_t>{} = (int_ >> *(',' >> int_))));
// using an ad-hoc `as<>` implementation:
test(int_ >> ':' >> as<Record::values_t>[int_ >> *(',' >> int_)]);
}
Prints
1: 2, 3, 4,
1: 2, 3, 4,
1: 2, 3, 4,
From a previous post I found a way to parse with boost::spirit a struct of this type:
"parameter" : {
"name" : "MyName" ,
"type" : "MyType" ,
"unit" : "MyUnit" ,
"cardinality" : "MyCardinality",
"value" : "MyValue"
}
It's a simple JSON with key-value pairs. Now I want to parse this struct regardless of the order of the keys, i.e. I also want to parse this struct into the same object:
"parameter" : {
"type" : "MyType" ,
"value" : "MyValue" ,
"unit" : "MyUnit" ,
"cardinality" : "MyCardinality",
"name" : "MyName"
}
I know that I can use the ^ operator to parse data in any order, but I don't know how to handle the commas at the end of every line except the last. How can I parse both structures?
This is the #sehe code from previous post. Grammar is defined here.
#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// This is pasted and copied from another header file
namespace StateMachine {
namespace Private {

    /// The five string values of a "parameter" object, one member per key.
    struct LuaParameterData {
        std::wstring name;         // value of "name"
        std::wstring type;         // value of "type"
        std::wstring unit;         // value of "unit"
        std::wstring cardinality;  // value of "cardinality"
        std::wstring value;        // value of "value"
    };

} // namespace Private
} // namespace StateMachine
BOOST_FUSION_ADAPT_STRUCT(
StateMachine::Private::LuaParameterData,
(std::wstring, name)
(std::wstring, type)
(std::wstring, unit)
(std::wstring, cardinality)
(std::wstring, value)
)
namespace qi = boost::spirit::qi;
// From here original file continues
namespace StateMachine {
namespace Private {
// Grammar for one `"parameter" : { ... }` object, skipping ASCII whitespace.
// The five keys must appear in exactly the order written below, each followed
// by a quoted string, with commas between the pairs.
template<typename Iterator>
struct LuaParameterDataParser : qi::grammar<Iterator, LuaParameterData(), qi::ascii::space_type>
{
LuaParameterDataParser() : LuaParameterDataParser::base_type(start)
{
// One or more non-quote characters between double quotes; lexeme[] keeps inner blanks
quotedString = qi::lexeme['"' >> +(qi::ascii::char_ - '"') >> '"'];
// The five quotedString attributes fill LuaParameterData's members in order
start =
qi::lit("\"parameter\"")
>> ':'
>> '{'
>> qi::lit("\"name\"" ) >> ':' >> quotedString >> ','
>> qi::lit("\"type\"" ) >> ':' >> quotedString >> ','
>> qi::lit("\"unit\"" ) >> ':' >> quotedString >> ','
>> qi::lit("\"cardinality\"") >> ':' >> quotedString >> ','
>> qi::lit("\"value\"" ) >> ':' >> quotedString
>> '}'
;
BOOST_SPIRIT_DEBUG_NODES((start)(quotedString));
}
// NOTE(review): this rule exposes std::string while the target members are
// std::wstring -- confirm the narrow-to-wide conversion is intended.
qi::rule<Iterator, std::string(), qi::ascii::space_type> quotedString;
qi::rule<Iterator, LuaParameterData(), qi::ascii::space_type> start;
};
} // namespace Private
} // namespace StateMachine
int main() {
using It = std::string::const_iterator;
std::string const input = R"(
"parameter" : {
"name" : "name" ,
"type" : "type" ,
"unit" : "unit" ,
"cardinality" : "cardinality",
"value" : "value"
}
)";
It f = input.begin(),
l = input.end();
StateMachine::Private::LuaParameterDataParser<It> p;
StateMachine::Private::LuaParameterData data;
bool ok = qi::phrase_parse(f, l, p, qi::ascii::space, data);
if (ok) {
std::wcout << L"Parsed: \n";
std::wcout << L"\tname: " << data.name << L'\n';
std::wcout << L"\ttype: " << data.type << L'\n';
std::wcout << L"\tunit: " << data.unit << L'\n';
std::wcout << L"\tcardinality: " << data.cardinality << L'\n';
std::wcout << L"\tvalue: " << data.value << L'\n';
} else {
std::wcout << L"Parse failure\n";
}
if (f!=l)
std::wcout << L"Remaining unparsed: '" << std::wstring(f,l) << L"'\n";
}
I'm going to refer to a set of recent answers where I've been over things quite extensively:
Parsing heterogeneous data using Boost::Spirit
ad-hoc JSON-like parsing Reading JSON file with C++ and BOOST
application of a more general JSON grammar: Reading JSON file with C++ and BOOST
Tangentially related:
Boost Spirit : something like permutation, but not exactly
http://boost-spirit.com/home/2011/04/16/the-keyword-parser/: the keyword parser
I am trying to parse 2 different type of strings and assign values into structures. For performance I am trying to use boost spirit subrules.
strings can be of the following types
Animal Type | Animal Attributes
Ex
DOG | Name=tim | Barks=Yes | Has a Tail=N | Address=3 infinite loop
BIRD| Name=poc | Tweets=Yes| Address=10 stack overflow street
The values are stored in an array of Dog and Bird structures below
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_subrule.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <string>
#include <iostream>
using std::cout;
using std::endl;
using std::cerr;
/// Attributes of a DOG record.
struct Dog
{
    std::string Name;      // "Name=" field
    bool        Barks;     // "Barks=" field
    bool        HasATail;  // "Has a Tail=" field
    std::string Address;   // "Address=" field
};
/// Attributes of a BIRD record.
struct Bird
{
    std::string Name;     // "Name=" field
    bool        Tweets;   // "Tweets=" field
    std::string Address;  // "Address=" field
};
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;
namespace phx = boost::phoenix;
// Grammar from the question, built on Spirit repository subrules. It exposes no
// attribute: semantic actions write the parsed values into the members `d` and `b`.
// The question reports that this code crashes GCC 4.8 and 4.9 when compiling.
template <typename Iterator>
struct ZooGrammar : public qi::grammar<Iterator, ascii::space_type>
{
ZooGrammar() : ZooGrammar::base_type(start_)
{
using qi::char_;
// NOTE(review): the answer does not recognise qi::lit_ as a Spirit name -- confirm what was intended
using qi::lit_;
using qi::_1;
using boost::phoenix::ref;
// NOTE(review): only "Y" and "N" are registered, while the sample input also uses "Yes"
boost::spirit::qi::symbols<char, bool> yesno_;
yesno_.add("Y", true)("N", false);
// Subrule group: the entry alternative first, then the two subrule definitions
start_ = (
dog_ | bird_,
dog_ = "DOG" >> lit_[ref(d.Name) = _1]>> '|'
>>"Barks=">>yesno_[ref(d.Barks) = _1] >>'|'
>>"Has a Tail=">>yesno_[ref(d.HasATail) = _1] >> '|'
>>lit_[ref(d.Address) = _1]
,
bird_ = "BIRD" >> lit_[ref(b.Name) = _1]>> '|'
>>"Tweets=">>yesno_[ref(b.Tweets) = _1] >>'|'
>>lit_[ref(b.Address) = _1]
);
}
qi::rule<Iterator, ascii::space_type> start_;
repo::qi::subrule<0> dog_;
repo::qi::subrule<1> bird_;
// Parse results, filled in by the semantic actions above
Bird b;
Dog d;
};
int main()
{
std::string test1="DOG | Name=tim | Barks=Yes | Has a Tail=N | Address=3 infinite loop";
std::string test2="BIRD| Name=poc | Tweets=Yes| Address=10 stack overflow street";
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef ZooGrammar<iterator_type> grammar;
iterator_type start = test1.begin();
iterator_type end = test1.end();
ZooGrammar g;
if(boost::spirit::qi::phrase_parse(start, end, g, space))
{
cout<<"matched"<<endl;
}
}
The code above crashes the compiler GCC 4.8 and 4.9. I don't know where I am making the mistake.
Please test run the code above in Coliru link
Many thanks in advance !
Subrules are a bit antiquated. To be honest, I didn't even know there was still such a thing in Spirit V2.
I suggest using regular Spirit V2 attribute propagation, which makes things a bit more readable at once:
dog_ = qi::lit("DOG") >> '|' >> "Name=" >> lit_ >> '|'
>> "Barks=" >> yesno_ >> '|'
>> "Has a Tail=" >> yesno_ >> '|'
>> "Address=" >> lit_
;
bird_ = qi::lit("BIRD") >> '|' >> "Name=" >> lit_ >> '|'
>> "Tweets=" >> yesno_ >> '|'
>> "Address=" >> lit_
;
start_ = dog_ | bird_;
I've imagined a lit_ rule (as qi::lit_ doesn't ring any bells):
lit_ = qi::lexeme [ *~qi::char_('|') ];
Of course, you need to adapt the attribute types as far as they don't have builtin support (as with boost::variant<Dog, Bird>, std::string and bool which are all handled without any additional code):
BOOST_FUSION_ADAPT_STRUCT(Dog,
(std::string, Name)(bool, Barks)(bool, HasATail)(std::string, Address))
BOOST_FUSION_ADAPT_STRUCT(Bird,
(std::string, Name)(bool, Tweets)(std::string, Address))
Now with the program extended to print some debug information, output is: Live On Coliru
Matched: [DOG|Name=tim |Barks=Yes|Has a Tail=No|Address=3 infinite loop]
Matched: [BIRD|Name=poc |Tweets=Yes|Address=10 stack overflow street]
Full Sample Code
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
/// Returns "Yes" for true and "No" for false.
static const char* YesNo(bool b) {
    if (b)
        return "Yes";
    return "No";
}
/// Attributes of a DOG record; streams as `[DOG|Name=...|Barks=...|Has a Tail=...|Address=...]`.
struct Dog {
    std::string Name;
    bool        Barks;
    bool        HasATail;
    std::string Address;

    friend std::ostream& operator <<(std::ostream& os, Dog const& dog) {
        os << "[DOG|Name=" << dog.Name;
        os << "|Barks=" << YesNo(dog.Barks);
        os << "|Has a Tail=" << YesNo(dog.HasATail);
        os << "|Address=" << dog.Address;
        return os << "]";
    }
};
/// Attributes of a BIRD record; streams as `[BIRD|Name=...|Tweets=...|Address=...]`.
struct Bird {
    std::string Name;
    bool        Tweets;
    std::string Address;

    friend std::ostream& operator <<(std::ostream& os, Bird const& bird) {
        os << "[BIRD|Name=" << bird.Name;
        os << "|Tweets=" << YesNo(bird.Tweets);
        os << "|Address=" << bird.Address;
        return os << "]";
    }
};
typedef boost::variant<Dog, Bird> ZooAnimal;
BOOST_FUSION_ADAPT_STRUCT(Dog, (std::string, Name)(bool, Barks)(bool, HasATail)(std::string, Address))
BOOST_FUSION_ADAPT_STRUCT(Bird, (std::string, Name)(bool, Tweets)(std::string, Address))
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Grammar for one animal record: either a "DOG|..." or a "BIRD|..." line.
// The synthesized attribute is a ZooAnimal; ascii::space is used as the skipper.
template <typename Iterator>
struct ZooGrammar : public qi::grammar<Iterator, ZooAnimal(), ascii::space_type>
{
ZooGrammar() : ZooGrammar::base_type(start_)
{
using qi::_1;
// Accepted spellings for the boolean fields.
yesno_.add("Yes", true)("Y", true)("No", false)("N", false);
// Each record is a fixed sequence of '|'-separated "Key=value" fields; the
// field values propagate into the fusion-adapted struct in order.
dog_ = qi::lit("DOG") >> '|' >> "Name=" >> lit_ >> '|'
>> "Barks=" >> yesno_ >> '|'
>> "Has a Tail=" >> yesno_ >> '|'
>> "Address=" >> lit_
;
bird_ = qi::lit("BIRD") >> '|' >> "Name=" >> lit_ >> '|'
>> "Tweets=" >> yesno_ >> '|'
>> "Address=" >> lit_
;
// Alternatives are tried in order: dog first, then bird.
start_ = dog_ | bird_;
// Free text: every character up to (not including) the next '|'. lexeme[]
// turns the skipper off inside the field, so embedded whitespace is kept.
lit_ = qi::lexeme [ *~qi::char_('|') ];
// Only has an effect when BOOST_SPIRIT_DEBUG is defined.
BOOST_SPIRIT_DEBUG_NODES((dog_)(bird_)(start_)(lit_))
}
private:
qi::rule<Iterator, ZooAnimal(), ascii::space_type> start_;
qi::rule<Iterator, std::string(), ascii::space_type> lit_;
qi::rule<Iterator, Dog(), ascii::space_type> dog_;
qi::rule<Iterator, Bird(), ascii::space_type> bird_;
// Symbol table mapping the yes/no spellings to bool.
qi::symbols<char, bool> yesno_;
};
// Parses each sample record with ZooGrammar and prints either the parsed
// animal or a failure message, followed by any input left unconsumed.
int main()
{
    typedef std::string::const_iterator iterator_type;
    typedef ZooGrammar<iterator_type> grammar;

    // The grammar does not depend on the input, so build it once instead of
    // once per record.
    grammar g;

    for (std::string const input : {
            "DOG | Name=tim | Barks=Yes | Has a Tail=N | Address=3 infinite loop",
            "BIRD| Name=poc | Tweets=Yes| Address=10 stack overflow street"
        })
    {
        iterator_type start = input.begin();
        iterator_type end = input.end();
        ZooAnimal animal;

        if (qi::phrase_parse(start, end, g, ascii::space, animal))
            std::cout << "Matched: " << animal << "\n";
        else
            std::cout << "Parse failed\n";

        // phrase_parse advances `start`; anything left over was not consumed.
        if (start != end)
            std::cout << "Remaining input: '" << std::string(start, end) << "'\n";
    }
}
I was attempting to replicate this example in order to implement C++ like operator precedence rules (I started with a subset, but I eventually plan to add the others).
Try as I might, I could not get the grammar to parse a single binary operation. It would parse literals (44, 3.42, "stackoverflow") just fine, but would fail anything like 3 + 4.
I did look at this question, and this one in an attempt to get my solution to work, but got the same result.
(In an attempt to keep things short, I'll post only the relevant bits here, the full code is here)
Relevant data structures for the AST:
// Binary operators the AST can represent.
enum class BinaryOperator
{
ADD, SUBTRACT, MULTIPLY, DIVIDE, MODULO, LEFT_SHIFT, RIGHT_SHIFT, EQUAL, NOT_EQUAL, LOWER, LOWER_EQUAL, GREATER, GREATER_EQUAL,
};
// A literal value: floating point, integer or string.
typedef boost::variant<double, int, std::string> Litteral;
// A named identifier.
struct Identifier { std::string name; };
// Any expression node. The recursive_wrapper alternatives name node types
// whose declarations are not part of this excerpt.
typedef boost::variant<
Litteral,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
// AST node for a binary operation: two operand expressions plus the operator.
struct BinaryOperation
{
Expression rhs, lhs;
BinaryOperator op;
// Default constructor; note that it does not initialize op.
BinaryOperation() {}
// Members are initialized in declaration order (rhs, lhs, op), not in the
// order written in this initializer list.
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
The grammar:
// Grammar (as posted in the question) that produces a Program, with one rule
// per binary-operator precedence level.
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
// Each level below fills its symbol table, then defines its rule as
// "operand operator operand" in terms of the next level; the operator is
// mandatory at every level.
equalOp.add("==", BinaryOperator::EQUAL)("!=", BinaryOperator::NOT_EQUAL);
equal %= (lowerGreater >> equalOp >> lowerGreater);
equal.name("equal");
lowerGreaterOp.add("<", BinaryOperator::LOWER)("<=", BinaryOperator::LOWER_EQUAL)(">", BinaryOperator::GREATER)(">=", BinaryOperator::GREATER_EQUAL);
lowerGreater %= (shift >> lowerGreaterOp >> shift);
lowerGreater.name("lower or greater");
shiftOp.add("<<", BinaryOperator::LEFT_SHIFT)(">>", BinaryOperator::RIGHT_SHIFT);
shift %= (addSub >> shiftOp >> addSub);
shift.name("shift");
addSubOp.add("+", BinaryOperator::ADD)("-", BinaryOperator::SUBTRACT);
addSub %= (multDivMod >> addSubOp >> multDivMod);
addSub.name("add or sub");
multDivModOp.add("*", BinaryOperator::MULTIPLY)("/", BinaryOperator::DIVIDE)("%", BinaryOperator::MODULO);
multDivMod %= (value >> multDivModOp >> value);
multDivMod.name("mult, div, or mod");
// A value is an identifier, a literal, or a parenthesized expression
// ('>' makes the inner expression and the closing ')' mandatory once '(' matched).
value %= identifier | litteral | ('(' > expression > ')');
value.name("value");
// The program is a sequence of ';'-terminated values.
start %= qi::eps >> *(value >> qi::lit(';'));
start.name("start");
expression %= identifier | litteral | equal;
expression.name("expression");
// lexeme[] disables skipping inside identifiers and quoted strings.
identifier %= qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
identifier.name("identifier");
litteral %= qi::double_ | qi::int_ | quotedString;
litteral.name("litteral");
quotedString %= qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
quotedString.name("quoted string");
namespace phx = boost::phoenix;
using namespace qi::labels;
// On an expectation failure under start, print what was expected (_4) and
// the input from the error position (_3) to the end (_2), then fail the parse.
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
}
// Operator spellings mapped to their BinaryOperator enumerators.
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
qi::rule<Iterator, Skipper, BinaryOperation()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Litteral()> litteral;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
The main problem (indeed) appears to be addressed in that second answer you linked to.
Let me address some points:
the main problem was compound:
your start rule is
start %= qi::eps >> *(value >> qi::lit(';'));
this means it expects values:
value %= identifier | literal | ('(' > expression > ')');
however, since this parses only identifiers and literals or parenthesized subexpressions, the 3+4 binary operation will never be parsed.
your expression rule, again allows identifier or literal first (redundant/confusing):
expression %= identifier | literal | equal;
I think you'd want something more like
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
// and then
start = qi::eps >> -expression % ';';
your BinaryOperation productions allow only for the case where the operator is present; this breaks the way the rules are nested for operator precedence: a multDivOp would never be accepted as match, unless it happens to be followed by an addSubOp:
addSub %= (multDivMod >> addSubOp >> multDivMod);
multDivMod %= (value >> multDivModOp >> value);
This can best be fixed as shown in the linked answer:
addSub = multDivMod >> -(addSubOp >> multDivMod);
multDivMod = value >> -(multDivModOp >> value);
where you can use semantic actions to build the AST nodes "dynamically":
addSub = multDivMod >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
This beats the "tedious" declarative approach hands-down (which leads to a lot of backtracking, see e.g. Boost spirit poor performance with Alternative parser)
the literal rule will parse an integer as a double, because it isn't strict:
literal %= qi::double_ | qi::int_ | quotedString;
you can fix this like:
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
FunctionCall should adapt functionName as an Identifier (not std::string)
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
Your Expression operator<< could (should) be a boost::static_visitor so that you
eliminate magic type switch numbers
get compiler checking of completeness of the switch
can leverage overload resolution to switch on variant member types
Using c++11, the code could still be inside the one function:
// Streams an Expression as "Expression (<kind>: <details>)".
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
    os << "Expression ";
    // Local visitor with one overload per variant alternative, so overload
    // resolution selects the printer and the compiler checks completeness.
    struct v : boost::static_visitor<> {
        v(std::ostream& os) : os(os) {}
        std::ostream& os;
        void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
        void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
        void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
        void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
        void operator()(FunctionCall const& e) const {
            os << "(function call: " << e.functionName << "(";
            // Comma-separate the arguments. Iterating from begin() (rather
            // than begin() + 1) stays well-defined for an empty argument list.
            for (auto it = e.args.begin(); it != e.args.end(); ++it) {
                if (it != e.args.begin()) os << ", ";
                os << *it;
            }
            // Close both the argument list and the enclosing "(function call: ".
            os << "))";
        }
    };
    boost::apply_visitor(v(os), expr);
    return os;
}
you can use the BOOST_SPIRIT_DEBUG_NODES macro to name your rules
BOOST_SPIRIT_DEBUG_NODES(
(start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
you should include from the spirit/include/ directory, which then relays to spirit/home/ or phoenix/include/ instead of including them directly.
Here is a fully working sample, which also improves the grammar rules for readability. Live On Coliru:
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Unary operators the AST can represent.
enum class UnaryOperator
{
    NOT,
    PLUS,
    MINUS,
};

// Streams the operator using its source spelling ("!", "+" or "-").
std::ostream& operator<<(std::ostream& os, const UnaryOperator op)
{
    switch (op)
    {
        case UnaryOperator::NOT:   return os << "!";
        case UnaryOperator::PLUS:  return os << "+";
        case UnaryOperator::MINUS: return os << "-";
    }
    // Only reachable for a value that is not one of the enumerators. Keep the
    // debug assertion, but still return so the function never flows off its
    // end when assert is compiled out (NDEBUG).
    assert(false);
    return os;
}
// Binary operators the AST can represent.
enum class BinaryOperator
{
    ADD, SUBTRACT, MULTIPLY, DIVIDE,
    MODULO,
    LEFT_SHIFT, RIGHT_SHIFT,
    EQUAL, NOT_EQUAL,
    LOWER, LOWER_EQUAL,
    GREATER, GREATER_EQUAL,
};

// Streams the operator using its source spelling (e.g. "+", "<<", ">=").
std::ostream& operator<<(std::ostream& os, const BinaryOperator op)
{
    switch (op)
    {
        case BinaryOperator::ADD:           return os << "+";
        case BinaryOperator::SUBTRACT:      return os << "-";
        case BinaryOperator::MULTIPLY:      return os << "*";
        case BinaryOperator::DIVIDE:        return os << "/";
        case BinaryOperator::MODULO:        return os << "%";
        case BinaryOperator::LEFT_SHIFT:    return os << "<<";
        case BinaryOperator::RIGHT_SHIFT:   return os << ">>";
        case BinaryOperator::EQUAL:         return os << "==";
        case BinaryOperator::NOT_EQUAL:     return os << "!=";
        case BinaryOperator::LOWER:         return os << "<";
        case BinaryOperator::LOWER_EQUAL:   return os << "<=";
        case BinaryOperator::GREATER:       return os << ">";
        case BinaryOperator::GREATER_EQUAL: return os << ">=";
    }
    // Only reachable for a value that is not one of the enumerators. Keep the
    // debug assertion, but still return so the function never flows off its
    // end when assert is compiled out (NDEBUG).
    assert(false);
    return os;
}
// A literal value: floating point, integer or string.
typedef boost::variant<
double,
int,
std::string
> Literal;
// A named identifier.
struct Identifier
{
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(Identifier, (std::string, name))
// Forward declarations: these node types contain Expression members
// themselves, so the variant below holds them through recursive_wrapper.
struct UnaryOperation;
struct BinaryOperation;
struct FunctionCall;
// Any expression node of the AST.
typedef boost::variant<
Literal,
Identifier,
boost::recursive_wrapper<UnaryOperation>,
boost::recursive_wrapper<BinaryOperation>,
boost::recursive_wrapper<FunctionCall>
> Expression;
// AST node for a unary operation: an operand expression and the operator.
struct UnaryOperation
{
Expression rhs;
UnaryOperator op;
};
// Exposes the members as a fusion sequence, in the order (rhs, op).
BOOST_FUSION_ADAPT_STRUCT(UnaryOperation, (Expression,rhs)(UnaryOperator,op))
// AST node for a binary operation: two operand expressions and the operator
// between them.
// NOTE(review): the grammar in this sample builds nodes as
// (first-parsed operand, operator, second operand), so `rhs` appears to hold
// the left-hand operand and `lhs` the right-hand one - confirm the naming.
struct BinaryOperation
{
Expression rhs;
BinaryOperator op;
Expression lhs;
// Default constructor; note that it does not initialize op.
BinaryOperation() {}
BinaryOperation(Expression rhs, BinaryOperator op, Expression lhs) : rhs(rhs), op(op), lhs(lhs) {}
};
// Exposes the members as a fusion sequence, in the order (rhs, op, lhs).
BOOST_FUSION_ADAPT_STRUCT(BinaryOperation, (Expression,rhs)(BinaryOperator,op)(Expression,lhs))
// AST node for a function call: the callee's identifier and its arguments.
struct FunctionCall
{
Identifier functionName;
std::vector<Expression> args;
};
// Exposes the members as a fusion sequence, in the order (functionName, args).
BOOST_FUSION_ADAPT_STRUCT(FunctionCall, (Identifier, functionName)(std::vector<Expression>, args))
// Root of the AST: the list of parsed statements, each one an Expression.
struct Program
{
std::vector<Expression> statements;
};
// Exposes the single member as a fusion sequence.
BOOST_FUSION_ADAPT_STRUCT(Program, (std::vector<Expression>, statements))
// Streams an Expression as "Expression (<kind>: <details>)".
std::ostream& operator<<(std::ostream& os, const Expression& expr)
{
    os << "Expression ";
    // Local visitor with one overload per variant alternative, so overload
    // resolution selects the printer and the compiler checks completeness.
    struct v : boost::static_visitor<> {
        v(std::ostream& os) : os(os) {}
        std::ostream& os;
        void operator()(Literal const& e) const { os << "(literal: " << e << ")"; }
        void operator()(Identifier const& e) const { os << "(identifier: " << e.name << ")"; }
        void operator()(UnaryOperation const& e) const { os << "(unary op: " << boost::fusion::as_vector(e) << ")"; }
        void operator()(BinaryOperation const& e) const { os << "(binary op: " << boost::fusion::as_vector(e) << ")"; }
        void operator()(FunctionCall const& e) const {
            os << "(function call: " << e.functionName << "(";
            // Comma-separate the arguments. Iterating from begin() (rather
            // than begin() + 1) stays well-defined for an empty argument list.
            for (auto it = e.args.begin(); it != e.args.end(); ++it) {
                if (it != e.args.begin()) os << ", ";
                os << *it;
            }
            // Close both the argument list and the enclosing "(function call: ".
            os << "))";
        }
    };
    boost::apply_visitor(v(os), expr);
    return os;
}
// Streams a Program as a brace-delimited block with one statement per line.
std::ostream& operator<<(std::ostream& os, const Program& prog)
{
    os << "Program" << std::endl << "{" << std::endl;
    for (const Expression& expr : prog.statements)
    {
        // Write to the stream that was passed in; writing to std::cout here
        // would misplace the statements whenever os is any other stream.
        os << "\t" << expr << std::endl;
    }
    os << "}" << std::endl;
    return os;
}
// Grammar producing a Program: ';'-separated expressions, with one rule per
// binary-operator precedence level.
template<typename Iterator, typename Skipper>
struct BoltGrammar : qi::grammar<Iterator, Skipper, Program()>
{
BoltGrammar() : BoltGrammar::base_type(start, "start")
{
using namespace qi::labels;
// Operator spellings mapped to their BinaryOperator enumerators.
equalOp.add
("==", BinaryOperator::EQUAL)
("!=", BinaryOperator::NOT_EQUAL);
lowerGreaterOp.add
("<", BinaryOperator::LOWER)
("<=", BinaryOperator::LOWER_EQUAL)
(">", BinaryOperator::GREATER)
(">=", BinaryOperator::GREATER_EQUAL);
shiftOp.add
("<<", BinaryOperator::LEFT_SHIFT)
(">>", BinaryOperator::RIGHT_SHIFT);
addSubOp.add
("+", BinaryOperator::ADD)
("-", BinaryOperator::SUBTRACT);
multDivModOp.add
("*", BinaryOperator::MULTIPLY)
("/", BinaryOperator::DIVIDE)
("%", BinaryOperator::MODULO);
// Precedence levels, each defined in terms of the next one. Every level
// parses one operand into _val, then optionally an operator and a second
// operand; if those are present, _val is replaced by a BinaryOperation
// built from (previous _val, operator, second operand).
equal = lowerGreater [ _val=_1 ] >> -(equalOp >> lowerGreater) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
lowerGreater = shift [ _val=_1 ] >> -(lowerGreaterOp >> shift) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
shift = addSub [ _val=_1 ] >> -(shiftOp >> addSub) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
addSub = multDivMod [ _val=_1 ] >> -(addSubOp >> multDivMod) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
multDivMod = value [ _val=_1 ] >> -(multDivModOp >> value) [ _val = phx::construct<BinaryOperation>(_val, _1, _2) ];
// Statements are separated by ';'. Each one is optional (-expression), so a
// trailing ';' as in the sample input is accepted.
start = qi::eps >> -expression % ';';
// Alternatives are tried in the order written.
expression = '(' >> expression >> ')' | equal | value;
value = identifier | literal;
// lexeme[] disables skipping inside identifiers and quoted strings.
identifier = qi::lexeme[ascii::char_("a-zA-Z") >> *ascii::char_("0-9a-zA-Z")];
// Strict real policies reject input without a fractional part or exponent,
// so plain integers fall through to qi::int_ instead of becoming doubles.
qi::real_parser<double, qi::strict_real_policies<double> > strict_double;
literal = quotedString | strict_double | qi::int_;
quotedString = qi::lexeme['"' >> +(ascii::char_ - '"') >> '"'];
// On an expectation failure under start, print what was expected (_4) and
// the input from the error position (_3) to the end (_2), then fail the parse.
qi::on_error<qi::fail>(start, std::cout << phx::val("Error! Expecting: ") << _4 << phx::val(" here: \"") << phx::construct<std::string>(_3, _2) << phx::val("\"") << std::endl);
// Names the rules for debug output; only has an effect when
// BOOST_SPIRIT_DEBUG is defined.
BOOST_SPIRIT_DEBUG_NODES((start)(expression)(identifier)(literal)(quotedString)
(equal)(lowerGreater)(shift)(addSub)(multDivMod)(value)
)
}
qi::symbols<char, BinaryOperator> equalOp, lowerGreaterOp, shiftOp, addSubOp, multDivModOp;
// All precedence levels yield an Expression, so a level with no operator
// simply passes its operand through.
qi::rule<Iterator, Skipper, Expression()> equal, lowerGreater, shift, addSub, multDivMod;
qi::rule<Iterator, Skipper, Expression()> value;
qi::rule<Iterator, Skipper, Program()> start;
qi::rule<Iterator, Skipper, Expression()> expression;
qi::rule<Iterator, Skipper, Identifier()> identifier;
qi::rule<Iterator, Skipper, Literal()> literal;
qi::rule<Iterator, Skipper, std::string()> quotedString;
};
typedef std::string::iterator Iterator;
typedef boost::spirit::ascii::space_type Skipper;
int main()
{
BoltGrammar<Iterator, Skipper> grammar;
std::string str("3; 4.2; \"lounge <c++>\"; 3 + 4;");
Program prog;
Iterator iter = str.begin(), last = str.end();
bool r = phrase_parse(iter, last, grammar, ascii::space, prog);
if (r && iter == last)
{
std::cout << "Parsing succeeded: " << prog << std::endl;
}
else
{
std::cout << "Parsing failed, remaining: " << std::string(iter, last) << std::endl;
}
return 0;
}
Prints:
Parsing succeeded: Program
{
Expression (literal: 3)
Expression (literal: 4.2)
Expression (literal: lounge <c++>)
Expression (binary op: (Expression (literal: 3) + Expression (literal: 4)))
}