I must have missed something with the boost::recursive_wrapper thing, I always get an error:
error: no matching function for call to 'boost::tuples::tuple, std::allocator >, client::compare_attr_op_t, std::basic_string, std::allocator >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>::tuple(client::expression_value&)'
for the line defining the "expression" in the grammar: expression %= attribute_test | boolean_expression;
Any idea?
// AST types for the first attempt at the attribute-condition grammar
// (the attempt that produces the compile error quoted above).
namespace client {
// Comparison operator between an attribute name and a quoted value.
enum compare_attr_op_t {
cao_eq,
cao_neq
};
// Boolean connectives of the expression language.
enum boolean_op_t {
bo_and,
bo_or,
bo_not
};
// Forward declaration: the typedefs below only see an incomplete type.
struct expression_value;
// (attribute name, comparison operator, attribute value)
typedef boost::tuple<std::string, compare_attr_op_t, std::string> attribute_test_value;
// (left operand, connective, right operand)
// NOTE(review): the tuple holds expression_value by value while that type is
// still incomplete; the recursive_wrapper below wraps the tuple, not its
// elements -- presumably related to the reported compile error, confirm.
typedef boost::tuple< expression_value, boolean_op_t, expression_value > boolean_expression_value;
// A sub-expression is either a leaf attribute test or a nested boolean expression.
typedef boost::variant< attribute_test_value, boost::recursive_wrapper<boolean_expression_value> > sub_expression_value;
// Root AST node: a named struct around the variant; this is the type the
// forward declaration above refers to.
struct expression_value {
sub_expression_value value;
};
}
// Expose expression_value's single member `value` to Boost.Fusion.
// NOTE(review): presumably so Spirit can fill it as a rule attribute -- confirm.
BOOST_FUSION_ADAPT_STRUCT(
client::expression_value,
(client::sub_expression_value, value)
)
namespace client {
// Symbol table translating comparison-operator spellings into compare_attr_op_t.
// A single global instance, `compare_attr`, is defined alongside the type.
struct compare_attr_ : qi::symbols<char, compare_attr_op_t>
{
    compare_attr_()
    {
        // One registration per statement; same entries, same order.
        this->add("=", cao_eq);
        this->add("!=", cao_neq);
    }
} compare_attr;
// Symbol table translating "&" and "|" into boolean_op_t values.
// A single global instance, `boolean_op`, is defined alongside the type.
struct boolean_op_ : qi::symbols<char, boolean_op_t>
{
    boolean_op_()
    {
        // One registration per statement; same entries, same order.
        this->add("&", bo_and);
        this->add("|", bo_or);
    }
} boolean_op;
// First attempt at the attribute-condition grammar: the start rule is
// `expression`, its attribute is expression_value, and ascii::space is the skipper.
template <typename Iterator>
struct attribute_conditions : qi::grammar<Iterator, expression_value(), ascii::space_type>
{
attribute_conditions() : attribute_conditions::base_type(expression)
{
using qi::eps;
using qi::lit;
using qi::_val;
using qi::lexeme;
using qi::_1;
using qi::_2;
using qi::_3;
using ascii::char_;
using ascii::alnum;
using ascii::alpha;
// NOTE(review): `expression` can start with `boolean_expression`, which in turn
// starts with `expression` again. That is left recursion, which recursive-descent
// parsers such as Qi typically cannot handle -- confirm.
expression %= attribute_test | boolean_expression;
boolean_expression %= expression >> boolean_op >> expression;
// The semantic action prints the three parsed parts to std::cout; %= keeps
// automatic attribute propagation enabled alongside the action.
attribute_test %= (attribute_name >> compare_attr >> attribute_value)[std::cout << _1 << ' ' << _2 << ' ' << _3];
// NOTE(review): this rule is declared with the space skipper and uses no lexeme[],
// so whitespace between the characters of a name is presumably skipped -- confirm.
attribute_name %= alpha >> *alnum;
// Quoted value: lexeme[] keeps whitespace inside the quotes; `>` is the expectation operator.
attribute_value %= lexeme['"' > +(char_ - '"') > '"'];
}
qi::rule<Iterator, expression_value(), ascii::space_type> expression;
qi::rule<Iterator, boolean_expression_value(), ascii::space_type> boolean_expression;
qi::rule<Iterator, attribute_test_value(), ascii::space_type> attribute_test;
qi::rule<Iterator, std::string(), ascii::space_type> attribute_name;
qi::rule<Iterator, std::string(), ascii::space_type> attribute_value;
};
}
I threw it all away and started from scratch, which gave me this nice, working implementation:
// AST types for the reworked attribute-condition grammar. The nesting mirrors
// the grammar rules: expression -> and_expression -> node.
namespace client {
// Comparison operator between an attribute name and a quoted value.
enum compare_attr_op_t {
cao_eq,
cao_neq
};
// Boolean connectives; each one has its own symbol table further down.
enum boolean_op_t {
bo_and,
bo_or,
bo_not
};
// Forward declarations needed because the node types refer to each other.
struct expression_value;
struct or_op_value;
struct and_expression_value;
struct and_op_value;
struct not_op_value;
// (attribute name, comparison operator, attribute value)
typedef boost::tuple<std::string, compare_attr_op_t, std::string> attribute_test_value;
// A node is a leaf test, a nested expression, or a negation. The two recursive
// alternatives are still incomplete here, hence boost::recursive_wrapper.
typedef boost::variant<attribute_test_value, boost::recursive_wrapper<expression_value>, boost::recursive_wrapper<not_op_value> > node_value;
// Negation: the operator that was parsed and the node it applies to.
struct not_op_value {
boolean_op_t operation;
node_value rnode;
};
// One "<and-operator> <node>" tail element of an and-expression.
struct and_op_value {
boolean_op_t operation;
node_value rnode;
};
// A first node followed by zero or more and-operations.
struct and_expression_value {
node_value lnode;
std::vector<and_op_value> ops;
};
// One "<or-operator> <and-expression>" tail element of an expression.
struct or_op_value {
boolean_op_t operation;
and_expression_value rnode;
};
// Root: a first and-expression followed by zero or more or-operations.
struct expression_value {
and_expression_value lnode;
std::vector<or_op_value> ops;
};
}
// Fusion adaptations for the AST structs. Each lists the members in
// declaration order, together with their types.
// NOTE(review): presumably what lets the grammar rules fill these structs
// through attribute propagation -- confirm.

// not_op_value: operator, then operand.
BOOST_FUSION_ADAPT_STRUCT(
client::not_op_value,
(client::boolean_op_t, operation)
(client::node_value, rnode)
)
// and_expression_value: first node, then the list of and-operations.
BOOST_FUSION_ADAPT_STRUCT(
client::and_expression_value,
(client::node_value, lnode)
(std::vector<client::and_op_value>, ops)
)
// and_op_value: operator, then right-hand node.
BOOST_FUSION_ADAPT_STRUCT(
client::and_op_value,
(client::boolean_op_t, operation)
(client::node_value, rnode)
)
// expression_value: first and-expression, then the list of or-operations.
BOOST_FUSION_ADAPT_STRUCT(
client::expression_value,
(client::and_expression_value, lnode)
(std::vector<client::or_op_value>, ops)
)
// or_op_value: operator, then right-hand and-expression.
BOOST_FUSION_ADAPT_STRUCT(
client::or_op_value,
(client::boolean_op_t, operation)
(client::and_expression_value, rnode)
)
namespace client {
// Symbol table translating comparison-operator spellings into compare_attr_op_t.
// A single global instance, `compare_attr`, is defined alongside the type.
struct compare_attr_ : qi::symbols<char, compare_attr_op_t>
{
    compare_attr_()
    {
        // One registration per statement; same entries, same order.
        this->add("=", cao_eq);
        this->add("!=", cao_neq);
    }
} compare_attr;
// Symbol table for the conjunction operator: both spellings map to bo_and.
// A single global instance, `boolean_op_and`, is defined alongside the type.
struct boolean_op_and_t : qi::symbols<char, boolean_op_t>
{
    boolean_op_and_t()
    {
        this->add("&", bo_and);
        this->add("and", bo_and);
    }
} boolean_op_and;
// Symbol table for the disjunction operator: both spellings map to bo_or.
// A single global instance, `boolean_op_or`, is defined alongside the type.
struct boolean_op_or_t : qi::symbols<char, boolean_op_t>
{
    boolean_op_or_t()
    {
        this->add("|", bo_or);
        this->add("or", bo_or);
    }
} boolean_op_or;
// Symbol table for the negation operator: both spellings map to bo_not.
// A single global instance, `boolean_op_not`, is defined alongside the type.
struct boolean_op_not_t : qi::symbols<char, boolean_op_t>
{
    boolean_op_not_t()
    {
        this->add("!", bo_not);
        this->add("not", bo_not);
    }
} boolean_op_not;
/// Grammar for boolean conditions over attribute tests, for example
///   a = "x" and (b != "y" or not c = "z")
/// The start rule is `expression`; its attribute is expression_value and
/// ascii::space is skipped between tokens.
template <typename Iterator>
struct attribute_conditions : qi::grammar<Iterator, expression_value(), ascii::space_type>
{
    attribute_conditions() : attribute_conditions::base_type(expression)
    {
        using qi::lexeme;
        using ascii::char_;
        using ascii::alpha;
        using ascii::no_case;

        // Lowest precedence: and-expressions joined by the "or" operator.
        expression %= and_expression >> *(no_case[boolean_op_or] >> and_expression);
        // Next level: nodes joined by the "and" operator.
        and_expression %= node >> *(no_case[boolean_op_and] >> node);
        // A node is a leaf test, a parenthesised expression, or a negation.
        node %= attribute_test | ('(' >> expression >> ')') | not_operation;
        not_operation %= no_case[boolean_op_not] >> node;
        attribute_test %= (attribute_name >> no_case[compare_attr] >> attribute_value);
        // lexeme[] disables the skipper inside the name. Without it the rule's
        // space skipper would run between the characters, so "foo bar" would be
        // accepted as the single name "foobar".
        attribute_name %= lexeme[alpha >> *char_("A-Za-z0-9_")];
        // Quoted value; whitespace inside the quotes is kept for the same reason.
        attribute_value %= lexeme['"' > +(char_ - '"') > '"'];
    }

    qi::rule<Iterator, expression_value(), ascii::space_type> expression;
    qi::rule<Iterator, and_expression_value(), ascii::space_type> and_expression;
    qi::rule<Iterator, not_op_value(), ascii::space_type> not_operation;
    qi::rule<Iterator, node_value(), ascii::space_type> node;
    qi::rule<Iterator, attribute_test_value(), ascii::space_type> attribute_test;
    qi::rule<Iterator, std::string(), ascii::space_type> attribute_name;
    qi::rule<Iterator, std::string(), ascii::space_type> attribute_value;
};
}
This compiles OK for me using the following includes in Visual Studio C++ 2010/Boost 1.36.0. Possibly your compiler cannot handle the templates here.
#include <boost/tuple/tuple.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
using namespace boost::spirit;
Related
I have this example code, which parses the string str correctly.
How do I make it work if there are any extra characters before and/or after the string? For example, if I did str = std::string("AAA") + str + std::string("AAA")
frame.h
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Result of parsing one `frame={...}` record. frameParser assigns each string
// member from the key of the same name and `kv` from the `args=[...]` list.
struct frame
{
std::string addr;
std::string func;
std::string file;
std::string fullname;
std::string line;
// name -> value pairs of the args list
std::map<std::string, std::string> kv;
};
// Grammar for `args=[{name="..",value=".."},...]`, producing a name -> value map.
// No skipper is declared for this grammar or its rules.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// At least one pair, further pairs separated by ','.
query =
qi::lit("args=[") >> pair >> *(qi::lit(',') >> pair) >> qi::lit(']');
// One {name="..",value=".."} element; the two quoted strings form the std::pair.
pair = qi::lit("{name=") >> quoted_string >> qi::lit(",value=") >>
quoted_string >> qi::lit("}");
// NOTE(review): `key` is defined here but not referenced by any other rule of this grammar.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Text between double quotes; the quotes themselves are not part of the attribute.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
}
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar for a `frame={...}` record. The fields may appear in any order:
// each one is parsed by a rule that writes into the frame passed as inherited attribute.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Phoenix expressions naming one member of the frame received as _r1.
// NOTE(review): keeping Phoenix expressions in `auto` variables is commonly
// reported as a dangling-reference hazard -- confirm this is safe here.
static const auto _addr = phx::bind(&frame::addr, qi::_r1);
static const auto _func = phx::bind(&frame::func, qi::_r1);
static const auto _file = phx::bind(&frame::file, qi::_r1);
static const auto _fullname = phx::bind(&frame::fullname, qi::_r1);
static const auto _line = phx::bind(&frame::line, qi::_r1);
static const auto _kv = phx::bind(&frame::kv, qi::_r1);
// key="value" lexemes; each yields the quoted value as std::string.
func = qi::lit("func=") >> quoted_string;
addr = qi::lit("addr=") >> quoted_string;
file = qi::lit("file=") >> quoted_string;
fullname = qi::lit("fullname=") >> quoted_string;
line = qi::lit("line=") >> quoted_string;
// Wrappers that store the parsed value into the matching frame member.
func_rule = func[_func = qi::_1];
addr_rule = addr[_addr = qi::_1];
file_rule = file[_file = qi::_1];
fullname_rule = fullname[_fullname = qi::_1];
line_rule = line[_line = qi::_1];
kv_rule = arrTest[_kv = qi::_1];
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
// Permutation (^) of the six field rules, each given the result frame (_val).
// The ',' separators are operands of ^ as well, so nothing in this expression
// ties a comma to the position between two fields.
frame_rule = qi::lit("frame={") >>
(addr_rule(qi::_val) ^ qi::lit(',') ^ func_rule(qi::_val) ^
qi::lit(',') ^ file_rule(qi::_val) ^ qi::lit(',') ^
fullname_rule(qi::_val) ^ qi::lit(',') ^ line_rule(qi::_val) ^
qi::lit(',') ^ kv_rule(qi::_val)) >>
qi::lit('}');
// Debug names; file_rule and kv_rule are not in this list.
BOOST_SPIRIT_DEBUG_NODES(
(frame_rule)(func_rule)(addr_rule)(fullname_rule)(line_rule))
}
// Field rules: no synthesized attribute, the target frame is inherited by reference.
qi::rule<Iterator, void(frame&), ascii::space_type> func_rule, addr_rule,
file_rule, fullname_rule, line_rule, kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
// Declared without a skipper.
qi::rule<Iterator, std::string()> addr, func, file, fullname, line;
qi::rule<Iterator, std::string()> quoted_string;
// Sub-grammar for the args=[...] list.
argsArray<Iterator> arrTest;
};
test.cc
#include <iostream>
#include "gtest/gtest.h"
#include "parser/frame.h"
// Parses one complete frame record and checks every extracted field.
TEST(ParseFrameString, Test1)
{
    using It = std::string::const_iterator;

    // Adjacent raw literals are concatenated into a single input string.
    const std::string str = R"(frame={addr="0x0000000000414008",)"
                            R"(func="main",)"
                            R"(args=[{name="argc",value="1"},)"
                            R"({name="argv",value="0x7fffffffe1a8"}],)"
                            R"(file="/home/stiopa/development/gdbFront/main.cc",)"
                            R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
                            R"(line="90"}")";

    const frameParser<It> g;
    It iter = str.begin();
    const It end = str.end();
    frame parsed;
    const bool r = phrase_parse(iter, end, g, boost::spirit::ascii::space, parsed);
    EXPECT_EQ(r, true);

    // Scalar fields.
    EXPECT_EQ(parsed.addr, "0x0000000000414008");
    EXPECT_EQ(parsed.func, "main");

    // Argument list.
    const std::map<std::string, std::string> expectedArgs{{"argc", "1"},
                                                          {"argv", "0x7fffffffe1a8"}};
    EXPECT_EQ(parsed.kv, expectedArgs);

    // Source location.
    EXPECT_EQ(parsed.file, "/home/stiopa/development/gdbFront/main.cc");
    EXPECT_EQ(parsed.fullname, "/home/stiopa/development/gdbFront/main.cc");
    EXPECT_EQ(parsed.line, "90");
}
The simple, low-tech solution would be to use qi::seek from the repository:
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qir = boost::spirit::repository::qi;
And then:
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
DEMO
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qi = boost::spirit::qi;
namespace qir = boost::spirit::repository::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Result of parsing one `frame={...}` record. frameParser assigns each string
// member from the key of the same name and `kv` from the `args=[...]` list.
struct frame
{
std::string addr;
std::string func;
std::string file;
std::string fullname;
std::string line;
// name -> value pairs of the args list
std::map<std::string, std::string> kv;
};
// Grammar for `args=[{name="..",value=".."},...]`, producing a name -> value map.
// No skipper is declared for this grammar or its rules.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// At least one pair, further pairs separated by ','.
query =
qi::lit("args=[") >> pair >> *(qi::lit(',') >> pair) >> qi::lit(']');
// One {name="..",value=".."} element; the two quoted strings form the std::pair.
pair = qi::lit("{name=") >> quoted_string >> qi::lit(",value=") >>
quoted_string >> qi::lit("}");
// NOTE(review): `key` is defined here but not referenced by any other rule of this grammar.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Text between double quotes; the quotes themselves are not part of the attribute.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
}
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar for a `frame={...}` record. The fields may appear in any order:
// each one is parsed by a rule that writes into the frame passed as inherited attribute.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Phoenix expressions naming one member of the frame received as _r1.
// NOTE(review): keeping Phoenix expressions in `auto` variables is commonly
// reported as a dangling-reference hazard -- confirm this is safe here.
static const auto _addr = phx::bind(&frame::addr, qi::_r1);
static const auto _func = phx::bind(&frame::func, qi::_r1);
static const auto _file = phx::bind(&frame::file, qi::_r1);
static const auto _fullname = phx::bind(&frame::fullname, qi::_r1);
static const auto _line = phx::bind(&frame::line, qi::_r1);
static const auto _kv = phx::bind(&frame::kv, qi::_r1);
// key="value" lexemes; each yields the quoted value as std::string.
func = qi::lit("func=") >> quoted_string;
addr = qi::lit("addr=") >> quoted_string;
file = qi::lit("file=") >> quoted_string;
fullname = qi::lit("fullname=") >> quoted_string;
line = qi::lit("line=") >> quoted_string;
// Wrappers that store the parsed value into the matching frame member.
func_rule = func[_func = qi::_1];
addr_rule = addr[_addr = qi::_1];
file_rule = file[_file = qi::_1];
fullname_rule = fullname[_fullname = qi::_1];
line_rule = line[_line = qi::_1];
kv_rule = arrTest[_kv = qi::_1];
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
// Permutation (^) of the six field rules, each given the result frame (_val).
// The ',' separators are operands of ^ as well, so nothing in this expression
// ties a comma to the position between two fields.
frame_rule = qi::lit("frame={") >>
(addr_rule(qi::_val) ^ qi::lit(',') ^ func_rule(qi::_val) ^
qi::lit(',') ^ file_rule(qi::_val) ^ qi::lit(',') ^
fullname_rule(qi::_val) ^ qi::lit(',') ^ line_rule(qi::_val) ^
qi::lit(',') ^ kv_rule(qi::_val)) >>
qi::lit('}');
// Debug names; file_rule and kv_rule are not in this list.
BOOST_SPIRIT_DEBUG_NODES(
(frame_rule)(func_rule)(addr_rule)(fullname_rule)(line_rule))
}
// Field rules: no synthesized attribute, the target frame is inherited by reference.
qi::rule<Iterator, void(frame&), ascii::space_type> func_rule, addr_rule,
file_rule, fullname_rule, line_rule, kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
// Declared without a skipper.
qi::rule<Iterator, std::string()> addr, func, file, fullname, line;
qi::rule<Iterator, std::string()> quoted_string;
// Sub-grammar for the args=[...] list.
argsArray<Iterator> arrTest;
};
#include <iostream>
//#include "parser/frame.h"
int main()
{
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
str = "AAA" + str + "AAA";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
assert(r == true);
assert(frame.addr == "0x0000000000414008");
assert(frame.func == "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
assert(frame.kv == kv);
assert(frame.file == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.fullname == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.line == "90");
}
Tests still pass.
And here's a free code review. Please see
Boost Spirit: "Semantic actions are evil"?
Boost spirit skipper issues
You didn't parse the delimiting ',' correctly at all: it must be required after every field unless the end of the frame ('}') follows, and several delimiters in a row must not be accepted.
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qi = boost::spirit::qi;
namespace qir = boost::spirit::repository::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Result of parsing one `frame={...}` record: five string fields plus the
// name -> value pairs of the args list.
struct frame {
std::string addr;
std::string func;
std::string file;
std::string fullname;
std::string line;
// name -> value pairs of the args list
std::map<std::string, std::string> kv;
};
// Adapt frame as a Fusion sequence; the member order given here is the
// element order of that sequence.
BOOST_FUSION_ADAPT_STRUCT(frame, addr, func, file, fullname, line, kv)
// Grammar for `args=[{name="..",value=".."},...]`, producing a name -> value map.
// No skipper is declared, so the rules match the input character by character.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// At least one pair, further pairs separated by ','.
query = "args=[" >> pair >> *(',' >> pair) >> ']';
// One {name="..",value=".."} element; the two quoted strings form the std::pair.
pair = "{name=" >> quoted_string >> ",value=" >> quoted_string >> "}";
// NOTE(review): `key` is defined here but not referenced by any other rule of this grammar.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Text between double quotes; the quotes themselves are not part of the attribute.
quoted_string = '"' >> +(qi::char_ - '"') >> '"';
}
private:
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar for a `frame={...}` record without semantic actions: the fields may
// appear in any order and every field must be followed by a delimiter.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
quoted_string = '"' >> +(qi::char_ - '"') >> '"';
// Either the closing '}' is next (the & predicate only looks ahead and
// consumes nothing) or a ',' is consumed.
delim = (&qi::lit('}')) | ',';
// `<name>=` as one lexeme, where <name> is the inherited attribute _r1,
// followed by the quoted value and the delimiter.
field_rule = qi::lexeme [ qi::lit(qi::_r1) >> '=' ] >> quoted_string >> delim;
kv_rule = arrTest >> delim;
// Permutation (^) of the six fields. Unary plus on a string literal yields
// the char const* that field_rule's signature expects.
frame_rule = "frame={" >>
(field_rule(+"addr") ^
field_rule(+"func") ^
field_rule(+"file") ^
field_rule(+"fullname") ^
field_rule(+"line") ^
kv_rule
) >> '}';
BOOST_SPIRIT_DEBUG_NODES((frame_rule)(field_rule))
}
private:
// delim, kv_rule and quoted_string are declared without a skipper.
qi::rule<Iterator> delim;
qi::rule<Iterator, std::string(char const*), ascii::space_type> field_rule;
qi::rule<Iterator, std::map<std::string, std::string>()> kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
qi::rule<Iterator, std::string()> quoted_string;
// Sub-grammar for the args=[...] list.
argsArray<Iterator> arrTest;
};
#include <iostream>
//#include "parser/frame.h"
int main()
{
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
str = "AAA" + str + "AAA";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
if (iter != end)
std::cout << "Remaining unparsed: '" << std::string(iter,end) << "'\n";
assert(r == true);
assert(frame.addr == "0x0000000000414008");
assert(frame.func == "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
assert(frame.kv == kv);
assert(frame.file == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.fullname == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.line == "90");
}
Prints:
Remaining unparsed: '"AAA'
Tests still pass.
Note your original sample input had a trailing ", which you simply ignored.
I managed to parse a PGN file with the Boost Spirit library, but parsing fails as soon as the input contains characters I did not "anticipate".
Here is my Spirit grammar :
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Fusion adaptations of the three PGN data structs; members are listed in
// declaration order together with their types.

// pgn_tag: key, then value.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// game_move: number, turn marker, white move, black move, result.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// pgn_game: header tags, then moves.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Qi grammar for a sequence of PGN games: optional header tags followed by
// numbered moves. qi::unused_type is given in the skipper position and
// whitespace is matched explicitly through omit[...] in the rules.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// Text between double quotes (may be empty); the quotes are not kept.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// [Key "Value"] followed by at least one end of line.
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
header %= +tag;
// "..." is tried before "." so the longer marker is not cut short.
move_turn %= qi::string("...") | qi::string(".");
// Piece/file characters, then squares/captures/promotion, then an optional "e.p.".
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is tried before "O-O" for the same reason as above.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A move with an optional '+' or '#' suffix.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, one move, then optionally a second move and a result.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
game_description %= full_move
>> *(omit[*space] >> full_move);
single_game %=
-header
>> game_description
;
// One or more games separated by optional whitespace / line ends.
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
How can I simply consume any character I did not "anticipate"? I mean, how can I ignore any character that none of my grammar rules expects?
For testing purposes:
here is my parser header (pgn_games_extractor.hpp):
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

/// One header tag of a PGN game: a key and its quoted value.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered move entry of a game.
struct game_move {
    /// Move number; zero until a number has been stored, so a default-constructed
    /// game_move never carries an indeterminate value.
    unsigned move_number = 0;
    std::string move_turn;   ///< turn marker as written in the file ("." or "...")
    std::string white_move;
    std::string black_move;
    std::string result;      ///< result token, empty when none was given
};

/// One game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

/// Reads PGN games from a file and keeps the parsed result.
class PgnGamesExtractor
{
public:
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::ifstream &inputFile);
    /*
    Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
    */

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }
    virtual ~PgnGamesExtractor();
protected:
private:
    std::vector<pgn_game> games;
    void parseInput(std::ifstream &inputFile);
};

/// Thrown when the input does not match the PGN grammar.
class PgnParsingException : public std::runtime_error
{
public:
    PgnParsingException(std::string message): std::runtime_error(message){}
};

/// Thrown when the input file is missing or cannot be read.
class InputFileException : public std::runtime_error
{
public:
    InputFileException(std::string message) : std::runtime_error(message){}
};

}
#endif // PGNGAMESEXTRACTOR_HPP
Here is my parser source (pgn_games_extractor.cpp) :
#include "pgn_games_extractor.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Fusion adaptations of the three PGN data structs; members are listed in
// declaration order together with their types.

// pgn_tag: key, then value.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// game_move: number, turn marker, white move, black move, result.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// pgn_game: header tags, then moves.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Qi grammar for a sequence of PGN games: optional header tags followed by
// numbered moves. qi::unused_type is given in the skipper position and
// whitespace is matched explicitly through omit[...] in the rules.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// Text between double quotes (may be empty); the quotes are not kept.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// [Key "Value"] followed by at least one end of line.
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
header %= +tag;
// "..." is tried before "." so the longer marker is not cut short.
move_turn %= qi::string("...") | qi::string(".");
// Piece/file characters, then squares/captures/promotion, then an optional "e.p.".
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is tried before "O-O" for the same reason as above.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A move with an optional '+' or '#' suffix.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, one move, then optionally a second move and a result.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
game_description %= full_move
>> *(omit[*space] >> full_move);
single_game %=
-header
>> game_description
;
// One or more games separated by optional whitespace / line ends.
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
// Opens the file at inputFilePath and hands the stream to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream inputFile{inputFilePath};
    this->parseInput(inputFile);
}
// Parses games from a stream the caller has already opened.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
    this->parseInput(inputFile);
}
// Nothing to release by hand; the members clean up after themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
/// Reads the whole stream and parses it as a sequence of PGN games.
/// On success `games` is replaced by the parsed games; on failure it is left untouched.
/// @param inputFile stream to read from.
/// @throws InputFileException  if the stream is unusable or nothing could be read.
/// @throws PgnParsingException if parsing fails or stops before the end of the input.
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Read everything up to the first NUL byte (or end of file) in one call.
    std::string content;
    std::getline(inputFile, content, '\0');
    // Throw by value so that handlers catching InputFileException (or
    // std::exception) by reference actually see it; fail() also covers badbit.
    if (inputFile.fail()) throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;
    std::string::const_iterator iter = content.begin();
    const std::string::const_iterator end = content.end();
    const bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);

    // A successful parse must also have consumed the whole input.
    if (!success || iter != end)
    {
        throw PgnParsingException("Failed to parse the input at :'" + std::string(iter, end) + "' !");
    }

    // Hand the result over without copying the vector.
    games.swap(temp_games);
}
I am asking this question because I could not parse the following pgn : ScotchGambitPgn.zip. I think it is because of an encoding issue with this file.
I am using Spirit 2 and C++ 11 (Gnu)
As requested, here is the simple X3 translation:
fewer lines of code (10 lines)
compilation time down from 7.4s to 3.6s (clang)
compilation time down from 11.4s to 6.0s (gcc5)
runtime down from 0.80s to 0.55s (clang and gcc)
The outputs are identical (exactly).
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

/// One header tag of a PGN game: a key and its quoted value.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered move entry of a game.
struct game_move {
    /// Move number; zero until a number has been stored.
    unsigned move_number = 0;
    std::string white_move;
    std::string black_move;
    /// Result attached to the move. Defaults to `undecided` so that a move
    /// without a result token does not end up reporting a winner.
    enum result_t { white_won, black_won, draw, undecided } result = undecided;
};

/// One game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

/// Reads PGN games from a file or stream and keeps the parsed result.
class PgnGamesExtractor {
public:
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::istream &inputFile);
    /*
    Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
    */

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }
    virtual ~PgnGamesExtractor();

protected:
private:
    std::vector<pgn_game> games;
    void parseInput(std::istream &inputFile);
};

/// Thrown when the input does not match the PGN grammar.
class PgnParsingException : public virtual std::runtime_error {
public:
    PgnParsingException(std::string message) : std::runtime_error(message) {}
};

/// Thrown when the input file is missing or cannot be read.
class InputFileException : public virtual std::runtime_error {
public:
    InputFileException(std::string message) : std::runtime_error(message) {}
};

}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Fusion adaptations of the three PGN data structs; the members are named in
// declaration order.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
// X3 parsers for PGN input; `games` is the entry point used by parseInput().
namespace pgn_parser {
using namespace boost::spirit::x3;
// Empty string used as the attribute when one side has no move.
static std::string const no_move;
// Symbol table for the result tokens, built once by an immediately invoked lambda.
static auto const result = []{
symbols<game_move::result_t> table;
table.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
return table;
}();
// Text between double quotes (may be empty); the quotes are not kept.
static auto const quoted_string = lexeme['"' >> *~char_('"') >> '"'];
static auto const tag = '[' >> +alnum >> quoted_string >> ']';
static auto const header = +tag;
// Castling is tried first ("O-O-O" before "O-O"), then an ordinary move.
static auto const regular_move = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched input text as the std::string attribute.
static auto const single_move = rule<struct single_move_, std::string> { "single_move" }
= raw [ lexeme [ regular_move >> -char_("+#")] ];
// Move number, then either "..." (white's move becomes no_move) or "." and
// white's move, then black's move or no_move, then an optional result.
static auto const full_move = rule<struct full_move_, game_move> { "full_move" }
= uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
static auto const game_description = +full_move;
static auto const single_game = rule<struct single_game_, pgn_game> { "single_game" }
= -header >> game_description;
// Zero or more games.
static auto const games = *single_game;
}
}
// Opens the file at inputFilePath and hands the stream to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream inputFile{inputFilePath};
    this->parseInput(inputFile);
}
// Parses games from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    this->parseInput(inputFile);
}
// Nothing to release by hand; the members clean up after themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
#include <iostream>
// Demo driver: parses "ScotchGambit.pgn" and prints every move of every game.
int main() {
    loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");

    // getGames() returns the vector by value; take one copy and reuse it
    // instead of copying all games once per call.
    const auto games = pge.getGames();

    std::cout << "Parsed " << games.size() << " games\n";
    for (const auto& g : games)
        for (const auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
For what it's worth, here's a significantly simplified version:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

/// One header tag of a PGN game: a key and its quoted value.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered move entry of a game.
struct game_move {
    /// Move number; zero until a number has been stored.
    unsigned move_number = 0;
    std::string white_move;
    std::string black_move;
    /// Result attached to the move. Defaults to `undecided` so that a move
    /// without a result token does not end up reporting a winner.
    enum result_t { white_won, black_won, draw, undecided } result = undecided;
};

/// One game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

/// Reads PGN games from a file or stream and keeps the parsed result.
class PgnGamesExtractor {
public:
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::istream &inputFile);
    /*
    Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
    */

    /// Returns a copy of the parsed games.
    std::vector<pgn_game> getGames() const { return games; }
    virtual ~PgnGamesExtractor();

protected:
private:
    std::vector<pgn_game> games;
    void parseInput(std::istream &inputFile);
};

/// Thrown when the input does not match the PGN grammar.
class PgnParsingException : public virtual std::runtime_error {
public:
    PgnParsingException(std::string message) : std::runtime_error(message) {}
};

/// Thrown when the input file is missing or cannot be read.
class InputFileException : public virtual std::runtime_error {
public:
    InputFileException(std::string message) : std::runtime_error(message) {}
};

}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Fusion adaptations of the three PGN data structs; the members are named in
// declaration order.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
// Simplified Qi grammar for a sequence of PGN games. qi::space is the skipper
// of the structural rules; the rules under "lexemes" are declared without one.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
pgn_parser() : pgn_parser::base_type(games) {
using namespace qi;
// Empty string used as the attribute when one side has no move.
const std::string no_move;
// Result tokens mapped to game_move::result_t.
result.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
// Text between double quotes (may be empty); the quotes are not kept.
quoted_string = '"' >> *~char_('"') >> '"';
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
// Castling is tried first ("O-O-O" before "O-O"), then an ordinary move.
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched input text as the std::string attribute.
single_move = raw [ regular_move >> -char_("+#") ];
// Move number, then either "..." (white's move becomes no_move) or "." and
// white's move, then black's move or no_move, then an optional result.
full_move = uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
single_game = -header >> game_description;
// Zero or more games.
games = *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, game_move(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;
qi::rule<Iterator, pgn_game, qi::space_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::space_type> games;
// lexemes
qi::symbols<char, game_move::result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
// Opens the file at `inputFilePath` and hands the resulting stream to
// parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath) {
    std::ifstream pgnStream(inputFilePath);
    parseInput(pgnStream);
}
// Forwards an already opened stream straight to parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    parseInput(inputFile);
}
// Nothing to release by hand: the destructor performs no work of its own.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
loloof64::pgn_parser<It> parser;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
int main() {
loloof64::PgnGamesExtractor pge(std::cin); // "ScotchGambit.pgn"
std::cout << "Parsed " << pge.getGames().size() << " games\n";
for (auto& g : pge.getGames())
for (auto& m : g.moves)
std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
Notes:
don't read full file in memory (boost::spirit::istream_iterator)
don't manually skip (use skippers)
don't explicitly lexeme (Boost spirit skipper issues)
don't use %= if not needed
don't synthesize unneeded attributes (use raw[])
treat optional parts of a move as optional; don't store asymmetric magic flags like "..." (look for no_move)
don't be overly specific (use istream& instead of ifstream&)
Probably some other things I forgot. Output is e.g.
Parsed 6166 games
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Qf6
5. O-O d6
6. Ng5 Nh6
7. f4 Be7
8. e5 Qg6
9. exd6 cxd6
10. c3 dxc3
11. Nxc3 O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3 f5
16. Bc4 Kh8
17. Nxe7 Nxe7
18. Qxb7 Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5 Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7 Rxb2
27. Ne6 Qxe6
28. Bxe6 Rxd1+
29. Kf2
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Bc5
5. Ng5 Ne5
6. Bxf7+ Nxf7
7. Nxf7 Bb4+
8. c3 dxc3
9. bxc3 Bxc3+
10. Nxc3 Kxf7
11. Qd5+ Kf8
12. Ba3+ d6
13. e5 Qg5
14. exd6 Qxd5
Indeed the problem is with Veronica. Or, actually, it's with Ver?nica. Where ? is the code unit <93> - which, lacking codepage/encoding information could mean anything really.
You're using ascii::char_, which requires 7-bit-only characters.
Easily fix it by changing
using ascii::char_;
into
using qi::char_;
I declared rules of my grammar as static const. That worked fine till I tried to use cross-recursive rules (rule1 is defined using rule2 which is defined using rule1). The source code still can be built, but segfaults on parsing source containing such cross-recursive case.
Here's a simplified code of the grammar:
// Skipper grammar as posted in the question: both rules are static const
// members of the class template, defined out-of-class below with their parser
// expression passed as the constructor argument.
template < typename Iterator >
class Skipper : public qi::grammar<Iterator> {
public:
Skipper ( ) : Skipper::base_type(_skip_rule) { }
private:
static qi::rule<Iterator> const
_comment,
_skip_rule;
};
// _comment: a "/*" ... "*/" block, or a "//" comment running up to qi::eol.
template < typename Iterator >
typename qi::rule<Iterator> const
Skipper<Iterator>::_comment(
boost::spirit::repository::confix("/*", "*/")[*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol)[*(qi::char_ - qi::eol)] // Single-line
);
// _skip_rule: ASCII whitespace or a comment; its initializer refers to _comment.
template < typename Iterator >
typename qi::rule<Iterator> const
Skipper<Iterator>::_skip_rule(qi::ascii::space | _comment);
// Grammar as posted in the question: every rule is a static const member,
// defined out-of-class below. The question reports that this builds but
// segfaults when parsing input that exercises the operand/expression cycle.
template < typename Iterator, typename Skipper >
class Grammar : public qi::grammar<Iterator, Skipper > {
public:
Grammar ( ) : Grammar::base_type(expression) { }
private:
static qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal,
identifier,
// Rules
operand,
expression;
};
// scalar_literal: an unsigned or signed integer.
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::scalar_literal(qi::uint_ | qi::int_);
// identifier: a letter or '_' followed by letters, digits or '_' (no skipping inside).
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::identifier(qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')]);
// operand: its initializer refers to `expression`, which is defined after it.
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::operand((scalar_literal | identifier | ('(' >> expression >> ')')));
// expression: constructed from `operand`, closing the mutual reference.
template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
Grammar<Iterator, Skipper>::expression(operand);
(The expression rule is made identical to operand to make the code easier to understand; of course it should be more complicated, yet still based on operand.) The operand definition uses expression and vice versa. That segfaults when trying to phrase_parse, for example, (123). I suppose that it's because of the "forward" use of expression; the same happens if I put the expression definition before the operand one. So in what way should these rules be declared to avoid the runtime error?
First off, the static has nothing to do with it:
Live On Coliru fails just as badly:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Non-static variant of the skipper: the const rule members are set up with
// default member initializers instead of out-of-class static definitions.
template <typename Iterator>
struct Skipper : qi::grammar<Iterator> {
Skipper() : Skipper::base_type(_skip_rule) { }
private:
qi::rule<Iterator> const
// A "/*" ... "*/" block, or a "//" comment running up to qi::eol.
_comment {
boost::spirit::repository::confix("/*", "*/") [*(qi::char_ - "*/")] // Multi-line
| boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)] // Single-line
},
// ASCII whitespace or a comment.
_skip_rule {
qi::ascii::space | _comment
};
};
// Non-static variant of the grammar: const rule members initialized in
// declaration order through default member initializers. Presented in the
// surrounding text as failing just like the static version.
template <typename Iterator, typename Skipper>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) { }
private:
qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal { qi::uint_ | qi::int_ },
identifier { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
// Rules
// operand's initializer mentions `expression`, which is declared after it.
operand { (scalar_literal | identifier | ('(' >> expression >> ')')) },
expression { operand };
};
int main() {
using It = std::string::const_iterator;
Skipper<It> s;
Grammar<It, Skipper<It> > p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,s);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Secondly, the skipper has nothing to do with it:
Live On Coliru fails just as badly:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Same const-member grammar, now defaulting the skipper to
// qi::ascii::space_type so no custom skipper is involved. Presented in the
// surrounding text as failing just like the previous versions.
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) { }
private:
qi::rule<Iterator, Skipper> const
// Tokens
scalar_literal { qi::uint_ | qi::int_ },
identifier { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
// Rules
// operand's initializer mentions `expression`, which is declared after it.
operand { (scalar_literal | identifier | ('(' >> expression >> ')')) },
expression { operand };
};
int main() {
using It = std::string::const_iterator;
Grammar<It> p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Thirdly, the timing of initialization has to do with it:
Live On Coliru succeeds:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Working variant: the rules are plain (non-const) members that are
// default-constructed and then assigned inside the constructor body.
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct Grammar : qi::grammar<Iterator, Skipper> {
Grammar() : Grammar::base_type(expression) {
scalar_literal = qi::uint_ | qi::int_;
// No qi::lexeme[] here: the token rules are declared without a skipper.
identifier = (qi::alpha | '_') >> *(qi::alnum | '_');
// Rules
// operand and expression refer to each other.
operand = (scalar_literal | identifier | ('(' >> expression >> ')'));
expression = operand;
}
private:
qi::rule<Iterator> scalar_literal, identifier; // Tokens
qi::rule<Iterator, Skipper> operand, expression; // Rules
};
int main() {
using It = std::string::const_iterator;
Grammar<It> p;
std::string const input = "(123)";
It f = input.begin(), l = input.end();
bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
if (ok) std::cout << "Parse success\n";
else std::cout << "Parse failed\n";
if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints
Parse success
Finally, you can have all the cake and eat it too:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace parsing {
    namespace detail {
        /// Skipper grammar: ASCII whitespace, "/* ... */" blocks and "//"
        /// comments running up to qi::eol. The rules are assigned in the
        /// constructor body.
        template <typename Iterator>
        struct Skipper : qi::grammar<Iterator> {
            Skipper() : Skipper::base_type(_skip_rule) {
                _comment = boost::spirit::repository::confix("/*", "*/") [*(qi::char_ - "*/")]     // Multi-line
                         | boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)] // Single-line
                         ;
                _skip_rule = qi::ascii::space | _comment;
            }
          private:
            qi::rule<Iterator> _comment, _skip_rule;
        };

        /// Expression grammar; `operand` and `expression` refer to each other
        /// and are assigned in the constructor body.
        template <typename Iterator, typename Skipper = Skipper<Iterator> >
        struct Grammar : qi::grammar<Iterator, Skipper> {
            Grammar() : Grammar::base_type(expression) {
                scalar_literal = qi::uint_ | qi::int_;
                // Token rules are declared without a skipper, so no lexeme[] is needed.
                identifier     = (qi::alpha | '_') >> *(qi::alnum | '_');
                // Rules
                operand        = (scalar_literal | identifier | ('(' >> expression >> ')'));
                expression     = operand;
            }
          private:
            qi::rule<Iterator>          scalar_literal, identifier; // Tokens
            qi::rule<Iterator, Skipper> operand, expression;        // Rules
        };
    }

    /// Static entry point owning one const parser and one const skipper per
    /// <Iterator, Skipper> instantiation.
    ///
    /// @tparam Iterator iterator type of the input being parsed.
    /// @tparam Skipper  skipper type; must be default-constructible via `= {}`.
    template <typename Iterator, typename Skipper = detail::Skipper<Iterator> >
    struct facade {
        /// Parses `input` from its begin to its end.
        /// Prints the unconsumed remainder, if any, to std::cout.
        /// @return the result of qi::phrase_parse.
        template <typename Range> static bool parse(Range const& input) {
            Iterator f = boost::begin(input), l = boost::end(input);
            bool ok = qi::phrase_parse(f, l, _parser, _skipper);

            if (f!=l)
                std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
            return ok;
        }
      private:
        // The skipper instance uses the `Skipper` template argument so that it
        // always matches the skipper type the grammar was instantiated with
        // (it was previously hard-coded to detail::Skipper<Iterator>).
        static const Skipper _skipper;
        static const detail::Grammar<Iterator, Skipper> _parser;
    };

    template <class I, class S> const S facade<I,S>::_skipper = {};
    template <class I, class S> const detail::Grammar<I, S> facade<I,S>::_parser = {};
}
// Parses "(123)" through the static facade and reports the outcome.
int main() {
    using It = std::string::const_iterator;

    std::string const text = "(123)";
    bool const matched = parsing::facade<It>::parse(text);

    std::cout << (matched ? "Parse success\n" : "Parse failed\n");
}
Note that the result is the same, the parser/skipper are every bit as static and const as in the original code, the code is a lot easier to maintain (and has a bit more structure to it at the same time).
This is basically where the Singletons-are-bad theme meets the inner-const-is-problematic theme. You don't need to make the fields const. You don't need to make the instances static.
Just create only one instance if you prefer. Also, it's not a problem that the parser is now copyable (you don't have to copy it, but now you can).
I have some complicated structures and I want to extract their data from a text using
the boost::spirit library (I've selected this one for efficiency reasons),
but I will ask my question in a simpler way.
Assume we have two structures like these:
// A person record: a name and an age held in a uint8_t.
struct person
{
    std::string name;
    uint8_t     age;
};
and
// A fruit record: a colour name and an average weight.
struct fruit
{
    std::string color;
    // `double` is a built-in type; it does not live in namespace std.
    double average_weight;
};
and our text that included these data is presented below:
"... (jane, 23) (david, 19) (mary, 30) [yello,100] [green, 60.6] [red, 30.5]"
Now, the problem is extracting these data in a suitable format,
for example by calling a handler for each struct or by push_back-ing them onto a vector.
Any help would be greatly appreciated!
Is there any code sample for that?
call handlers for parsed structures.
#include <string>
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/qi.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace fusion = boost::fusion;
// A person record: a name and an age held in a uint8_t.
struct person
{
    std::string name;
    uint8_t     age;
};
// Adapt person as a Fusion sequence (name, age), in member declaration order.
BOOST_FUSION_ADAPT_STRUCT
(
person,
(std::string, name)
(uint8_t, age)
);
// A fruit record: a colour name and an average weight.
struct fruit
{
    std::string color;
    double      average_weight;
};
// Adapt fruit as a Fusion sequence (color, average_weight), in member declaration order.
BOOST_FUSION_ADAPT_STRUCT
(
fruit,
(std::string, color)
(double, average_weight)
);
// Grammar with no attribute of its own (void()): every parsed person or fruit
// is passed to the lambda attached to the corresponding alternative.
template <typename _Iterator>
struct parser :
qi::grammar<_Iterator, void(), ascii::space_type>
{
parser() :
parser::base_type(main)
{
// Zero or more records, in any order; each lambda receives the parsed struct.
main =
*(
_person[ ([](const person &person_)
{
// Add handler here
}) ]
| _fruit[ ([](const fruit &fruit_)
{
// Add handler here
}) ]
);
// "(" name "," number ")" -- the name is every character up to the comma.
// NOTE(review): the number is parsed with qi::ushort_ while person::age is
// declared as uint8_t -- confirm the intended value range.
_person = qi::lit('(') >> *(qi::char_ - ',') >> ',' >> qi::ushort_ >> ')';
// "[" color "," number "]" -- the color is every character up to the comma.
_fruit = qi::lit('[') >> *(qi::char_ - ',') >> ',' >> qi::double_ >> ']';
}
qi::rule<_Iterator, void(), ascii::space_type> main;
qi::rule<_Iterator, person(), ascii::space_type> _person;
qi::rule<_Iterator, fruit(), ascii::space_type> _fruit;
};
// Parses the sample text with a temporary parser; the outcome (parse
// succeeded AND the whole input was consumed) is computed but not used.
int main()
{
    using iterator = std::string::const_iterator;

    std::string text = "(jane, 23000) (david, 19) (mary, 30) [yello,100] [green, 60.6] [red, 30.5]";
    iterator cursor = std::begin(text);

    bool parsed_all =
        qi::phrase_parse(cursor, iterator(std::end(text)), parser<iterator>(), ascii::space)
        && cursor == std::end(text);

    return 0;
}
P.S. Not all compilers can build that code because of the lambdas in the semantic actions (MSVS can't). In that case you have to use something else (phoenix::bind, for example).
store parsed structures in a vector
// Element type for the result vector: holds either a person or a fruit.
typedef boost::variant <
person,
fruit
> variant;
// Grammar whose attribute is a std::vector<variant>: the start rule is a
// repetition of person/fruit alternatives with no semantic actions.
template <typename _Iterator>
struct parser :
qi::grammar<_Iterator, std::vector < variant > (), ascii::space_type>
{
parser() :
parser::base_type(main)
{
// Zero or more records, in any order.
main = *(_person | _fruit);
// "(" name "," number ")" -- the name is every character up to the comma.
_person = qi::lit('(') >> *(qi::char_ - ',') >> ',' >> qi::ushort_ >> ')';
// "[" color "," number "]" -- the color is every character up to the comma.
_fruit = qi::lit('[') >> *(qi::char_ - ',') >> ',' >> qi::double_ >> ']';
}
qi::rule<_Iterator, std::vector < variant > (), ascii::space_type> main;
qi::rule<_Iterator, person(), ascii::space_type> _person;
qi::rule<_Iterator, fruit(), ascii::space_type> _fruit;
};
^ No, it is not. That was part of the problem, but if you review the code as it is right now, it already does what the linked question/answer shows ... and the errors are still not triggered.
I have this Boost Spirit parser for string literals. It works. Now I would like to start handling errors when it fails. I copied the on_error handler 1:1 from the mini XML example and it compiles, but it is never triggered (no errors are output).
This is the parser:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
// Function object that accepts any number of arguments, ignores their values
// and prints how many it was given.
struct my_handler_f
{
    // Result-type protocol: the call always yields void.
    template <typename...>
    struct result { using type = void; };

    template <typename... Args>
    void operator()(Args&&...) const
    {
        const std::size_t arg_count = sizeof...(Args);
        std::cout << "\nmy_handler_f() invoked with " << arg_count << " arguments\n";
    }
};
struct append_utf8_f
{
template <typename, typename>
struct result { typedef void type; };
template <typename INT>
void operator()(INT in, std::string& to) const
{
auto out = std::back_inserter(to);
boost::utf8_output_iterator<decltype(out)> convert(out);
*convert++ = in;
}
};
// Function object that forwards its argument to an unqualified get_line()
// call and returns the result as size_t.
struct get_line_f
{
    // Result-type protocol for a one-argument call: yields size_t.
    template <typename>
    struct result { typedef size_t type; };

    template <typename It>
    size_t operator()(It const& pos_iter) const
    {
        size_t const line = get_line(pos_iter);
        return line;
    }
};
// First and last line number of a source range.
struct RangePosition
{
    size_t beginLine;
    size_t endLine;
};

// A string value together with its source text, carrying the line range
// inherited from RangePosition.
struct String : public RangePosition
{
    std::string value;
    std::string source;

    // Value-initializes the base (both line numbers become zero) and starts
    // with empty strings.
    String() : RangePosition{}, value{}, source{} {}
};
// Adapt String as a Fusion sequence. Index order: 0 = value, 1 = source,
// 2 = beginLine, 3 = endLine (the last two are inherited from RangePosition).
BOOST_FUSION_ADAPT_STRUCT(String,
(std::string, value)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
// Grammar for double-quoted string literals with escape sequences. Its
// attribute is a String: the decoded value, the matched source text and the
// begin/end line numbers obtained through get_line_.
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
// Maps the character following a backslash to the character it stands for.
struct escape_symbols : qi::symbols<char, char>
{
escape_symbols()
{
add
("\'" , '\'')
("\"" , '\"')
("\?" , '\?')
("\\" , '\\')
("0" , '\0')
("a" , '\a')
("b" , '\b')
("f" , '\f')
("n" , '\n')
("r" , '\r')
("t" , '\t')
("v" , '\v')
;
}
} escape_symbol;
source_string() : source_string::base_type(start)
{
using qi::raw;
using qi::_val;
using qi::_1;
using qi::_2;
using qi::_3;
using qi::_4;
using qi::space;
using qi::omit;
using qi::no_case;
using qi::print;
using qi::eps;
using qi::on_error;
using qi::fail;
using qi::lit;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
using phx::ref;
using phx::val;
escape %= escape_symbol;
// One character: "\x"/"\X" plus 1-2 hex digits, a backslash plus 1-3 octal
// digits or a symbolic escape, or any printable character other than '"' and
// '\'. The '>' operators are expectation points.
character %= (no_case["\\x"] > hex12)
| ("\\" > (oct123 | escape))
| (print - (lit('"') | '\\'));
// "\u" plus exactly 4 hex digits or "\U" plus exactly 8 hex digits; the
// parsed number is appended to the rule's string through append_utf8.
unicode = ("\\u" > hex4[append_utf8(_1, _val)])
| ("\\U" > hex8[append_utf8(_1, _val)]);
// One quoted section; after the opening quote the rest is expected.
string_section %= '"' > *(unicode | character) > '"';
// One or more sections separated by optional whitespace.
string %= string_section % omit[*space];
// Inner action stores the decoded text in field 0; the action on raw[] then
// receives the matched iterator range and fills the source text (field 1)
// and the begin/end line numbers (fields 2 and 3).
main = raw [
string[at_c<0>(_val) = _1]
]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
];
// eps always matches, so `main` sits behind an expectation point.
start %= eps > main;
// Registers my_handler as the error handler of `start`.
on_error<fail>(start, my_handler);
}
boost::phoenix::function<my_handler_f> my_handler;
qi::rule<Iterator, std::string()> escape;
// Numeric parsers: <value type, radix, min digits, max digits>.
qi::uint_parser<char, 16, 1, 2> hex12;
qi::uint_parser<char, 8, 1, 3> oct123;
qi::rule<Iterator, std::string()> character;
qi::uint_parser<uint16_t, 16, 4, 4> hex4;
qi::uint_parser<uint32_t, 16, 8, 8> hex8;
boost::phoenix::function<append_utf8_f> append_utf8;
qi::rule<Iterator, std::string()> unicode;
qi::rule<Iterator, std::string()> string_section;
qi::rule<Iterator, std::string()> string;
boost::phoenix::function<get_line_f> get_line_;
// Only these two rules are declared with the qi::space skipper.
qi::rule<Iterator, String(), qi::space_type> main;
qi::rule<Iterator, String(), qi::space_type> start;
};
and this is the test code
int main()
{
std::string str[] =
{
"\"\\u1234\\U0002345\"",
//"\"te\"\"st\"",
//"\"te\" \"st\"",
//"\"te\" \n \"st\"",
//"\"\"",
//"\"\\\"\"",
//"\"test\"",
//"\"test\" something",
//"\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
//"\"\\x61cd\\X3012\\x7z\"",
//"\"\\141cd\\06012\\78\\778\"",
"\"te",
//"\"te\nst\"",
//"\"test\\\"",
//"\"te\\st\"",
//
};
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> Iterator;
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
source_string<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
String string;
bool r = phrase_parse(iter, end, g, qi::space, string);
if (r)
std::cout << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
else
std::cout << "Parsing failed\n";
}
}