I have this example code, which parses the string str correctly.
How do I make it work if there are any extra characters before and/or after the string? For example, if I did str = std::string("AAA") + str + std::string("AAA")
frame.h
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Plain data holder filled by frameParser from one `frame={...}` record.
// Every scalar field is stored as the raw text found between the quotes.
struct frame
{
std::string addr;     // value of addr="..."
std::string func;     // value of func="..."
std::string file;     // value of file="..."
std::string fullname; // value of fullname="..."
std::string line;     // value of line="..." (kept as text, not converted)
std::map<std::string, std::string> kv; // name -> value pairs parsed from args=[...]
};
// Grammar for `args=[{name="..",value=".."},...]`, synthesizing a
// std::map<std::string, std::string>. No skipper type is declared, so the
// literals must match the input without intervening whitespace.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// One or more comma-separated pairs between "args=[" and "]".
query =
qi::lit("args=[") >> pair >> *(qi::lit(',') >> pair) >> qi::lit(']');
// A single {name="..",value=".."} entry; the two quoted strings form the pair.
pair = qi::lit("{name=") >> quoted_string >> qi::lit(",value=") >>
quoted_string >> qi::lit("}");
// Identifier rule; defined here but not referenced by query or pair.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// At least one non-quote character between double quotes (so "" does not match).
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
}
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar for a `frame={...}` record, synthesizing a `frame` and skipping
// ASCII whitespace between tokens (ascii::space_type skipper).
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Lazy accessors for the members of the frame passed in as inherited
// attribute (_r1); used as assignment targets in the semantic actions below.
static const auto _addr = phx::bind(&frame::addr, qi::_r1);
static const auto _func = phx::bind(&frame::func, qi::_r1);
static const auto _file = phx::bind(&frame::file, qi::_r1);
static const auto _fullname = phx::bind(&frame::fullname, qi::_r1);
static const auto _line = phx::bind(&frame::line, qi::_r1);
static const auto _kv = phx::bind(&frame::kv, qi::_r1);
// `<name>=` followed by a quoted string; each exposes the string contents.
func = qi::lit("func=") >> quoted_string;
addr = qi::lit("addr=") >> quoted_string;
file = qi::lit("file=") >> quoted_string;
fullname = qi::lit("fullname=") >> quoted_string;
line = qi::lit("line=") >> quoted_string;
// Wrappers that store the parsed value into the matching frame member.
func_rule = func[_func = qi::_1];
addr_rule = addr[_addr = qi::_1];
file_rule = file[_file = qi::_1];
fullname_rule = fullname[_fullname = qi::_1];
line_rule = line[_line = qi::_1];
kv_rule = arrTest[_kv = qi::_1];
// At least one non-quote character between double quotes.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
// The fields and the ',' literals are all operands of operator^ (Qi's
// permutation parser), so the commas are not tied to positions between fields.
frame_rule = qi::lit("frame={") >>
(addr_rule(qi::_val) ^ qi::lit(',') ^ func_rule(qi::_val) ^
qi::lit(',') ^ file_rule(qi::_val) ^ qi::lit(',') ^
fullname_rule(qi::_val) ^ qi::lit(',') ^ line_rule(qi::_val) ^
qi::lit(',') ^ kv_rule(qi::_val)) >>
qi::lit('}');
// Debug output is registered for these rules only (file_rule and kv_rule are not listed).
BOOST_SPIRIT_DEBUG_NODES(
(frame_rule)(func_rule)(addr_rule)(fullname_rule)(line_rule))
}
// Rules taking the target frame by reference and synthesizing nothing.
qi::rule<Iterator, void(frame&), ascii::space_type> func_rule, addr_rule,
file_rule, fullname_rule, line_rule, kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
// Declared without a skipper.
qi::rule<Iterator, std::string()> addr, func, file, fullname, line;
qi::rule<Iterator, std::string()> quoted_string;
argsArray<Iterator> arrTest; // sub-grammar for args=[...]
};
test.cc
#include <iostream>
#include "gtest/gtest.h"
#include "parser/frame.h"
// Parses one sample frame record and checks every extracted field.
TEST(ParseFrameString, Test1)
{
// NOTE: the last literal ends with an extra '"' after the closing '}', so
// the input carries one trailing character beyond the frame record.
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
// Only the boolean result is checked; `iter == end` is not, so unparsed
// trailing input does not make the test fail.
bool r = phrase_parse(iter, end, g, boost::spirit::ascii::space, frame);
EXPECT_EQ(r, true);
EXPECT_EQ(frame.addr, "0x0000000000414008");
EXPECT_EQ(frame.func, "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
EXPECT_EQ(frame.kv, kv);
EXPECT_EQ(frame.file, "/home/stiopa/development/gdbFront/main.cc");
EXPECT_EQ(frame.fullname, "/home/stiopa/development/gdbFront/main.cc");
EXPECT_EQ(frame.line, "90");
}
The simple, low-tech solution would be to use qi::seek from the repository:
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qir = boost::spirit::repository::qi;
And then:
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
DEMO
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qi = boost::spirit::qi;
namespace qir = boost::spirit::repository::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Target structure for one parsed `frame={...}` record; all scalar fields
// hold the unquoted text exactly as it appeared in the input.
struct frame
{
std::string addr;     // from addr="..."
std::string func;     // from func="..."
std::string file;     // from file="..."
std::string fullname; // from fullname="..."
std::string line;     // from line="..." (stored as text)
std::map<std::string, std::string> kv; // argument name -> value, from args=[...]
};
// Skipper-less grammar for `args=[{name="..",value=".."},...]` producing a
// name -> value map.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// "args=[" pair (',' pair)* "]" -- at least one pair is required.
query =
qi::lit("args=[") >> pair >> *(qi::lit(',') >> pair) >> qi::lit(']');
// {name="<first>",value="<second>"}
pair = qi::lit("{name=") >> quoted_string >> qi::lit(",value=") >>
quoted_string >> qi::lit("}");
// Identifier pattern; not used by the other rules in this grammar.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Non-empty run of non-quote characters enclosed in double quotes.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
}
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Whitespace-skipping grammar for `frame={...}`; fills a `frame` through
// semantic actions that write into the inherited frame reference.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Phoenix member accessors bound to the inherited attribute _r1.
static const auto _addr = phx::bind(&frame::addr, qi::_r1);
static const auto _func = phx::bind(&frame::func, qi::_r1);
static const auto _file = phx::bind(&frame::file, qi::_r1);
static const auto _fullname = phx::bind(&frame::fullname, qi::_r1);
static const auto _line = phx::bind(&frame::line, qi::_r1);
static const auto _kv = phx::bind(&frame::kv, qi::_r1);
// Key literal followed by its quoted value.
func = qi::lit("func=") >> quoted_string;
addr = qi::lit("addr=") >> quoted_string;
file = qi::lit("file=") >> quoted_string;
fullname = qi::lit("fullname=") >> quoted_string;
line = qi::lit("line=") >> quoted_string;
// Assign each parsed value to the corresponding member of the frame.
func_rule = func[_func = qi::_1];
addr_rule = addr[_addr = qi::_1];
file_rule = file[_file = qi::_1];
fullname_rule = fullname[_fullname = qi::_1];
line_rule = line[_line = qi::_1];
kv_rule = arrTest[_kv = qi::_1];
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
// Fields and comma literals are combined with operator^ (permutation),
// so commas are independent operands rather than required separators.
frame_rule = qi::lit("frame={") >>
(addr_rule(qi::_val) ^ qi::lit(',') ^ func_rule(qi::_val) ^
qi::lit(',') ^ file_rule(qi::_val) ^ qi::lit(',') ^
fullname_rule(qi::_val) ^ qi::lit(',') ^ line_rule(qi::_val) ^
qi::lit(',') ^ kv_rule(qi::_val)) >>
qi::lit('}');
// file_rule and kv_rule are not registered for debug output.
BOOST_SPIRIT_DEBUG_NODES(
(frame_rule)(func_rule)(addr_rule)(fullname_rule)(line_rule))
}
// Field rules: inherit a frame&, synthesize nothing.
qi::rule<Iterator, void(frame&), ascii::space_type> func_rule, addr_rule,
file_rule, fullname_rule, line_rule, kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
// Skipper-less rules.
qi::rule<Iterator, std::string()> addr, func, file, fullname, line;
qi::rule<Iterator, std::string()> quoted_string;
argsArray<Iterator> arrTest; // nested grammar handling args=[...]
};
#include <iostream>
//#include "parser/frame.h"
int main()
{
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
str = "AAA" + str + "AAA";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
assert(r == true);
assert(frame.addr == "0x0000000000414008");
assert(frame.func == "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
assert(frame.kv == kv);
assert(frame.file == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.fullname == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.line == "90");
}
Tests still pass.
And here's a free code review. Please see
Boost Spirit: "Semantic actions are evil"?
Boost spirit skipper issues
You didn't parse the delimiting ',' correctly at all. You must require it unless the end of the frame ('}') follows, and you must not accept several of them in a row.
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qi = boost::spirit::qi;
namespace qir = boost::spirit::repository::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Result of parsing one `frame={...}` record. Scalar fields keep the quoted
// text verbatim; kv maps argument names to values.
struct frame {
std::string addr;     // addr="..."
std::string func;     // func="..."
std::string file;     // file="..."
std::string fullname; // fullname="..."
std::string line;     // line="..." (text, not numeric)
std::map<std::string, std::string> kv; // entries of args=[...]
};
BOOST_FUSION_ADAPT_STRUCT(frame, addr, func, file, fullname, line, kv)
// Skipper-less grammar for `args=[{name="..",value=".."},...]`, exposing the
// entries as a std::map<std::string, std::string>.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// One or more comma-separated pairs inside "args=[" ... "]".
query = "args=[" >> pair >> *(',' >> pair) >> ']';
// The two quoted strings become the pair's first and second.
pair = "{name=" >> quoted_string >> ",value=" >> quoted_string >> "}";
// Identifier pattern; not referenced by query or pair.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Non-empty quoted text; no lexeme[] is written here because the rule is
// declared without a skipper.
quoted_string = '"' >> +(qi::char_ - '"') >> '"';
}
private:
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar for `frame={...}` that fills a `frame` by attribute propagation
// (no semantic actions) and skips ASCII whitespace between tokens.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Non-empty run of non-quote characters between double quotes.
quoted_string = '"' >> +(qi::char_ - '"') >> '"';
// A field must be followed either by ',' (consumed) or by '}' (only
// looked at via the and-predicate, not consumed).
delim = (&qi::lit('}')) | ',';
// `<name>=` where <name> is the inherited attribute, then the quoted value
// and the delimiter.
field_rule = qi::lexeme [ qi::lit(qi::_r1) >> '=' ] >> quoted_string >> delim;
kv_rule = arrTest >> delim;
// Unary plus turns each string literal into a `char const*` to match
// field_rule's declared inherited attribute type. The operands are joined
// with operator^ (permutation), so the fields may come in any order.
frame_rule = "frame={" >>
(field_rule(+"addr") ^
field_rule(+"func") ^
field_rule(+"file") ^
field_rule(+"fullname") ^
field_rule(+"line") ^
kv_rule
) >> '}';
BOOST_SPIRIT_DEBUG_NODES((frame_rule)(field_rule))
}
private:
qi::rule<Iterator> delim; // no attribute, no skipper
qi::rule<Iterator, std::string(char const*), ascii::space_type> field_rule;
qi::rule<Iterator, std::map<std::string, std::string>()> kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
qi::rule<Iterator, std::string()> quoted_string;
argsArray<Iterator> arrTest; // sub-grammar for args=[...]
};
#include <iostream>
//#include "parser/frame.h"
int main()
{
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
str = "AAA" + str + "AAA";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
if (iter != end)
std::cout << "Remaining unparsed: '" << std::string(iter,end) << "'\n";
assert(r == true);
assert(frame.addr == "0x0000000000414008");
assert(frame.func == "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
assert(frame.kv == kv);
assert(frame.file == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.fullname == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.line == "90");
}
Prints:
Remaining unparsed: '"AAA'
Tests still pass.
Note your original sample input had a trailing ", which you simply ignored.
Related
I managed to parse a PGN file thanks to the Boost Spirit library, but parsing fails as soon as the input contains characters I did not anticipate.
Here is my Spirit grammar :
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Qi grammar for a sequence of PGN games. The skipper slot is
// qi::unused_type, so nothing is skipped automatically; whitespace is
// consumed explicitly with omit[...] inside the rules.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
// NOTE: char_ is taken from the ascii namespace, unlike the other parsers here.
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// Possibly empty text between double quotes.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// [Key "value"] followed by at least one end-of-line.
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
header %= +tag;
// "..." is listed before "." so the longer marker is tried first.
move_turn %= qi::string("...") | qi::string(".");
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is listed before "O-O" so the longer form is tried first.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A move optionally followed by '+' or '#'.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, first move, then optional second move and
// optional result, each separated by explicit whitespace.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
game_description %= full_move
>> *(omit[*space] >> full_move);
// The tag header is optional; the move list is not.
single_game %=
-header
>> game_description
;
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
How could I simply consume any character that I did not anticipate? In other words, how can I ignore any character that is not wanted by any of my grammar rules?
For testing purposes, here is my parser header (pgn_games_extractor.hpp):
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {
// One `[Key "value"]` tag from a game header.
struct pgn_tag {
std::string key;
std::string value;
};
// One numbered entry of the move list.
struct game_move {
unsigned move_number;
std::string move_turn;  // turn marker as parsed ("." or "...")
std::string white_move;
std::string black_move;
std::string result;     // result token as text, when present
};
// A complete game: its tags followed by its moves.
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
// Parses PGN input on construction and stores the resulting games.
class PgnGamesExtractor
{
public:
// Opens the file at inputFilePath and parses it.
PgnGamesExtractor(std::string inputFilePath);
// Parses an already opened file stream.
PgnGamesExtractor(std::ifstream &inputFile);
/*
Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
*/
// Returns a copy of the parsed games.
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
void parseInput(std::ifstream &inputFile);
};
/// Error type reported when the input is not valid PGN; the text passed in
/// is what `what()` returns.
class PgnParsingException : public std::runtime_error
{
public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};
/// Error type reported when the input file cannot be used; the text passed
/// in is what `what()` returns.
class InputFileException : public std::runtime_error
{
public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};
}
#endif // PGNGAMESEXTRACTOR_HPP
Here is my parser source (pgn_games_extractor.cpp) :
#include "pgn_games_extractor.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Grammar for a list of PGN games. No automatic skipping takes place inside
// the rules (skipper slot is qi::unused_type); separators are matched and
// discarded explicitly through omit[...].
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
// NOTE: char_ comes from the ascii namespace here.
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// Double-quoted text, possibly empty.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// One header line: [Key "value"] plus trailing line break(s).
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
header %= +tag;
// Longer alternative first.
move_turn %= qi::string("...") | qi::string(".");
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// Longer alternative first.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// Move text with an optional '+' or '#' suffix.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// <number><turn> <move> [<move>] [<result>]
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
game_description %= full_move
>> *(omit[*space] >> full_move);
// Optional tag section followed by the mandatory move list.
single_game %=
-header
>> game_description
;
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
// Opens the file named by inputFilePath and hands the stream to parseInput.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream source(inputFilePath);
    this->parseInput(source);
}

// Parses from a stream the caller has already opened.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
    this->parseInput(inputFile);
}

// Nothing to release beyond what the members clean up themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor()
{
}
// Reads the stream into memory and parses it as a list of PGN games.
//
// On success (grammar matched and all input consumed) the parsed games
// replace the `games` member.
//
// Throws InputFileException when the stream is unusable or nothing could be
// read, and PgnParsingException (carrying the unparsed remainder) when the
// grammar does not consume the whole input. Both are thrown by value, so
// `catch (const std::exception&)` handlers see them.
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Read up to the first NUL character or end of file. The original spelled
    // the delimiter as `(char) inputFile.eof()`, which evaluates to '\0' for
    // any stream that getline can actually read from.
    std::string content;
    std::getline(inputFile, content, '\0');
    if (inputFile.fail() || inputFile.bad())
        throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;
    std::string::const_iterator iter = content.begin();
    const std::string::const_iterator end = content.end();

    const bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);
    if (!success || iter != end)
    {
        // `iter` marks how far the parser got; report everything after it.
        const std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }

    // Hand the result over without copying every game.
    games.swap(temp_games);
}
I am asking this question because I could not parse the following pgn : ScotchGambitPgn.zip. I think it is because of an encoding issue with this file.
I am using Spirit 2 and C++ 11 (Gnu)
As requested the simple X3 translation.
fewer lines of code (10 lines)
compilation time down from 7.4s to 3.6s (clang)
compilation time down from 11.4s to 6.0s (gcc5)
runtime down from 0.80s to 0.55s (clang and gcc)
The outputs are identical (exactly).
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {
// One `[Key "value"]` tag from a game header.
struct pgn_tag {
std::string key;
std::string value;
};
// One numbered entry of the move list.
struct game_move {
unsigned move_number;
std::string white_move; // empty when the entry carries no white move
std::string black_move; // empty when the entry carries no black move
enum result_t { white_won, black_won, draw, undecided } result;
};
// A complete game: its tags followed by its moves.
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
// Parses PGN input on construction and stores the resulting games.
class PgnGamesExtractor {
public:
// Opens the file at inputFilePath and parses it.
PgnGamesExtractor(std::string inputFilePath);
// Parses from any input stream.
PgnGamesExtractor(std::istream &inputFile);
/*
Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
*/
// Returns a copy of the parsed games.
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
void parseInput(std::istream &inputFile);
};
// Reported when the input is not valid PGN.
class PgnParsingException : public virtual std::runtime_error {
public:
PgnParsingException(std::string message) : std::runtime_error(message) {}
};
// Reported when the input stream cannot be used.
class InputFileException : public virtual std::runtime_error {
public:
InputFileException(std::string message) : std::runtime_error(message) {}
};
}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
// Spirit X3 parsers for PGN games, defined as namespace-level constants.
namespace pgn_parser {
using namespace boost::spirit::x3;
// Empty string used as the attribute for a missing move.
static std::string const no_move;
// Symbol table mapping the textual result tokens to game_move::result_t.
static auto const result = []{
symbols<game_move::result_t> table;
table.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
return table;
}();
// Possibly empty text between double quotes; lexeme[] disables skipping inside.
static auto const quoted_string = lexeme['"' >> *~char_('"') >> '"'];
// [Key "value"]
static auto const tag = '[' >> +alnum >> quoted_string >> ']';
static auto const header = +tag;
// Castling (longer form first) or a regular move with optional "e.p." suffix.
static auto const regular_move = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched source text, including an optional '+' or '#'.
static auto const single_move = rule<struct single_move_, std::string> { "single_move" }
= raw [ lexeme [ regular_move >> -char_("+#")] ];
// <number> then either "..." (white move set to no_move) or "." <white move>,
// then a black move or no_move, then an optional result.
static auto const full_move = rule<struct full_move_, game_move> { "full_move" }
= uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
static auto const game_description = +full_move;
// Optional tag section followed by at least one move.
static auto const single_game = rule<struct single_game_, pgn_game> { "single_game" }
= -header >> game_description;
// Zero or more games.
static auto const games = *single_game;
}
}
// Opens the file named by inputFilePath and hands the stream to parseInput.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream source(inputFilePath);
    this->parseInput(source);
}

// Parses from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    this->parseInput(inputFile);
}

// Nothing to release beyond what the members clean up themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor()
{
}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
#include <iostream>
// Parses "ScotchGambit.pgn" and prints the game count followed by every
// move as "<number>.\t<white>\t<black>".
int main() {
    loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");

    // getGames() returns the vector by value; take a single copy instead of
    // one for the count and another for the loop.
    const auto games = pge.getGames();

    std::cout << "Parsed " << games.size() << " games\n";
    for (const auto& g : games)
        for (const auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
For what it's worth, here's significantly simplified:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {
// A single `[Key "value"]` header tag.
struct pgn_tag {
std::string key;
std::string value;
};
// A numbered move-list entry.
struct game_move {
unsigned move_number;
std::string white_move; // empty when no white move was given for this entry
std::string black_move; // empty when no black move was given for this entry
enum result_t { white_won, black_won, draw, undecided } result;
};
// Tags plus moves of one game.
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
// Parses PGN input in its constructors and keeps the parsed games.
class PgnGamesExtractor {
public:
// Opens the file at inputFilePath and parses it.
PgnGamesExtractor(std::string inputFilePath);
// Parses from any input stream.
PgnGamesExtractor(std::istream &inputFile);
/*
Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
*/
// Returns a copy of the parsed games.
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
void parseInput(std::istream &inputFile);
};
// Raised for input that is not valid PGN.
class PgnParsingException : public virtual std::runtime_error {
public:
PgnParsingException(std::string message) : std::runtime_error(message) {}
};
// Raised when the input stream cannot be used.
class InputFileException : public virtual std::runtime_error {
public:
InputFileException(std::string message) : std::runtime_error(message) {}
};
}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
// Qi grammar for a list of PGN games using qi::space as skipper. Rules
// declared with qi::space_type skip whitespace between tokens; the rules
// under "lexemes" are declared without a skipper.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
pgn_parser() : pgn_parser::base_type(games) {
using namespace qi;
// Empty string used as the attribute for a missing move.
const std::string no_move;
// Textual result tokens mapped to game_move::result_t.
result.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
// Possibly empty text between double quotes.
quoted_string = '"' >> *~char_('"') >> '"';
// [Key "value"]
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
// Castling (longer form first) or a regular move with optional "e.p." suffix.
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched source text, including an optional '+' or '#'.
single_move = raw [ regular_move >> -char_("+#") ];
// <number> then either "..." (white move set to no_move) or "." <white move>,
// then a black move or no_move, then an optional result.
full_move = uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
// Optional tag section followed by at least one move.
single_game = -header >> game_description;
// Zero or more games.
games = *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, game_move(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;
qi::rule<Iterator, pgn_game, qi::space_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::space_type> games;
// lexemes
qi::symbols<char, game_move::result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
// Opens the file named by inputFilePath and hands the stream to parseInput.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream source(inputFilePath);
    this->parseInput(source);
}

// Parses from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    this->parseInput(inputFile);
}

// Nothing to release beyond what the members clean up themselves.
loloof64::PgnGamesExtractor::~PgnGamesExtractor()
{
}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
loloof64::pgn_parser<It> parser;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
// Parses PGN text from standard input and prints the game count followed by
// every move as "<number>.\t<white>\t<black>".
int main() {
    loloof64::PgnGamesExtractor pge(std::cin); // "ScotchGambit.pgn"

    // getGames() returns the vector by value; take a single copy instead of
    // one for the count and another for the loop.
    const auto games = pge.getGames();

    std::cout << "Parsed " << games.size() << " games\n";
    for (const auto& g : games)
        for (const auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
Notes:
don't read full file in memory (boost::spirit::istream_iterator)
don't manually skip (use skippers)
don't explicitly lexeme (Boost spirit skipper issues)
don't use %= if not needed
don't synthesize unneeded attributes (use raw[])
treat optional parts of a move as optional; don't store asymmetric magic flags like "..." (look for no_move)
don't be overly specific (use istream& instead of ifstream&)
Probably some other things I forgot. Output is e.g.
Parsed 6166 games
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Qf6
5. O-O d6
6. Ng5 Nh6
7. f4 Be7
8. e5 Qg6
9. exd6 cxd6
10. c3 dxc3
11. Nxc3 O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3 f5
16. Bc4 Kh8
17. Nxe7 Nxe7
18. Qxb7 Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5 Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7 Rxb2
27. Ne6 Qxe6
28. Bxe6 Rxd1+
29. Kf2
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Bc5
5. Ng5 Ne5
6. Bxf7+ Nxf7
7. Nxf7 Bb4+
8. c3 dxc3
9. bxc3 Bxc3+
10. Nxc3 Kxf7
11. Qd5+ Kf8
12. Ba3+ d6
13. e5 Qg5
14. exd6 Qxd5
Indeed the problem is with Veronica. Or, actually, it's with Ver?nica. Where ? is the code unit <93> - which, lacking codepage/encoding information could mean anything really.
You're using ascii::char_, and this requires 7-bit-only characters.
Easily fix it by changing
using ascii::char_;
into
using qi::char_;
Developing with Boost Spirit 2, I am trying to follow an example in order to track parsing progress (I will add semantic actions later) in my PGN parser (see also my related previous question), but I can't manage to avoid compilation errors. Here is the cpp file:
#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <tuple>
#include <iostream>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
// Attribute types that pair parsed data with a std::size_t captured from the
// CurrentPos helper rules.
typedef std::tuple<std::size_t, game_move> move_t;
typedef std::tuple<std::vector<pgn_tag>, std::vector<move_t>> game_t;
typedef std::tuple<std::size_t, std::vector<game_t>> pgn_t;
// Variant of the PGN grammar that additionally records positions.
// NOTE(review): the grammar is declared with attribute std::vector<pgn_game>
// while its start rule `games` is declared with pgn_t() -- confirm these are
// meant to differ.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
pgn_parser() : pgn_parser::base_type(games) {
using namespace qi;
// NOTE(review): filepos is local to the constructor, but its rules are used
// in full_move and games, which are members -- verify the rules do not
// refer to filepos after it has gone out of scope.
CurrentPos<Iterator> filepos;
const std::string no_move;
// NOTE(review): result_t is used unqualified here; confirm it is declared at
// namespace scope in the header.
result.add
("1-0", result_t::white_won)
("0-1", result_t::black_won)
("1/2-1/2", result_t::draw)
("*", result_t::undecided);
// Possibly empty text between double quotes.
quoted_string = '"' >> *~char_('"') >> '"';
// [Key "value"]
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
single_move = raw [ regular_move >> -char_("+#") ];
// Same move syntax as before, prefixed with the current-position rule.
full_move = filepos.current_pos >> uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
single_game = -header >> game_description;
// The start-position rule runs once before the list of games.
games = filepos.save_start_pos >> *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, move_t(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<move_t>, qi::space_type> game_description;
qi::rule<Iterator, game_t(), qi::space_type> single_game;
qi::rule<Iterator, pgn_t(), qi::space_type> games;
// lexemes
qi::symbols<char, result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
/// Opens the file named by inputFilePath and hands the stream to parseInput.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream(inputFilePath);
    parseInput(pgnStream);
}

/// Parses directly from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    parseInput(inputFile);
}

/// Nothing to release explicitly.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
loloof64::pgn_parser<It> parser;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
//////////////////////////////////
std::cout << "About to parse the file" << std::endl;
//////////////////////////////////
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
//////////////////////////////////
std::cout << "Finished to parse the file" << std::endl;
//////////////////////////////////
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
and the header file : header.
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace loloof64 {
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
/*
* This class has been taken from http://marko-editor.com/articles/position_tracking/
*/
/*
 * Exposes two rules for position tracking:
 *  - save_start_pos stores the iterator at the point where it is invoked;
 *  - current_pos yields std::distance(stored start, current iterator).
 * Both semantic actions are bound to `this`.
 */
template<typename Iterator>
struct CurrentPos {
CurrentPos() {
// iter_pos exposes the current iterator as _1; the action stores it.
save_start_pos = qi::omit[boost::spirit::repository::qi::iter_pos[
phx::bind(&CurrentPos::setStartPos, this, qi::_1)]];
// Assigns the computed offset to the rule's std::size_t attribute.
current_pos = boost::spirit::repository::qi::iter_pos[
qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)];
}
qi::rule<Iterator> save_start_pos;
qi::rule<Iterator, std::size_t()> current_pos;
private:
void setStartPos(const Iterator &iterator) {
start_pos_ = iterator;
}
// Offset of `iterator` from the stored start, via std::distance.
std::size_t getCurrentPos(const Iterator &iterator) {
return std::distance(start_pos_, iterator);
}
// Set only by setStartPos; not initialised by the constructor.
Iterator start_pos_;
};
/// Outcome token attached to a move.
enum result_t { white_won, black_won, draw, undecided };

/// One header tag: a key and its value.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered move pair.
/// The scalar members carry defaults so that a default- or value-constructed
/// game_move holds move 0 / undecided instead of an indeterminate value or
/// white_won (the enumerator with value 0).
struct game_move {
    unsigned move_number = 0;
    std::string white_move;
    std::string black_move;
    result_t result = undecided;
};

/// A game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};
/// Holds the games obtained from a file path or an input stream.
class PgnGamesExtractor {
public:
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::istream &inputFile);
    /*
     * Both constructors may throw PgnParsingException (if bad pgn format)
     * and InputFileException (if missing file).
     */

    /// Returns a copy of the stored games.
    std::vector<pgn_game> getGames() const { return games; }

    virtual ~PgnGamesExtractor();

private:
    std::vector<pgn_game> games;
    void parseInput(std::istream &inputFile);
};
/// Runtime error carrying a message about a PGN parsing failure.
class PgnParsingException : public virtual std::runtime_error {
public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};
/// Runtime error carrying a message about an unreadable input file.
class InputFileException : public virtual std::runtime_error {
public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};
}
#endif // PGNGAMESEXTRACTOR_HPP
I did not post the compilation errors as there are too many and as the files can be easily tested.
Of course, it's not gonna work well with a streaming interface. You can retain the start iterator, but
you won't know the stream length ahead of time (unless you get it out-of-band)
calculating the current position (distance from the start iterator) each time is going to be horrendously inefficient.
Since you mentioned in a comment you were parsing files, you should consider using memory mapping (e.g. boost::iostreams::mapped_file_source or mmap). That way, the distance calculation is instantaneous, using pointer arithmetic on the random-access iterators.
Here's a working example, with the following changes/notes:
using memory mapped input data³
omit[] in save_start_pos is useless (there is no declared attribute)
getCurrentPos was horrifically inefficient (to the extent that just using omit[current_pos] in the full_move rule slowed the parsing down by several orders of magnitude).
This is because boost::spirit::istream_iterator holds on to all previously read state in a deque and traversing them doesn't come for free when doing std::distance
Your CurrentPos<Iterator> filepos; instance goes out of scope after construction! This means that invoking save_start_pos/current_pos is Undefined Behaviour¹. Move it out of the constructor.
A subtler point is to use full_move %= ... when you add the semantic action (see docs and blog)
You changed the types on some of the rules to include position information, alongside the AST types. That's both unnecessary and flawed: the AST types would not be compatible with the tuple<size_t, T> versions of the rules.
Besides, e.g. the games rule didn't even expose a position, because save_start_pos synthesizes unused_type (no attribute).
So, drop the whole tuple business, and just work with the state of the filepos member inside your semantic action:
full_move %=
omit[filepos.current_pos [ reportProgress(_1) ]] >>
uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
Finally, as a demonstration on how to report strictly increasing progress indications², I included a simple phoenix actor:
// Function object that prints a progress percentage to std::cerr.
// `pct` remembers the last percentage printed; a new value is printed only
// when it exceeds the previous one by more than 10 percentage points.
struct reportProgress_f {
    size_t total_;              // value that corresponds to 100%
    mutable double pct = 0.0;   // last percentage printed (updated from const operator())

    reportProgress_f(size_t total) : total_(total) {}

    template <typename Position>
    void operator()(Position pos) const {
        const double current = pos * 100.0 / total_;
        const bool advanced = (current - pct) > 10;
        if (!advanced)
            return;
        //sleep(1); // because it's way too fast otherwise...
        pct = current;
        // "\r" rewinds to the start of the line so the figure is overwritten.
        std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << std::flush;
    }
};
phx::function<reportProgress_f> reportProgress;
Note reportProgress needs to be constructed with knowledge about start and end iterators, see the constructor for pgn_parser
¹ in the recorded live stream you can see I spotted the error on the first reading, then forgot about after I made it to compile. The program crashed, dutifully :) Then I remembered.
² even in the face of backtracking
³ (not strictly required, but I guess the goal wasn't to simply make it so slow you actually need the progress indicator?)
Live On Coliru
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace loloof64 {
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
/*
* This class has been taken from http://marko-editor.com/articles/position_tracking/
*/
/*
 * Position tracking helper exposing two rules:
 *  - save_start_pos stores the iterator at the point where it is invoked;
 *  - current_pos yields the distance from that stored iterator to the
 *    current one as a std::size_t.
 * Both semantic actions are bound to `this`, so the rules must not outlive
 * this object (keep it as a member of the grammar that uses it).
 */
template<typename Iterator>
struct CurrentPos {
    CurrentPos() {
        // iter_pos exposes the current iterator as _1.
        save_start_pos = qr::iter_pos [phx::bind(&CurrentPos::setStartPos, this, qi::_1)] >> qi::eps;
        current_pos    = qr::iter_pos [qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)] >> qi::eps;
    }

    qi::rule<Iterator> save_start_pos;
    qi::rule<Iterator, std::size_t()> current_pos;

private:
    void setStartPos(const Iterator &iterator) {
        start_pos_ = iterator;
    }

    std::size_t getCurrentPos(const Iterator &iterator) {
        return static_cast<std::size_t>(std::distance(start_pos_, iterator));
    }

    // Value-initialised so that it never holds an indeterminate value;
    // save_start_pos is still expected to run before current_pos.
    Iterator start_pos_{};
};
/// Outcome token attached to a move.
enum result_t { white_won, black_won, draw, undecided };

/// One header tag: a key and its value.
struct pgn_tag {
    std::string key;
    std::string value;
};

/// One numbered move pair.
/// The scalar members carry defaults so that a default- or value-constructed
/// game_move holds move 0 / undecided instead of an indeterminate value or
/// white_won (the enumerator with value 0).
struct game_move {
    unsigned move_number = 0;
    std::string white_move;
    std::string black_move;
    result_t result = undecided;
};

/// A game: its header tags followed by its moves.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};
/// Holds the games obtained from the file named at construction.
class PgnGamesExtractor {
public:
    PgnGamesExtractor(std::string const& inputFilePath);
    /*
     * Intended contract: construction may throw PgnParsingException (if bad
     * pgn format) and InputFileException (if missing file).
     */

    /// Returns a copy of the stored games.
    std::vector<pgn_game> getGames() const { return games; }

    virtual ~PgnGamesExtractor();

private:
    std::vector<pgn_game> games;
    void parseInput(std::string const&);
};
/// Runtime error carrying a message about a PGN parsing failure.
class PgnParsingException : public virtual std::runtime_error {
public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};
/// Runtime error carrying a message about an unreadable input file.
class InputFileException : public virtual std::runtime_error {
public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};
}
#endif // PGNGAMESEXTRACTOR_HPP
//#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <iomanip>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
/**
 * Spirit.Qi grammar producing a std::vector<pgn_game>, with qi::space as the
 * skipper. While parsing it reports progress (position relative to the
 * [start, end) length) on std::cerr.
 *
 * All rule attributes are written in signature form (`T()`), so `header` and
 * `game_description` are declared the same way as the other rules.
 */
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::space_type> {
    /// @param start,end  the input range; only its length is used, as the 100% mark.
    pgn_parser(Iterator start, Iterator end)
        : pgn_parser::base_type(games),
          reportProgress(std::distance(start, end))
    {
        using namespace qi;

        // Empty string used as the attribute when one side of a move is absent.
        const std::string no_move;

        // Symbol table mapping PGN result tokens to result_t values.
        result.add
            ("1-0",     result_t::white_won)
            ("0-1",     result_t::black_won)
            ("1/2-1/2", result_t::draw)
            ("*",       result_t::undecided);

        quoted_string = '"' >> *~char_('"') >> '"';
        tag           = '[' >> +alnum >> quoted_string >> ']';
        header        = +tag;

        regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        // raw[] exposes the matched source text, including an optional '+' or '#'.
        single_move  = raw [ regular_move >> -char_("+#") ];

        // %= keeps automatic attribute propagation although the rule contains
        // a semantic action; omit[] keeps the position out of the attribute.
        full_move %=
            omit[filepos.current_pos [ reportProgress(_1) ]] >>
            uint_
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
            >> (single_move | attr(no_move))
            >> -result;

        game_description = +full_move;
        single_game      = -header >> game_description;
        // Record the start iterator once, before the first game.
        games            = filepos.save_start_pos >> *single_game;

        BOOST_SPIRIT_DEBUG_NODES(
            (tag)(header)(quoted_string)(regular_move)(single_move)
            (full_move)(game_description)(single_game)(games)
        )
    }

private:
    // Prints a percentage when it exceeds the last printed one by more than
    // 10 points; `pct` holds the last printed value.
    struct reportProgress_f {
        size_t total_;
        mutable double pct = 0.0;
        reportProgress_f(size_t total) : total_(total) {}
        template<typename T>
        void operator()(T pos) const {
            double newpct = pos * 100.0 / total_;
            if ((newpct - pct) > 10) {
                //sleep(1); // because it's way too fast otherwise...
                pct = newpct;
                std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << " " << std::flush;
            }
        }
    };
    phx::function<reportProgress_f> reportProgress;

    // Member (not a constructor local): its rules are referenced by the rules below.
    CurrentPos<Iterator> filepos;

    qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
    qi::rule<Iterator, std::vector<pgn_tag>(), qi::space_type> header;
    qi::rule<Iterator, game_move(), qi::space_type> full_move;
    qi::rule<Iterator, std::vector<game_move>(), qi::space_type> game_description;
    qi::rule<Iterator, pgn_game(), qi::space_type> single_game;
    qi::rule<Iterator, std::vector<pgn_game>(), qi::space_type> games;

    // lexemes (declared without a skipper)
    qi::symbols<char, result_t> result;
    qi::rule<Iterator, std::string()> quoted_string;
    qi::rule<Iterator> regular_move;
    qi::rule<Iterator, std::string()> single_move;
};
}
#include <boost/iostreams/device/mapped_file.hpp>
/// Nothing to release explicitly.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {}

/// Parses the file named by inputFilePath right away.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string const& inputFilePath)
{
    parseInput(inputFilePath);
}
void loloof64::PgnGamesExtractor::parseInput(std::string const& inputFilePath) {
boost::iostreams::mapped_file_source mf(inputFilePath);
//if (inputFile.fail() || inputFile.bad())
//throw new InputFileException("Could not read the input file !");
typedef char const* It;
std::vector<loloof64::pgn_game> temp_games;
/* It iter(inputFile >> std::noskipws), end; */
auto iter = mf.begin();
auto end = mf.end();
loloof64::pgn_parser<It> parser(iter, end);
//////////////////////////////////
//std::cout << "About to parse the file" << std::endl;
//////////////////////////////////
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
//////////////////////////////////
//std::cout << "Finished to parse the file" << std::endl;
//////////////////////////////////
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
// Parses ScotchGambit.pgn and prints every move of every game.
// Returns 1 (after printing the message to std::cerr) if parsing throws.
int main() {
    try {
        loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
        // getGames() returns by value: take one copy and reuse it.
        auto const games = pge.getGames();
        std::cout << "Parsed " << games.size() << " games\n";
        for (auto const& g : games)
            for (auto const& m : g.moves)
                std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
    } catch (std::exception const& e) {
        std::cerr << "Error: " << e.what() << "\n";
        return 1;
    }
}
With sample output
Progress 32.6
Progress 44.5
Progress 55.5
Progress 67.2
Progress 77.2
Progress 89.1
Progress 100.0Parsed 1 games
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Qf6
5. O-O d6
6. Ng5 Nh6
7. f4 Be7
8. e5 Qg6
9. exd6 cxd6
10. c3 dxc3
11. Nxc3 O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3 f5
16. Bc4 Kh8
17. Nxe7 Nxe7
18. Qxb7 Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5 Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7 Rxb2
27. Ne6 Qxe6
28. Bxe6 Rxd1+
29. Kf2
Note that on a terminal, the progress indication will self-update using a carriage-return instead of printing separate lines
Solved the problem by following this Sehe video tutorial
Also, one should notice that, as this time he is using a boost::iostreams::mapped_file_source instead of a ifstream as I did, the process is really speeding up ! So the progress bar is not needed any more for this process.
Cpp file and Hpp file
I need to parse following EBNF expression with Boost::Spirit.
period ::= date_part [time_part] , date_part [time_part]
time_part ::= hours:minutes[:seconds]
date_part ::= day.month.year
For example, 10.06.2014 10:00:15, 11.07.2014. I made my grammar in two ways, but can't exactly get working example.
1) First attempt
// First attempt (does not compile, see the error quoted below): date_ and
// time_ each collect a '.'- or ':'-separated list of ints into a
// std::vector<int>; the semantic action on datetime_ then indexes those
// vectors (_1, _2) to construct a ptime.
// NOTE(review): the grammar is declared without an attribute while the start
// rule datetime_ is declared with ptime().
struct Parser: grammar<std::string::const_iterator, space_type>
{
Parser(): Parser::base_type(datetime_)
{
using boost::spirit::int_;
using boost::spirit::qi::_1;
using boost::spirit::qi::_2;
using boost::spirit::qi::_val;
// NOTE(review): date(...), hours(...) etc. are ordinary (eager) calls applied
// to lazy placeholders; the parentheses in the two construct<> calls are
// also unbalanced, and seconds uses [_2[0]] where the others use (...).
datetime_ =
(date_ >> time_)
[
_val =
phoenix::construct<ptime>
(
date(_1[2]), _1[1], _1[0]),
hours(_2[0]) + minutes(_2[1]) + seconds[_2[0]]
)
|
_val =
phoenix::construct<ptime>
(
date(_1[2]), _1[1], _1[0]),
seconds(0)
)
];
// '%' is the list operator: ints separated by the given character.
date_ %= int_ % '.';
time_ %= int_ % ':';
BOOST_SPIRIT_DEBUG_NODE(datetime_);
BOOST_SPIRIT_DEBUG_NODE(date_);
BOOST_SPIRIT_DEBUG_NODE(time_);
}
rule<std::string::const_iterator, std::vector<int>(), space_type> date_, time_;
rule<std::string::const_iterator, ptime(), space_type> datetime_;
}
Parser parser;
std::string strTest("10.06.2014 10:00:15, 11.07.2014");
std::string::const_iterator it_begin(strTest.begin());
std::string::const_iterator it_end(strTest.end());
bool result = phrase_parse(it_begin, it_end, parser, space);
Errors:
/media/Data/Projects/Qt/Planner/parser.h:108: ошибка: no matching function for call to 'boost::gregorian::date::date(boost::phoenix::detail::make_index_composite<boost::phoenix::actor<boost::spirit::argument<0> >, int>::type)'
And so on. I can't cast boost::spirit::argument<0> to int or date::years_type. I tried date((int)_1[2]), (int)_1[1], (int)_1[0])) and dynamic_cast<int>(_1[2]), but with no success.
2) Second attempt
// Second attempt (does not compile, see the error quoted below): one rule per
// value type (time_period, ptime, date, time_duration), each with a semantic
// action that C-casts placeholders to int before passing them to
// phoenix::construct.
// NOTE(review): the iterator type is spelled const_itearator here and
// const_itarator in the rule declarations below.
struct Parser: grammar<std::string::const_itearator, space_type>
{
Parser(ConditionTree& a_lTree):
Parser::base_type(time_period_),
m_lTree(a_lTree)
{
using boost::spirit::int_;
using boost::spirit::qi::_1;
using boost::spirit::qi::_2;
using boost::spirit::qi::_3;
using boost::spirit::qi::_4;
using boost::spirit::qi::_5;
using boost::spirit::qi::_val;
// '>' is the expectation operator: once the left side matched, the right side must match.
time_period_ = ( datetime_ > ',' > datetime_ ) [ _val = phoenix::construct<time_period>((int)_1, (int)_3) ];
datetime_ = (date_ >> time_duration_) [ _val = phoenix::construct<ptime>((int)_1, (int)_2) | _val = phoenix::construct<ptime>((int)_1, seconds(0)) ] ;
// Placeholders _5, _3, _1 are passed in that order to construct<date>.
date_ = (int_ > '.' > int_ > '.' > int_) [ _val = phoenix::construct<date>((int)_5, (int)_3, (int)_1) ];
time_duration_ = (int_ > ':' > int_ > ':' > int_) [ _val = phoenix::construct<time_duration>((int)_1, (int)_3, (int)_5, 0)];
BOOST_SPIRIT_DEBUG_NODE(time_period_);
BOOST_SPIRIT_DEBUG_NODE(datetime_);
BOOST_SPIRIT_DEBUG_NODE(date_);
BOOST_SPIRIT_DEBUG_NODE(time_duration_);
}
rule<std::string::const_itarator, time_period(), space_type> time_period_;
rule<std::string::const_itarator, ptime(), space_type> datetime_;
rule<std::string::const_itarator, date(), space_type> date_;
rule<std::string::const_itarator, time_duration(), space_type> time_duration_;
// Stored by reference; not used by the rules shown here.
ConditionTree& m_lTree;
};
Error:
/media/Data/Projects/Qt/Planner/parser.h:114: ошибка: invalid cast from type 'const _1_type {aka const boost::phoenix::actor<boost::spirit::argument<0> >}' to type 'int'...
Why I can't cast boost::spirit::argument<0> to int????
Better question, why would you be able to cast a placeholder type to a specific primitive type?
The place holder is a lazy actor only, so you should use Phoenix cast_ to cast it, if at all (hint: this should not be necessary): Live On Coliru
Output
<period_>
<try>10.06.2014 10:00:15,</try>
<date_>
<try>10.06.2014 10:00:15,</try>
<success> 10:00:15, 11.07.201</success>
<attributes>[[10, 6, 2014]]</attributes>
</date_>
<time_>
<try> 10:00:15, 11.07.201</try>
<success>, 11.07.2014</success>
<attributes>[[10, 0, 15]]</attributes>
</time_>
<date_>
<try> 11.07.2014</try>
<success></success>
<attributes>[[11, 7, 2014]]</attributes>
</date_>
<time_>
<try></try>
<fail/>
</time_>
<success></success>
<attributes>[[[[10, 6, 2014], [10, 0, 15]], [[11, 7, 2014], [empty]]]]</attributes>
</period_>
Parse success
Full Sample
#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Plain data types filled in by the parser.
namespace Ast {
    using boost::optional;

    // Calendar date.
    struct date {
        unsigned day;
        unsigned month;
        unsigned year;
    };

    // Time of day.
    struct time {
        unsigned hours;
        unsigned minutes;
        unsigned seconds;
    };

    // A date with an optional time part.
    struct date_time {
        date date_part;
        optional<time> time_part;
    };

    // Two date_time values: start and end.
    struct period {
        date_time start;
        date_time end;
    };
}
BOOST_FUSION_ADAPT_STRUCT(Ast::date, (unsigned,day)(unsigned,month)(unsigned,year))
BOOST_FUSION_ADAPT_STRUCT(Ast::time, (unsigned,hours)(unsigned,minutes)(unsigned,seconds))
BOOST_FUSION_ADAPT_STRUCT(Ast::date_time, (Ast::date,date_part)(Ast::optional<Ast::time>, time_part))
BOOST_FUSION_ADAPT_STRUCT(Ast::period, (Ast::date_time,start)(Ast::date_time,end))
/**
 * Grammar for
 *   period    ::= date_part [time_part] , date_part [time_part]
 *   time_part ::= hours:minutes[:seconds]
 *   date_part ::= day.month.year
 * producing an Ast::period; whitespace is skipped (qi::space).
 */
template <typename Iterator>
struct Parser : qi::grammar<Iterator, Ast::period(), qi::space_type>
{
    int test; // unused by the grammar; kept so the struct's members are unchanged

    Parser() : Parser::base_type(period_)
    {
        using namespace qi;

        // Fixed-width *unsigned* fields: uint_parser does not accept a
        // leading sign, unlike int_parser<unsigned, ...>.
        static const uint_parser<unsigned, 10, 2, 2> _2digit = {};
        static const uint_parser<unsigned, 10, 4, 4> _4digit = {};

        // Seconds are optional in the EBNF; attr(0u) supplies 0 when absent.
        time_      = _2digit >> ":" >> _2digit >> (":" >> _2digit | attr(0u));
        date_      = _2digit >> "." >> _2digit >> "." >> _4digit;
        date_time_ = date_ >> -time_;
        period_    = date_time_ >> "," >> date_time_;

        BOOST_SPIRIT_DEBUG_NODES((period_)(time_)(date_))
    }

private:
    qi::rule<Iterator, Ast::period(), qi::space_type> period_;
    qi::rule<Iterator, Ast::date(), qi::space_type> date_;
    qi::rule<Iterator, Ast::time(), qi::space_type> time_;
    qi::rule<Iterator, Ast::date_time(), qi::space_type> date_time_;
};
// Parses one sample period, reports success or failure, and prints whatever
// input was left unconsumed.
int main()
{
    using It = std::string::const_iterator;

    const std::string input("10.06.2014 10:00:15, 11.07.2014");
    It f = input.begin();
    const It l = input.end();

    Parser<It> parser;
    Ast::period parsed;
    const bool ok = qi::phrase_parse(f, l, parser, qi::space, parsed);

    std::cout << (ok ? "Parse success\n" : "Parse failed\n");

    // phrase_parse advances f; anything between f and l was not consumed.
    if (f != l)
        std::cout << "Remaining unparsed input: '" << std::string(f, l) << "'\n";
}
I have some complicated structures and i want to extract their data from a text using
boost::spirit library (I've selected this one for efficiency purpose).
but i will ask my question in simpler way.
assume, we have two structures like these:
// A person record: name plus age.
struct person
{
    std::string name;   // e.g. "jane"
    uint8_t     age;    // stored in a single unsigned byte
};
and
// A fruit record: colour name plus average weight.
struct fruit
{
    std::string color;          // e.g. "green"
    double      average_weight; // `double` is a built-in type; it is not in namespace std
};
and our text that included these data is presented below:
"... (jane, 23) (david, 19) (mary, 30) [yello,100] [green, 60.6] [red, 30.5]"
now, the problem is "extracting these data in suitable format"
for example by call handler for each struct or push_back them on vector.
any help would be greatly appreciated!
is there any code sample about that?!
call handlers for parsed structures.
#include <string>
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/qi.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace fusion = boost::fusion;
// A person record: name plus age.
struct person
{
    std::string name;   // e.g. "jane"
    uint8_t     age;    // stored in a single unsigned byte
};
BOOST_FUSION_ADAPT_STRUCT
(
person,
(std::string, name)
(uint8_t, age)
);
// A fruit record: colour name plus average weight.
struct fruit
{
    std::string color;          // e.g. "green"
    double      average_weight; // e.g. 60.6
};
BOOST_FUSION_ADAPT_STRUCT
(
fruit,
(std::string, color)
(double, average_weight)
);
/**
 * Grammar that recognises any sequence of "(name, age)" and "[color, weight]"
 * items and invokes a handler (semantic action) for each parsed struct.
 * It exposes no attribute (void()); whitespace is skipped (ascii::space).
 *
 * The template parameter is named Iterator: identifiers that start with an
 * underscore followed by an uppercase letter (such as _Iterator) are reserved
 * for the implementation.
 */
template <typename Iterator>
struct parser :
    qi::grammar<Iterator, void(), ascii::space_type>
{
    parser() :
        parser::base_type(main)
    {
        main =
            *(
                _person[ ([](const person &person_)
                {
                    // Add handler here
                }) ]
              | _fruit[ ([](const fruit &fruit_)
                {
                    // Add handler here
                }) ]
            );
        // Name/colour: every character up to the first ','.
        _person = qi::lit('(') >> *(qi::char_ - ',') >> ',' >> qi::ushort_ >> ')';
        _fruit  = qi::lit('[') >> *(qi::char_ - ',') >> ',' >> qi::double_ >> ']';
    }

    qi::rule<Iterator, void(), ascii::space_type> main;
    qi::rule<Iterator, person(), ascii::space_type> _person;
    qi::rule<Iterator, fruit(), ascii::space_type> _fruit;
};
// Parses a sample input and returns 0 only if the whole input was consumed
// successfully (previously the result was computed but ignored).
int main()
{
    typedef std::string::const_iterator iterator;
    std::string input_ = "(jane, 23000) (david, 19) (mary, 30) [yello,100] [green, 60.6] [red, 30.5]";
    iterator iterator_ = std::begin(input_);
    // Success requires both a match and no leftover input.
    bool result_ = qi::phrase_parse(iterator_, iterator(std::end(input_)), parser<iterator>(), ascii::space)
        && iterator_ == std::end(input_);
    return result_ ? 0 : 1;
}
P.S. Not all compilers can build this code because of the lambdas in semantic actions (MSVS can't). In that case you have to use something else (phoenix::bind, for example).
store parsed structures in a vector
// Element type of the result vector: either a person or a fruit.
using variant = boost::variant<person, fruit>;
/**
 * Grammar that recognises any sequence of "(name, age)" and "[color, weight]"
 * items and collects them into a std::vector<variant>.
 * Whitespace is skipped (ascii::space).
 *
 * The template parameter is named Iterator: identifiers that start with an
 * underscore followed by an uppercase letter (such as _Iterator) are reserved
 * for the implementation.
 */
template <typename Iterator>
struct parser :
    qi::grammar<Iterator, std::vector < variant > (), ascii::space_type>
{
    parser() :
        parser::base_type(main)
    {
        // Each alternative's struct is stored as one variant element.
        main = *(_person | _fruit);
        // Name/colour: every character up to the first ','.
        _person = qi::lit('(') >> *(qi::char_ - ',') >> ',' >> qi::ushort_ >> ')';
        _fruit  = qi::lit('[') >> *(qi::char_ - ',') >> ',' >> qi::double_ >> ']';
    }

    qi::rule<Iterator, std::vector < variant > (), ascii::space_type> main;
    qi::rule<Iterator, person(), ascii::space_type> _person;
    qi::rule<Iterator, fruit(), ascii::space_type> _fruit;
};
I use boost spirit to parse a color. That worked quite well,
but after I changed the iterator type, the skipper stopped working.
"rgb(1.0,1.0,0.5)" // this works
" rgb(0.2,0.2,0.2)" // this fails
Here is the header:
// Grammar producing a Color from StringIterator input, with chs::space_type
// as the skipper. The rules are defined in the constructor (see the .cpp).
struct ColorGrammar : public qi::grammar<StringIterator, Color(), chs::space_type>
{
//! Iterator type for this grammar
typedef StringIterator ItType;
//! Skipper type used in this grammar
typedef chs::space_type Skipper;
//! Rule to parse a number with up to 3 digits
qi::uint_parser<uint8, 10, 1, 3> number;
//! Rule to parse a hex digit
qi::uint_parser<uint8, 16, 1, 1> hexdigit;
ColorGrammar();
//! Rule for rgb(...)
// Two float locals (_a, _b) hold the first two components.
qi::rule<ItType, Color(), qi::locals<float, float>, Skipper> rule_rgb;
//! Rule for rgba(...)
// Three float locals (_a, _b, _c) hold the first three components.
qi::rule<ItType, Color(), qi::locals<float, float, float>, Skipper> rule_rgba;
//! Mainrule
qi::rule<ItType, Color(), Skipper> rule_color;
};
Here is the cpp
// Defines the rules. rule_color is the start rule and is named
// "color-grammar".
ColorGrammar::ColorGrammar()
: ColorGrammar::base_type(rule_color, "color-grammar")
{
using namespace qi::labels;
using boost::phoenix::construct;
auto& _1 = qi::_1;
// Each component is stored in a rule local (_a, _b, _c) as it is parsed; the
// action on the last float constructs the Color into _val.
rule_rgb = '(' >> qi::float_[_a = _1] >> ',' >> qi::float_[_b = _1] >> ',' >> qi::float_[_val = phx::construct<Color>(_a, _b, _1)] >> ')';
rule_rgba = '(' >> qi::float_[_a = _1] >> ',' >> qi::float_[_b = _1] >> ',' >> qi::float_[_c = _1] >> ',' >> qi::float_[_val = phx::construct<Color>(_a, _b, _c, _1)] >> ')';
// The keyword is matched here; the parenthesised part by the sub-rule.
rule_color = (qi::lit("rgb") >> rule_rgb)
| (qi::lit("rgba") >> rule_rgba);
}
And the call:
Color out;
StringIterator begin = str.cbegin();
StringIterator end = str.cend();
bool result = qi::phrase_parse(begin, end, color_, chs::space, out);
I'm sure it is only a little mistake, but I am not able to see it.
Maybe I have looked at the source for too long... can you see a mistake?
I can't see what's wrong: I've taken the effort to reconstruct your SSCCE.
http://liveworkspace.org/code/1pDtmn$1
In the process, it seems I must have removed the problem. I suggest you do the same.
Oh, and this is how I'd write this:
no more phoenix
no more constructors
no more qi::locals
no more needless copying
using expectation points
In short: no more fuss.
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <cstdint>
namespace qi = boost::spirit::qi;
namespace chs = boost::spirit::ascii; //qi;
typedef std::string::const_iterator StringIterator;
// Colour as four float components, in the order r, g, b, a.
struct Color
{
    float r;
    float g;
    float b;
    float a;
};
BOOST_FUSION_ADAPT_STRUCT(Color, (float, r)(float, g)(float, b)(float, a))
/**
 * Grammar for "rgb(r,g,b)" and "rgba(r,g,b,a)" producing a Color.
 *
 * @tparam ItType   iterator type of the input
 * @tparam Skipper  skipper type passed to phrase_parse
 *
 * The base class is instantiated with ItType (it previously named the global
 * StringIterator typedef, which ignored the template parameter).
 */
template <typename ItType, typename Skipper>
struct ColorGrammar : public qi::grammar<ItType, Color(), Skipper>
{
    ColorGrammar()
        : ColorGrammar::base_type(rule_color, "color-grammar")
    {
        using namespace qi;
        // '>>' up to the '(' lets the alternative below backtrack (so "rgba("
        // can fall through to rule_rgba); from there on '>' makes every
        // following element mandatory. attr(1.0f) supplies alpha for rgb().
        rule_rgb   = lit("rgb")  >> '(' > float_ > ',' > float_ > ',' > float_ > attr(1.0f) > ')';
        rule_rgba  = lit("rgba") >> '(' > float_ > ',' > float_ > ',' > float_ > ',' > float_ > ')';
        rule_color = rule_rgb | rule_rgba;
    }

private:
    qi::uint_parser<uint8_t, 10, 1, 3> number;   // unused
    qi::uint_parser<uint8_t, 16, 1, 1> hexdigit; // unused
    qi::rule<ItType, Color(), Skipper> rule_rgb, rule_rgba, rule_color;
};
int main()
{
Color out;
std::string str = " rgb ( 0.3 , .4 , 0.5 )";
StringIterator begin = str.cbegin();
StringIterator end = str.cend();
ColorGrammar<StringIterator, chs::space_type> color_;
bool result = qi::phrase_parse(begin, end, color_, chs::space, out);
std::cout << std::boolalpha << result << '\n';
std::cout << "remains: '" << std::string(begin, end) << "'\n";
}
Live on http://liveworkspace.org/code/35htD$3