Related
This does not compile (code below).
There was another question here with the same error. But I don't understand the answer. I already tried inserting qi::eps in places -- but without success.
I also already tried adding metafunctions (boost::spirit::traits::is_container) for the types used -- but this does not help either.
I also tried using the same variant containing all types I need to use everywhere. Same problem.
Has anybody gotten this working for a lexer returning something else than double or int or string? And for the parser also returning non-trivial objects?
I've tried implementing semantic functions everywhere returning default objects. But this also does not help.
Here comes the code:
// spirit_error.cpp : Defines the entry point for the console application.
//
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/mpl/index_of.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace frank
{
/// Common base of the intrusively reference-counted classes in this file
/// (symbol and type derive from it). The destructor is virtual so that a
/// derived object is destroyed correctly through a ref_counter pointer.
class ref_counter
	: public boost::intrusive_ref_counter<ref_counter>
{
public:
	virtual ~ref_counter() = default;
};
/// Base class of nature and discipline; the grammars below use symbolPtr
/// (or a vector of them) as their attribute type.
class symbol : public ref_counter
{
public:
	/// Reference-counted handle to an immutable symbol.
	using symbolPtr = boost::intrusive_ptr<const symbol>;
	/// Sequence of symbol handles.
	using symbolVector = std::vector<symbolPtr>;

	/// Scope marker; construction and destruction currently do nothing.
	struct push_scope
	{
		push_scope() {}
		~push_scope() {}
	};
};
/// Symbol describing a nature declaration.
class nature : public symbol
{
public:
	/// Attributes a nature may carry.
	enum enumAttribute { eAbstol, eAccess, eDDT, eIDT, eUnits };

	/// Placeholder alternative for "no value": all instances compare
	/// equivalent and print as nothing.
	struct empty
	{
		friend std::ostream &operator<<(std::ostream &_r, const empty&)
		{
			return _r;
		}
		bool operator<(const empty&) const
		{
			return false;
		}
	};

	/// Value of an attribute: either nothing or a string.
	using attributeValue = boost::variant<empty, std::string>;
};
/// Symbol describing a discipline declaration.
class discipline : public symbol
{
public:
	/// Domain values a discipline can declare.
	enum enumDomain { eDiscrete, eContinuous };
};
/// Reference-counted type object; typePtr is the attribute type of the
/// real/integer/string keyword tokens in the lexer.
class type : public ref_counter
{
public:
	using typePtr = boost::intrusive_ptr<type>;
};
/// Iterator over the characters of a std::string that stores an index rather
/// than a pointer, so it remains usable after the string has been modified
/// (onInclude erases from and inserts into the string while lexing).
///
/// The past-the-end position is encoded as the sentinel index ~std::size_t(0),
/// which stays "the end" even when the string length changes.
struct myIterator:std::iterator<std::random_access_iterator_tag, char, std::ptrdiff_t, const char*, const char&>
{
	std::string *m_p;     ///< string being iterated; nullptr when default-constructed
	std::size_t m_iPos;   ///< current index, or ~std::size_t(0) for past-the-end

	/// Singular iterator that refers to no string.
	myIterator(void)
		:m_p(nullptr),
		m_iPos(~std::size_t(0))
	{
	}
	/// Begin iterator of _r, or its end iterator when _bEnd is true.
	/// For an empty string begin and end compare equal.
	myIterator(std::string &_r, const bool _bEnd = false)
		:m_p(&_r),
		m_iPos((_bEnd || _r.empty()) ? ~std::size_t(0) : 0)
	{
	}
	myIterator(const myIterator &_r)
		:m_p(_r.m_p),
		m_iPos(_r.m_iPos)
	{
	}
	myIterator &operator=(const myIterator &_r)
	{
		if (this != &_r)
		{
			m_p = _r.m_p;
			m_iPos = _r.m_iPos;
		}
		return *this;
	}
	/// Index usable for arithmetic: the end sentinel is mapped to the current
	/// string length; every other position is returned unchanged.
	std::size_t offset(void) const
	{
		if (m_p != nullptr && m_iPos == ~std::size_t(0))
			return m_p->size();
		return m_iPos;
	}
	/// Bounds-checked access (std::string::at throws on an invalid index).
	const char &operator*(void) const
	{
		return m_p->at(m_iPos);
	}
	bool operator==(const myIterator &_r) const
	{
		return m_p == _r.m_p && m_iPos == _r.m_iPos;
	}
	bool operator!=(const myIterator &_r) const
	{
		return m_p != _r.m_p || m_iPos != _r.m_iPos;
	}
	/// Advances by one character; stepping past the last one yields the end sentinel.
	myIterator &operator++(void)
	{
		++m_iPos;
		if (m_iPos == m_p->size())
			m_iPos = ~std::size_t(0);
		return *this;
	}
	myIterator operator++(int)
	{
		const myIterator s(*this);
		operator++();
		return s;
	}
	/// Steps back by one character. Decrementing the end iterator yields the
	/// last character instead of a bogus index just below the sentinel.
	myIterator &operator--(void)
	{
		m_iPos = offset();
		--m_iPos;
		return *this;
	}
	myIterator operator--(int)
	{
		const myIterator s(*this);
		operator--();
		return s;
	}
	/// Orders by position within the same string (the sentinel is the largest
	/// index), otherwise by the address of the string.
	bool operator<(const myIterator &_r) const
	{
		if (m_p == _r.m_p)
			return m_iPos < _r.m_iPos;
		else
			return m_p < _r.m_p;
	}
	/// Signed distance in characters. The end sentinel counts as the string
	/// length, so `end - begin` equals size() and std::distance / range
	/// constructors such as std::string(first, last) work with an end iterator.
	std::ptrdiff_t operator-(const myIterator &_r) const
	{
		return static_cast<std::ptrdiff_t>(offset())
			- static_cast<std::ptrdiff_t>(_r.offset());
	}
};
/// Lexer action for an include statement: replaces the matched text in the
/// underlying string with the included content and tells the lexer to ignore
/// the matched token.
struct onInclude
{
	auto operator()(myIterator &_rStart, myIterator &_rEnd) const
	{
		std::string &rText = *_rStart.m_p;
		const std::size_t iFirst = _rStart.m_iPos;
		// drop the matched include statement ...
		rText.erase(iFirst, _rEnd.m_iPos - iFirst);
		// ... and put the contents of the file in its place
		rText.insert(iFirst, "abcd");
		// collapse the match so that it ends where the inserted text begins
		_rEnd = _rStart;
		return lex::pass_flags::pass_ignore;
	}
};
/// Token definitions for the language parsed below.
///
/// Three lexer states are populated: the default state (punctuation, keywords,
/// literals, identifiers), "INCLUDE" (entered after `include; expects the file
/// name as a string) and "WS" (whitespace and comments, used as the skipper by
/// the grammars).
template<typename LEXER>
class lexer:public lex::lexer<LEXER>
{
public:
	lex::token_def<type::typePtr> m_sKW_real, m_sKW_integer, m_sKW_string;
	lex::token_def<lex::omit> m_sLineComment, m_sCComment;
	lex::token_def<lex::omit> m_sWS;
	lex::token_def<lex::omit> m_sSemicolon, m_sEqual, m_sColon, m_sInclude, m_sCharOP, m_sCharCP,
		m_sComma;
	lex::token_def<std::string> m_sIdentifier, m_sString;
	lex::token_def<double> m_sReal;
	lex::token_def<int> m_sInteger;
	lex::token_def<lex::omit> m_sKW_units, m_sKW_access, m_sKW_idt_nature, m_sKW_ddt_nature, m_sKW_abstol,
		m_sKW_nature, m_sKW_endnature, m_sKW_continuous, m_sKW_discrete,
		m_sKW_potential, m_sKW_flow, m_sKW_domain, m_sKW_discipline, m_sKW_enddiscipline, m_sKW_module,
		m_sKW_endmodule, m_sKW_parameter;

	/// Converts the matched character range to T via boost::lexical_cast.
	template<typename T>
	struct extractValue
	{
		T operator()(const myIterator &_rStart, const myIterator &_rEnd) const
		{
			return boost::lexical_cast<T>(std::string(_rStart, _rEnd));
		}
	};
	/// Returns the matched text without its first and last character
	/// (the surrounding double quotes of a string token).
	struct extractString
	{
		std::string operator()(const myIterator &_rStart, const myIterator &_rEnd) const
		{
			const auto s = std::string(_rStart, _rEnd);
			return s.substr(1, s.size() - 2);
		}
	};

	/// Defines all token patterns and registers them with the lexer states.
	/// The initializers are listed in member declaration order, which is the
	/// order in which the members are actually constructed.
	lexer(void)
		:m_sKW_real("\"real\""),
		m_sKW_integer("\"integer\""),
		m_sKW_string("\"string\""),
		m_sLineComment("\\/\\/[^\\n]*"),
		// "/*", then anything up to the first run of '*' that is directly
		// followed by '/'. This also matches comments ending in "**/".
		m_sCComment("\\/\\*"
			"[^*]*\\*+"
			"([^/*][^*]*\\*+)*"
			"\\/"),
		m_sWS("[ \\t\\n\\r]+"),
		m_sSemicolon("\";\""),
		m_sEqual("\"=\""),
		m_sColon("\":\""),
		m_sInclude("\"`include\""),
		m_sCharOP("\"(\""),
		m_sCharCP("\")\""),
		m_sComma("\",\""),
		m_sIdentifier("[a-zA-Z_]+[a-zA-Z0-9_]*"),
		// a double quote, any number of non-quote characters, a double quote
		// (escaped quotes inside the string are not supported)
		m_sString("[\\\"]"
			"[^\\\"]*"
			"[\\\"]"),
		//realnumber ({uint}{exponent})|((({uint}\.{uint})|(\.{uint})){exponent}?)
		//exponent [Ee][+-]?{uint}
		//uint [0-9][_0-9]*
		m_sReal("({uint}{exponent})"
			"|"
			"("
				"(({uint}[\\.]{uint})|([\\.]{uint})){exponent}?"
			")"
		),
		m_sInteger("{uint}"),
		m_sKW_units("\"units\""),
		m_sKW_access("\"access\""),
		m_sKW_idt_nature("\"idt_nature\""),
		m_sKW_ddt_nature("\"ddt_nature\""),
		m_sKW_abstol("\"abstol\""),
		m_sKW_nature("\"nature\""),
		m_sKW_endnature("\"endnature\""),
		m_sKW_continuous("\"continuous\""),
		m_sKW_discrete("\"discrete\""),
		m_sKW_potential("\"potential\""),
		m_sKW_flow("\"flow\""),
		m_sKW_domain("\"domain\""),
		m_sKW_discipline("\"discipline\""),
		m_sKW_enddiscipline("\"enddiscipline\""),
		m_sKW_module("\"module\""),
		m_sKW_endmodule("\"endmodule\""),
		m_sKW_parameter("\"parameter\"")
	{
		// named sub-patterns referenced as {uint} / {exponent} above
		this->self.add_pattern
			("uint", "[0-9]+")
			("exponent", "[eE][\\+\\-]?{uint}");

		// Default state. The registration order is kept exactly as it was:
		// keywords come before m_sIdentifier and m_sReal before m_sInteger.
		// NOTE(review): check Spirit.Lex's tie-breaking rules before reordering.
		this->self = m_sSemicolon
			| m_sEqual
			| m_sColon
			| m_sCharOP
			| m_sCharCP
			| m_sComma
			| m_sString[lex::_val = boost::phoenix::bind(extractString(), lex::_start, lex::_end)]
			| m_sKW_real//[lex::_val = boost::phoenix::bind(&type::getReal)]
			| m_sKW_integer//[lex::_val = boost::phoenix::bind(&type::getInteger)]
			| m_sKW_string//[lex::_val = boost::phoenix::bind(&type::getString)]
			| m_sKW_parameter
			| m_sKW_units
			| m_sKW_access
			| m_sKW_idt_nature
			| m_sKW_ddt_nature
			| m_sKW_abstol
			| m_sKW_nature
			| m_sKW_endnature
			| m_sKW_continuous
			| m_sKW_discrete
			| m_sKW_domain
			| m_sKW_discipline
			| m_sKW_enddiscipline
			| m_sReal[lex::_val = boost::phoenix::bind(extractValue<double>(), lex::_start, lex::_end)]
			| m_sInteger[lex::_val = boost::phoenix::bind(extractValue<int>(), lex::_start, lex::_end)]
			| m_sKW_potential
			| m_sKW_flow
			| m_sKW_module
			| m_sKW_endmodule
			| m_sIdentifier
			| m_sInclude [ lex::_state = "INCLUDE" ]
			;

		// After `include: the file name string triggers onInclude and the
		// lexer returns to the default state.
		this->self("INCLUDE") += m_sString [
			lex::_state = "INITIAL", lex::_pass = boost::phoenix::bind(onInclude(), lex::_start, lex::_end)
		];

		// Skipper state: whitespace and both comment styles.
		this->self("WS") = m_sWS
			| m_sLineComment
			| m_sCComment
			;
	}
};
/// Grammar for a nature declaration:
///   nature <identifier> [: <identifier>] ; { <property> } endnature
/// where each property has the form `<keyword> = <value> ;`.
template<typename Iterator, typename Lexer>
class natureParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{
	typedef qi::in_state_skipper<Lexer> skipper_type;
	typedef std::pair<nature::enumAttribute, nature::attributeValue> property_type;

	qi::rule<Iterator, symbol::symbolPtr(void), skipper_type> m_sStart;
	qi::rule<Iterator, property_type(void), skipper_type> m_sProperty;
	qi::rule<Iterator, std::string(), skipper_type> m_sName;
public:
	/// @param _rTokens lexer object providing the token definitions used below
	template<typename Tokens>
	natureParser(const Tokens &_rTokens)
		:natureParser::base_type(m_sStart)
	{
		const Tokens &t = _rTokens;

		// one alternative per attribute keyword; only the value token differs
		m_sProperty
			= (t.m_sKW_units      >> t.m_sEqual >> t.m_sString     >> t.m_sSemicolon)
			| (t.m_sKW_access     >> t.m_sEqual >> t.m_sIdentifier >> t.m_sSemicolon)
			| (t.m_sKW_idt_nature >> t.m_sEqual >> t.m_sIdentifier >> t.m_sSemicolon)
			| (t.m_sKW_ddt_nature >> t.m_sEqual >> t.m_sIdentifier >> t.m_sSemicolon)
			| (t.m_sKW_abstol     >> t.m_sEqual >> t.m_sReal       >> t.m_sSemicolon)
			;

		// optional ": identifier" following the nature's own name
		m_sName = t.m_sColon >> t.m_sIdentifier;

		m_sStart
			=  t.m_sKW_nature
			>> t.m_sIdentifier
			>> -m_sName
			>> t.m_sSemicolon
			>> *m_sProperty
			>> t.m_sKW_endnature
			;

		m_sStart.name("start");
		m_sProperty.name("property");
	}
};
/*
// Conservative discipline
discipline electrical;
potential Voltage;
flow Current;
enddiscipline
*/
// a parser for a discipline declaration
/// Grammar for a discipline declaration:
///   discipline <identifier> ; { <property> } enddiscipline
/// where a property is a potential, a flow or a domain statement.
template<typename Iterator, typename Lexer>
class disciplineParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{
	typedef qi::in_state_skipper<Lexer> skipper_type;

	qi::rule<Iterator, symbol::symbolPtr(void), skipper_type> m_sStart;

	typedef std::pair<bool, boost::intrusive_ptr<const nature> > CPotentialAndNature;

	/// "No value" alternative of the property variant.
	struct empty
	{
		friend std::ostream &operator<<(std::ostream &_r, const empty&)
		{
			return _r;
		}
		bool operator<(const empty&) const
		{
			return false;
		}
	};
	typedef boost::variant<empty, CPotentialAndNature, discipline::enumDomain> property;

	qi::rule<Iterator, discipline::enumDomain(), skipper_type> m_sDomain;
	qi::rule<Iterator, property(void), skipper_type> m_sProperty;
public:
	/// @param _rTokens lexer object providing the token definitions used below
	template<typename Tokens>
	disciplineParser(const Tokens &_rTokens)
		:disciplineParser::base_type(m_sStart)
	{
		const Tokens &t = _rTokens;

		m_sDomain = t.m_sKW_continuous | t.m_sKW_discrete;

		m_sProperty
			= (t.m_sKW_potential >> t.m_sIdentifier >> t.m_sSemicolon)
			| (t.m_sKW_flow      >> t.m_sIdentifier >> t.m_sSemicolon)
			| (t.m_sKW_domain    >> m_sDomain       >> t.m_sSemicolon)
			;

		m_sStart
			=  t.m_sKW_discipline
			>> t.m_sIdentifier
			>> t.m_sSemicolon
			>> *m_sProperty
			>> t.m_sKW_enddiscipline
			;
	}
};
/// Grammar for a module declaration:
///   module <identifier> ( <port> {, <port>} ) { parameter <type> <identifier> } endmodule
template<typename Iterator, typename Lexer>
class moduleParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{
public:
	typedef qi::in_state_skipper<Lexer> skipper_type;

	qi::rule<Iterator, symbol::symbolPtr(void), skipper_type> m_sStart;
	qi::rule<Iterator, symbol::symbolVector(void), skipper_type> m_sModulePortList;
	qi::rule<Iterator, symbol::symbolVector(void), skipper_type> m_sPortList;
	qi::rule<Iterator, symbol::symbolPtr(void), skipper_type> m_sPort;
	qi::rule<Iterator, std::shared_ptr<symbol::push_scope>(void), skipper_type> m_sModule;

	typedef boost::intrusive_ptr<const ref_counter> intrusivePtr;
	typedef std::vector<intrusivePtr> vectorOfPtr;

	qi::rule<Iterator, vectorOfPtr(void), skipper_type> m_sModuleItemList;
	qi::rule<Iterator, intrusivePtr(void), skipper_type> m_sParameter;
	qi::rule<Iterator, intrusivePtr(void), skipper_type> m_sModuleItem;
	qi::rule<Iterator, type::typePtr(void), skipper_type> m_sType;

	/// @param _rTokens lexer object providing the token definitions used below
	template<typename Tokens>
	moduleParser(const Tokens &_rTokens)
		:moduleParser::base_type(m_sStart)
	{
		const Tokens &t = _rTokens;

		// ports: a comma separated list of identifiers in parentheses
		m_sPort = t.m_sIdentifier;
		m_sPortList %= m_sPort % t.m_sComma;
		m_sModulePortList %= t.m_sCharOP >> m_sPortList >> t.m_sCharCP;

		m_sModule = t.m_sKW_module;

		// module items: currently only parameter declarations
		m_sType = t.m_sKW_real | t.m_sKW_integer | t.m_sKW_string;
		m_sParameter = t.m_sKW_parameter >> m_sType >> t.m_sIdentifier;
		m_sModuleItem = m_sParameter;
		m_sModuleItemList %= *m_sModuleItem;

		m_sStart
			=  m_sModule
			>> t.m_sIdentifier
			>> m_sModulePortList
			>> m_sModuleItemList
			>> t.m_sKW_endmodule
			;
	}
};
/// Top-level grammar: a file is any sequence of discipline, nature and module
/// declarations.
template<typename Iterator, typename Lexer>
class fileParser:public qi::grammar<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> >
{
public:
	disciplineParser<Iterator, Lexer> m_sDiscipline;
	natureParser<Iterator, Lexer> m_sNature;
	moduleParser<Iterator, Lexer> m_sModule;
	qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sStart;
	qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sItem;

	/// @param _rTokens lexer object handed on to the three sub-grammars
	template<typename Tokens>
	fileParser(const Tokens &_rTokens)
		:fileParser::base_type(m_sStart),
		// listed in declaration order, which is the order members are
		// really constructed in (avoids -Wreorder warnings)
		m_sDiscipline(_rTokens),
		m_sNature(_rTokens),
		m_sModule(_rTokens)
	{
		m_sItem = m_sDiscipline | m_sNature | m_sModule;
		m_sStart = *m_sItem;
	}
};
}
int main()
{ std::string sInput = "\
nature Current;\n\
units = \"A\";\n\
access = I;\n\
idt_nature = Charge;\n\
abstol = 1e-12;\n\
endnature\n\
\n\
// Charge in coulombs\n\
nature Charge;\n\
units = \"coul\";\n\
access = Q;\n\
ddt_nature = Current;\n\
abstol = 1e-14;\n\
endnature\n\
\n\
// Potential in volts\n\
nature Voltage;\n\
units = \"V\";\n\
access = V;\n\
idt_nature = Flux;\n\
abstol = 1e-6;\n\
endnature\n\
\n\
discipline electrical;\n\
potential Voltage;\n\
flow Current;\n\
enddiscipline\n\
";
typedef lex::lexertl::token<frank::myIterator, boost::mpl::vector<frank::type::typePtr, std::string, double, int> > token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef frank::lexer<lexer_type>::iterator_type iterator_type;
typedef frank::fileParser<iterator_type, frank::lexer<lexer_type>::lexer_def> grammar_type;
frank::lexer<lexer_type> sLexer;
grammar_type sParser(sLexer);
frank::symbol::push_scope sPush;
auto pStringBegin = frank::myIterator(sInput);
auto pBegin(sLexer.begin(pStringBegin, frank::myIterator(sInput, true)));
const auto b = qi::phrase_parse(pBegin, sLexer.end(), sParser, qi::in_state("WS")[sLexer.self]);
}
Has anybody gotten this working for a lexer returning something else than double or int or string?
Sure. Simple examples might be found on this site
And for the parser also returning non-trivial objects?
Here's your real problem. Spirit is nice for a subset of parsers that are expressed easily in a eDSL, and has the huge benefit of "magically" mapping to a selection of attributes.
Some of the realities are:
attributes are expected to have value-semantic; using polymorphic attributes is hard (How can I use polymorphic attributes with boost::spirit::qi parsers?, e.g.)
using Lex makes most of the sweet-spot disappear since all "highlevel" parsers (like real_parser, [u]int_parser) are out the window. The Spirit devs are on record they prefer not to use Lex. Moreover, Spirit X3 doesn't have Lex support anymore.
Background Information:
I'd very much consider parsing the source as-is, into direct value-typed AST nodes. I know, this is probably what you consider "trivial objects", but don't be deceived by apparent simplicity: recursive variant trees have some expressive power.
Examples
Here's a trivial AST to represent JSON in <20 LoC: Boost Karma generator for composition of classes¹
Here we represent the Graphviz source format with full fidelity: How to use boost spirit list operator with mandatory minimum amount of elements?
I've since created the code to transform that AST into a domain representation with fully correct ownership, cascading lexically scoped node/edge attributes and cross references. I have just recovered that work and put it up on github if you're interested, mainly because the task is pretty similar in many respects, like the overriding/inheriting of properties and resolving identifiers within scopes: https://github.com/sehe/spirit-graphviz/blob/master/spirit-graphviz.cpp#L660
Suggestions, Ideas
In your case I'd take a similar approach to retain simplicity. The code shown doesn't (yet) cover the trickiest ingredients (like nature attribute overrides within a discipline).
Once you start implementing use-cases like resolving compatible disciplines and the absolute tolerances at a given node, you want a domain model with full fidelity. Preferably, there would be no loss of source information, and immutable AST information².
As a middle ground, you could probably avoid building an entire source-AST in memory only to transform it in one big go, at the top-level you could have:
file = qi::skip(skipper) [
*(m_sDiscipline | m_sNature | m_sModule) [process_ast(_1)]
];
Where process_ast would apply the "trivial" AST representation into the domain types, one at a time. That way you keep only small bits of temporary AST representation around.
The domain representation can be arbitrarily sophisticated to support all your logic and use-cases.
Let's "Show, Don't Tell"
Baking the simplest AST that comes to mind matching the grammar³:
namespace frank { namespace ast {

    /// AST node for a `nature ... endnature` declaration.
    struct nature {
        struct empty{};

        std::string name;
        std::string inherits;

        /// Keys of the `<attribute> = <value>;` lines of a nature.
        enum class Attribute { units, access, idt, ddt, abstol };
        /// Value of one attribute.
        using Value = boost::variant<int, double, std::string>;

        std::map<Attribute, Value> attributes;
    };

    /// AST node for a `discipline ... enddiscipline` declaration.
    struct discipline {
        enum enumDomain { eUnspecified, eDiscrete, eContinuous };

        /// Properties of a discipline; each one keeps its default when not given.
        struct properties_t {
            enumDomain domain = eUnspecified;
            boost::optional<std::string> flow, potential;
        };

        std::string name;
        properties_t properties;
    };

    // TODO
    using module = qi::unused_type;

    /// A parsed file: the sequence of its top-level declarations.
    using file = std::vector<boost::variant<nature, discipline, module> >;

    enum class type { real, integer, string };

} }
This is trivial and maps 1:1 onto the grammar productions, which means we have very little impedance.
Tokens? We Don't Need Lex For That
You can have common token parsers without requiring the complexities of Lex
Yes, Lex (especially statically generated) can potentially improve performance, but
if you need that, I wager Spirit Qi is not your best option anyways
premature optimization...
What I did:
struct tokens {
// implicit lexemes
qi::rule<It, std::string()> string, identifier;
qi::rule<It, double()> real;
qi::rule<It, int()> integer;
qi::rule<It, ast::nature::Value()> value;
qi::rule<It, ast::nature::Attribute()> attribute;
qi::rule<It, ast::discipline::enumDomain()> domain;
struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
attribute_sym_t() {
this->add
("units", ast::nature::Attribute::units)
("access", ast::nature::Attribute::access)
("idt_nature", ast::nature::Attribute::idt)
("ddt_nature", ast::nature::Attribute::ddt)
("abstol", ast::nature::Attribute::abstol);
}
} attribute_sym;
struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
domain_sym_t() {
this->add
("discrete", ast::discipline::eDiscrete)
("continuous", ast::discipline::eContinuous);
}
} domain_sym;
tokens() {
using namespace qi;
auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));
string = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
real = double_;
integer = int_;
attribute = kw[attribute_sym];
domain = kw[domain_sym];
value = string | identifier | real | integer;
BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
}
};
Liberating, isn't it? Note how
all attributes are automatically propagated
strings handle escapes (this bit was commented out in your Lex approach). We don't even need semantic actions to (badly) pry out the unquoted/unescaped value
we used distinct to ensure keyword parsing matches only full identifiers. (See How to parse reserved words correctly in boost spirit).
This is actually where you notice the lack of separate lexer.
On the flipside, this makes context-sensitive keywords a breeze (lex can easily prioritize keywords over identifiers that occur in places where keywords cannot occur.⁴)
What About Skipping Space/Comments?
We could have added a token, but for reasons of convention I made it a parser:
/// Skipper grammar: matches one whitespace character, one `//` line comment
/// or one `/* ... */` block comment.
struct skipParser : qi::grammar<It> {
    skipParser() : skipParser::base_type(spaceOrComment) {
        spaceOrComment
            = qi::space
            // line comment: up to end of line or end of input
            | ("//" >> *(qi::char_ - qi::eol) >> (qi::eoi | qi::eol))
            // block comment: up to the first "*/"
            | ("/*" >> *(qi::char_ - "*/") >> "*/")
            ;

        BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
    }
  private:
    qi::rule<It> spaceOrComment;
};
natureParser
We inherit our AST parsers from tokens:
struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {
And from there it is plain sailing:
property = attribute >> '=' >> value >> ';';
nature
= kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
>> *property
>> kw["endnature"];
disciplineParser
discipline = kw["discipline"] >> identifier >> ';'
>> properties
>> kw["enddiscipline"]
;
properties
= kw["domain"] >> domain >> ';'
^ kw["flow"] >> identifier >> ';'
^ kw["potential"] >> identifier >> ';'
;
This shows a competing approach that uses the permutation operator (^) to parse optional alternatives in any order into a fixed frank::ast::discipline properties struct. Of course, you might elect to have a more generic representation here, like we had with ast::nature.
Module AST is left as an exercise for the reader, though the parser rules are implemented below.
Top Level, Encapsulating The Skipper
I hate having to specify the skipper from the calling code (it's more complex than required, and changing the skipper changes the grammar). So, I encapsulate it in the top-level parser:
/// Top-level grammar: parses any sequence of discipline, nature and module
/// declarations into ast::file. The skipper is applied inside the grammar, so
/// callers use qi::parse without passing one.
struct fileParser : qi::grammar<It, ast::file()> {
    fileParser() : fileParser::base_type(file) {
        file = qi::skip(qi::copy(m_sSkip))
            [
                *(m_sDiscipline | m_sNature | m_sModule)
            ];

        BOOST_SPIRIT_DEBUG_NODES((file))
    }

  private:
    disciplineParser m_sDiscipline;
    natureParser     m_sNature;
    moduleParser     m_sModule;
    skipParser       m_sSkip;

    qi::rule<It, ast::file()> file;
};
Demo Time
This demo adds operator<< for the enums, and a variant visitor to print some AST details for debug/demonstrational purposes (print_em).
Then we have a test driver:
/// Test driver: parses sInput, prints every parsed declaration and reports
/// any input that was left unparsed.
int main() {
    using iterator_type = std::string::const_iterator;

    iterator_type iter = sInput.begin();
    iterator_type last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    frank::ast::file file;

    if (qi::parse(iter, last, parser, file)) {
        print_em print;
        for (auto& symbol : file)
            print(symbol);
    } else {
        std::cout << "Parse failed\n";
    }

    // `iter` was advanced past everything that was consumed
    if (iter != last) {
        std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'\n";
    }
}
With the sample input from your question we get the following output:
Live On Coliru
-- Nature
name: Current
inherits:
attribute: units = A
attribute: access = I
attribute: idt = Charge
attribute: abstol = 1e-12
-- Nature
name: Charge
inherits:
attribute: units = coul
attribute: access = Q
attribute: ddt = Current
attribute: abstol = 1e-14
-- Nature
name: Voltage
inherits:
attribute: units = V
attribute: access = V
attribute: idt = Flux
attribute: abstol = 1e-06
-- Discipline
name: electrical
domain: (unspecified)
flow: Current
potential: Voltage
Remaining unparsed: '
'
With BOOST_SPIRIT_DEBUG defined, you get rich debug information: Live On Coliru
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <map>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
namespace qi = boost::spirit::qi;
namespace frank { namespace ast {

    /// AST node for a `nature ... endnature` declaration.
    struct nature {
        struct empty{};

        std::string name;
        std::string inherits;

        /// Keys of the `<attribute> = <value>;` lines of a nature.
        enum class Attribute { units, access, idt, ddt, abstol };
        /// Value of one attribute.
        using Value = boost::variant<int, double, std::string>;

        std::map<Attribute, Value> attributes;
    };

    /// AST node for a `discipline ... enddiscipline` declaration.
    struct discipline {
        enum enumDomain { eUnspecified, eDiscrete, eContinuous };

        /// Properties of a discipline; each one keeps its default when not given.
        struct properties_t {
            enumDomain domain = eUnspecified;
            boost::optional<std::string> flow, potential;
        };

        std::string name;
        properties_t properties;
    };

    // TODO
    using module = qi::unused_type;

    /// A parsed file: the sequence of its top-level declarations.
    using file = std::vector<boost::variant<nature, discipline, module> >;

    enum class type { real, integer, string };

} }
BOOST_FUSION_ADAPT_STRUCT(frank::ast::nature, name, inherits, attributes)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline, name, properties)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline::properties_t, domain, flow, potential)
namespace frank {
namespace qr = boost::spirit::repository::qi;
template <typename It> struct Parsers {
struct tokens {
// implicit lexemes
qi::rule<It, std::string()> string, identifier;
qi::rule<It, double()> real;
qi::rule<It, int()> integer;
qi::rule<It, ast::nature::Value()> value;
qi::rule<It, ast::nature::Attribute()> attribute;
qi::rule<It, ast::discipline::enumDomain()> domain;
struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
attribute_sym_t() {
this->add
("units", ast::nature::Attribute::units)
("access", ast::nature::Attribute::access)
("idt_nature", ast::nature::Attribute::idt)
("ddt_nature", ast::nature::Attribute::ddt)
("abstol", ast::nature::Attribute::abstol);
}
} attribute_sym;
struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
domain_sym_t() {
this->add
("discrete", ast::discipline::eDiscrete)
("continuous", ast::discipline::eContinuous);
}
} domain_sym;
tokens() {
using namespace qi;
auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));
string = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
real = double_;
integer = int_;
attribute = kw[attribute_sym];
domain = kw[domain_sym];
value = string | identifier | real | integer;
BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
}
};
/// Skipper grammar: matches one whitespace character, one `//` line comment
/// or one `/* ... */` block comment.
struct skipParser : qi::grammar<It> {
    skipParser() : skipParser::base_type(spaceOrComment) {
        spaceOrComment
            = qi::space
            // line comment: up to end of line or end of input
            | ("//" >> *(qi::char_ - qi::eol) >> (qi::eoi | qi::eol))
            // block comment: up to the first "*/"
            | ("/*" >> *(qi::char_ - "*/") >> "*/")
            ;

        BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
    }
  private:
    qi::rule<It> spaceOrComment;
};
/// Grammar for `nature <name> [: <name>]; { <attribute> = <value>; } endnature`,
/// producing an ast::nature.
struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {
    natureParser() : natureParser::base_type(nature) {
        // keyword directive: the match must not be followed by an identifier character
        auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

        // one `<attribute> = <value>;` line
        property = attribute >> '=' >> value >> ';';

        nature
            =  kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
            >> *property
            >> kw["endnature"]
            ;

        BOOST_SPIRIT_DEBUG_NODES((nature)(property))
    }

  private:
    using Attribute = std::pair<ast::nature::Attribute, ast::nature::Value>;

    qi::rule<It, ast::nature(), skipParser> nature;
    qi::rule<It, Attribute(), skipParser> property;

    // rules inherited from tokens
    using tokens::attribute;
    using tokens::value;
    using tokens::identifier;
};
/// Grammar for `discipline <name>; <properties> enddiscipline`, producing an
/// ast::discipline.
struct disciplineParser : tokens, qi::grammar<It, ast::discipline(), skipParser> {
    disciplineParser() : disciplineParser::base_type(discipline) {
        // keyword directive: the match must not be followed by an identifier character
        auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

        // permutation (^): each statement may appear at most once, in any order
        properties
            = kw["domain"]    >> domain     >> ';'
            ^ kw["flow"]      >> identifier >> ';'
            ^ kw["potential"] >> identifier >> ';'
            ;

        discipline
            =  kw["discipline"] >> identifier >> ';'
            >> properties
            >> kw["enddiscipline"]
            ;

        BOOST_SPIRIT_DEBUG_NODES((discipline)(properties))
    }

  private:
    qi::rule<It, ast::discipline(), skipParser> discipline;
    qi::rule<It, ast::discipline::properties_t(), skipParser> properties;

    // rules inherited from tokens
    using tokens::domain;
    using tokens::identifier;
};
/// Grammar for a module declaration. It only recognizes the syntax; no
/// attribute is produced (ast::module is qi::unused_type).
struct moduleParser : tokens, qi::grammar<It, ast::module(), skipParser> {
    moduleParser() : moduleParser::base_type(module) {
        // keyword directive: the match must not be followed by an identifier character
        auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));

        // port list: comma separated identifiers in parentheses
        m_sPort           = identifier;
        m_sPortList       = m_sPort % ',';
        m_sModulePortList = '(' >> m_sPortList >> ')';

        m_sModule = kw["module"];

        // module items: currently only `parameter <type> <identifier>`
        m_sType           = kw["real"] | kw["integer"] | kw["string"];
        m_sParameter      = kw["parameter"] >> m_sType >> identifier;
        m_sModuleItem     = m_sParameter;
        m_sModuleItemList = *m_sModuleItem;

        module =
            (m_sModule >> identifier >> m_sModulePortList >> m_sModuleItemList >> kw["endmodule"]);
    }

  private:
    qi::rule<It, ast::module(), skipParser> module;
    qi::rule<It, skipParser> m_sModulePortList;
    qi::rule<It, skipParser> m_sPortList;
    qi::rule<It, skipParser> m_sPort;
    qi::rule<It, skipParser> m_sModule;
    qi::rule<It, skipParser> m_sModuleItemList;
    qi::rule<It, skipParser> m_sParameter;
    qi::rule<It, skipParser> m_sModuleItem;
    qi::rule<It, skipParser> m_sType;

    // rule inherited from tokens
    using tokens::identifier;
};
/// Top-level grammar: parses any sequence of discipline, nature and module
/// declarations into ast::file. The skipper is applied inside the grammar, so
/// callers use qi::parse without passing one.
struct fileParser : qi::grammar<It, ast::file()> {
    fileParser() : fileParser::base_type(file) {
        file = qi::skip(qi::copy(m_sSkip))
            [
                *(m_sDiscipline | m_sNature | m_sModule)
            ];

        BOOST_SPIRIT_DEBUG_NODES((file))
    }

  private:
    disciplineParser m_sDiscipline;
    natureParser     m_sNature;
    moduleParser     m_sModule;
    skipParser       m_sSkip;

    qi::rule<It, ast::file()> file;
};
};
}
extern std::string const sInput;
// just for demo
#include <boost/optional/optional_io.hpp>
namespace frank { namespace ast {
    //static inline std::ostream &operator<<(std::ostream &os, const nature::empty &) { return os; }

    /// Prints the short name of a nature attribute, or "?" for an unknown value.
    static inline std::ostream &operator<<(std::ostream &os, nature::Attribute a) {
        const char *label = "?";
        switch(a) {
            case nature::Attribute::units:  label = "units";  break;
            case nature::Attribute::access: label = "access"; break;
            case nature::Attribute::idt:    label = "idt";    break;
            case nature::Attribute::ddt:    label = "ddt";    break;
            case nature::Attribute::abstol: label = "abstol"; break;
        }
        return os << label;
    }

    /// Prints the name of a discipline domain, or "?" for an unknown value.
    static inline std::ostream &operator<<(std::ostream &os, discipline::enumDomain d) {
        const char *label = "?";
        switch(d) {
            case discipline::eDiscrete:    label = "discrete";      break;
            case discipline::eContinuous:  label = "continuous";    break;
            case discipline::eUnspecified: label = "(unspecified)"; break;
        }
        return os << label;
    }
} }
/// Variant visitor that writes a short description of each AST node to
/// std::cout (debug / demonstration only).
struct print_em {
    using result_type = void;

    /// Entry point for a variant: dispatches to the overload for the held type.
    template <typename V>
    void operator()(V const& variant) const {
        boost::apply_visitor(*this, variant);
    }

    void operator()(frank::ast::nature const& nature) const {
        std::cout << "-- Nature\n"
                  << "name: " << nature.name << "\n"
                  << "inherits: " << nature.inherits << "\n";
        for (auto const& entry : nature.attributes)
            std::cout << "attribute: " << entry.first << " = " << entry.second << "\n";
    }

    void operator()(frank::ast::discipline const& discipline) const {
        auto const& props = discipline.properties;
        std::cout << "-- Discipline\n"
                  << "name: " << discipline.name << "\n"
                  << "domain: " << props.domain << "\n"
                  << "flow: " << props.flow << "\n"
                  << "potential: " << props.potential << "\n";
    }

    void operator()(frank::ast::module const&) const {
        std::cout << "-- Module (TODO)\n";
    }
};
/// Test driver: parses sInput, prints every parsed declaration and reports
/// any input that was left unparsed.
int main() {
    using iterator_type = std::string::const_iterator;

    iterator_type iter = sInput.begin();
    iterator_type last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    frank::ast::file file;

    if (parse(iter, last, parser, file)) {
        print_em print;
        for (auto& symbol : file)
            print(symbol);
    } else {
        std::cout << "Parse failed\n";
    }

    // `iter` was advanced past everything that was consumed
    if (iter != last) {
        std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'\n";
    }
}
// Sample input for the demo in main(): three nature declarations followed by
// one discipline declaration. This is a raw string literal, so every character
// between the delimiters (including the line breaks) is part of the input.
std::string const sInput = R"(
nature Current;
units = "A";
access = I;
idt_nature = Charge;
abstol = 1e-12;
endnature
// Charge in coulombs
nature Charge;
units = "coul";
access = Q;
ddt_nature = Current;
abstol = 1e-14;
endnature
// Potential in volts
nature Voltage;
units = "V";
access = V;
idt_nature = Flux;
abstol = 1e-6;
endnature
discipline electrical;
potential Voltage;
flow Current;
enddiscipline
)";
¹ incidentally, the other answer there demonstrates the "impedance mismatch" with polymorphic attributes and Spirit - this time on the Karma side of it
² (to prevent subtle bugs that depend on evaluation order or things like that, e.g.)
³ (gleaning some from here but not importing too much complexity that wasn't reflected in your Lex approach)
⁴ (In fact, this is where you'd need state-switching inside the grammar, an area notoriously underdeveloped and practically unusable in Spirit Lex: e.g. when it works how to avoid defining token which matchs everything in boost::spirit::lex or when it goes badly: Boost.Spirit SQL grammar/lexer failure)
One solution would be to use a std::string everywhere and define a boost::variant with everything needed but not use it anywhere in the parser or lexer directly but only serialize & deserialize it into/from the string.
Is this what the originators of boost::spirit intended?
I have this example code, which parses the string str correctly.
How do I make it work if there are any extra characters before and/or after the string? For example, if I did str = std::string("AAA") + str + std::string("AAA")
frame.h
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
/// Plain aggregate holding the pieces of one `frame={...}` record.
struct frame
{
    std::string addr;      ///< text of the `addr="..."` field
    std::string func;      ///< text of the `func="..."` field
    std::string file;      ///< text of the `file="..."` field
    std::string fullname;  ///< text of the `fullname="..."` field
    std::string line;      ///< text of the `line="..."` field (kept as a string)

    /// name -> value pairs taken from the `args=[...]` list
    std::map<std::string, std::string> kv;
};
// Grammar for an `args=[{name="..",value=".."},...]` list. Its attribute is a
// std::map<std::string, std::string>; no skipper is declared for any rule.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// One pair, then any number of ",pair", enclosed in "args=[" ... "]".
query =
qi::lit("args=[") >> pair >> *(qi::lit(',') >> pair) >> qi::lit(']');
// A single {name="..",value=".."} entry; the two quoted strings form the pair.
pair = qi::lit("{name=") >> quoted_string >> qi::lit(",value=") >>
quoted_string >> qi::lit("}");
// NOTE(review): `key` is defined but no other rule in this grammar refers to it.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Double-quoted text; at least one character is required between the quotes.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
}
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar (skipper: ascii::space) that fills a `frame` from a `frame={...}` record.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Lazy accessors for the members of the frame received as inherited attribute _r1.
static const auto _addr = phx::bind(&frame::addr, qi::_r1);
static const auto _func = phx::bind(&frame::func, qi::_r1);
static const auto _file = phx::bind(&frame::file, qi::_r1);
static const auto _fullname = phx::bind(&frame::fullname, qi::_r1);
static const auto _line = phx::bind(&frame::line, qi::_r1);
static const auto _kv = phx::bind(&frame::kv, qi::_r1);
// Each field is a literal `name=` prefix followed by a quoted string.
func = qi::lit("func=") >> quoted_string;
addr = qi::lit("addr=") >> quoted_string;
file = qi::lit("file=") >> quoted_string;
fullname = qi::lit("fullname=") >> quoted_string;
line = qi::lit("line=") >> quoted_string;
// Wrappers whose semantic action stores the parsed value into the frame in _r1.
func_rule = func[_func = qi::_1];
addr_rule = addr[_addr = qi::_1];
file_rule = file[_file = qi::_1];
fullname_rule = fullname[_fullname = qi::_1];
line_rule = line[_line = qi::_1];
kv_rule = arrTest[_kv = qi::_1];
// Double-quoted text; at least one character is required between the quotes.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
// The fields AND the ',' literals are all operands of the permutation operator `^`,
// so a comma is just another permutation element here, not a required separator.
frame_rule = qi::lit("frame={") >>
(addr_rule(qi::_val) ^ qi::lit(',') ^ func_rule(qi::_val) ^
qi::lit(',') ^ file_rule(qi::_val) ^ qi::lit(',') ^
fullname_rule(qi::_val) ^ qi::lit(',') ^ line_rule(qi::_val) ^
qi::lit(',') ^ kv_rule(qi::_val)) >>
qi::lit('}');
// NOTE(review): file_rule and kv_rule are not listed among the debug nodes.
BOOST_SPIRIT_DEBUG_NODES(
(frame_rule)(func_rule)(addr_rule)(fullname_rule)(line_rule))
}
// Field wrappers take the target frame by reference as inherited attribute.
qi::rule<Iterator, void(frame&), ascii::space_type> func_rule, addr_rule,
file_rule, fullname_rule, line_rule, kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
// Declared without a skipper.
qi::rule<Iterator, std::string()> addr, func, file, fullname, line;
qi::rule<Iterator, std::string()> quoted_string;
argsArray<Iterator> arrTest;
};
test.cc
#include <iostream>
#include "gtest/gtest.h"
#include "parser/frame.h"
// Parses one `frame={...}` record and checks every extracted field.
// NOTE: the last literal ends with a `"` after the closing `}`; only the
// boolean result of phrase_parse is checked, not how much input was consumed.
TEST(ParseFrameString, Test1)
{
    using Iter = std::string::const_iterator;

    const std::string input = R"(frame={addr="0x0000000000414008",)"
                              R"(func="main",)"
                              R"(args=[{name="argc",value="1"},)"
                              R"({name="argv",value="0x7fffffffe1a8"}],)"
                              R"(file="/home/stiopa/development/gdbFront/main.cc",)"
                              R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
                              R"(line="90"}")";

    const frameParser<Iter> grammar;
    Iter first(input.begin());
    Iter last(input.end());

    frame frame;
    bool r = phrase_parse(first, last, grammar, boost::spirit::ascii::space, frame);

    EXPECT_EQ(r, true);
    EXPECT_EQ(frame.addr, "0x0000000000414008");
    EXPECT_EQ(frame.func, "main");

    std::map<std::string, std::string> kv{{"argc", "1"},
                                          {"argv", "0x7fffffffe1a8"}};
    EXPECT_EQ(frame.kv, kv);

    EXPECT_EQ(frame.file, "/home/stiopa/development/gdbFront/main.cc");
    EXPECT_EQ(frame.fullname, "/home/stiopa/development/gdbFront/main.cc");
    EXPECT_EQ(frame.line, "90");
}
The simple, low-tech solution would be to use qi::seek from the repository:
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qir = boost::spirit::repository::qi;
And then:
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
DEMO
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qi = boost::spirit::qi;
namespace qir = boost::spirit::repository::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
/// Plain aggregate holding the pieces of one `frame={...}` record.
struct frame
{
    std::string addr;      ///< text of the `addr="..."` field
    std::string func;      ///< text of the `func="..."` field
    std::string file;      ///< text of the `file="..."` field
    std::string fullname;  ///< text of the `fullname="..."` field
    std::string line;      ///< text of the `line="..."` field (kept as a string)

    /// name -> value pairs taken from the `args=[...]` list
    std::map<std::string, std::string> kv;
};
// Grammar for an `args=[{name="..",value=".."},...]` list. Its attribute is a
// std::map<std::string, std::string>; no skipper is declared for any rule.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// One pair, then any number of ",pair", enclosed in "args=[" ... "]".
query =
qi::lit("args=[") >> pair >> *(qi::lit(',') >> pair) >> qi::lit(']');
// A single {name="..",value=".."} entry; the two quoted strings form the pair.
pair = qi::lit("{name=") >> quoted_string >> qi::lit(",value=") >>
quoted_string >> qi::lit("}");
// NOTE(review): `key` is defined but no other rule in this grammar refers to it.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Double-quoted text; at least one character is required between the quotes.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
}
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar (skipper: ascii::space) that fills a `frame` from a `frame={...}` record.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Lazy accessors for the members of the frame received as inherited attribute _r1.
static const auto _addr = phx::bind(&frame::addr, qi::_r1);
static const auto _func = phx::bind(&frame::func, qi::_r1);
static const auto _file = phx::bind(&frame::file, qi::_r1);
static const auto _fullname = phx::bind(&frame::fullname, qi::_r1);
static const auto _line = phx::bind(&frame::line, qi::_r1);
static const auto _kv = phx::bind(&frame::kv, qi::_r1);
// Each field is a literal `name=` prefix followed by a quoted string.
func = qi::lit("func=") >> quoted_string;
addr = qi::lit("addr=") >> quoted_string;
file = qi::lit("file=") >> quoted_string;
fullname = qi::lit("fullname=") >> quoted_string;
line = qi::lit("line=") >> quoted_string;
// Wrappers whose semantic action stores the parsed value into the frame in _r1.
func_rule = func[_func = qi::_1];
addr_rule = addr[_addr = qi::_1];
file_rule = file[_file = qi::_1];
fullname_rule = fullname[_fullname = qi::_1];
line_rule = line[_line = qi::_1];
kv_rule = arrTest[_kv = qi::_1];
// Double-quoted text; at least one character is required between the quotes.
quoted_string %= boost::spirit::lexeme['"' >> +(qi::char_ - '"') >> '"'];
// The fields AND the ',' literals are all operands of the permutation operator `^`,
// so a comma is just another permutation element here, not a required separator.
frame_rule = qi::lit("frame={") >>
(addr_rule(qi::_val) ^ qi::lit(',') ^ func_rule(qi::_val) ^
qi::lit(',') ^ file_rule(qi::_val) ^ qi::lit(',') ^
fullname_rule(qi::_val) ^ qi::lit(',') ^ line_rule(qi::_val) ^
qi::lit(',') ^ kv_rule(qi::_val)) >>
qi::lit('}');
// NOTE(review): file_rule and kv_rule are not listed among the debug nodes.
BOOST_SPIRIT_DEBUG_NODES(
(frame_rule)(func_rule)(addr_rule)(fullname_rule)(line_rule))
}
// Field wrappers take the target frame by reference as inherited attribute.
qi::rule<Iterator, void(frame&), ascii::space_type> func_rule, addr_rule,
file_rule, fullname_rule, line_rule, kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
// Declared without a skipper.
qi::rule<Iterator, std::string()> addr, func, file, fullname, line;
qi::rule<Iterator, std::string()> quoted_string;
argsArray<Iterator> arrTest;
};
#include <iostream>
//#include "parser/frame.h"
int main()
{
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
str = "AAA" + str + "AAA";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
assert(r == true);
assert(frame.addr == "0x0000000000414008");
assert(frame.func == "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
assert(frame.kv == kv);
assert(frame.file == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.fullname == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.line == "90");
}
Tests still pass.
And here's a free code review. Please see
Boost Spirit: "Semantic actions are evil"?
Boost spirit skipper issues
you didn't parse the delimiting ',' correctly at all. You must require it, unless end-of-frame ('}'). You cannot accept multiple in a row
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
namespace qi = boost::spirit::qi;
namespace qir = boost::spirit::repository::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
/// Plain aggregate holding the pieces of one `frame={...}` record.
/// Member order matters: it is the order used when the struct is adapted
/// as a Fusion sequence right after this definition.
struct frame
{
    std::string addr;      ///< text of the `addr="..."` field
    std::string func;      ///< text of the `func="..."` field
    std::string file;      ///< text of the `file="..."` field
    std::string fullname;  ///< text of the `fullname="..."` field
    std::string line;      ///< text of the `line="..."` field (kept as a string)

    /// name -> value pairs taken from the `args=[...]` list
    std::map<std::string, std::string> kv;
};
BOOST_FUSION_ADAPT_STRUCT(frame, addr, func, file, fullname, line, kv)
// Grammar for an `args=[{name="..",value=".."},...]` list. Its attribute is a
// std::map<std::string, std::string>; no skipper is declared for any rule.
template <typename Iterator>
struct argsArray : qi::grammar<Iterator, std::map<std::string, std::string>()>
{
argsArray() : argsArray::base_type(query)
{
// One pair, then any number of ",pair", enclosed in "args=[" ... "]".
query = "args=[" >> pair >> *(',' >> pair) >> ']';
// A single {name="..",value=".."} entry; the two quoted strings form the pair.
pair = "{name=" >> quoted_string >> ",value=" >> quoted_string >> "}";
// NOTE(review): `key` is defined but no other rule in this grammar refers to it.
key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// Double-quoted text; at least one character is required between the quotes.
quoted_string = '"' >> +(qi::char_ - '"') >> '"';
}
private:
qi::rule<Iterator, std::map<std::string, std::string>()> query;
qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
qi::rule<Iterator, std::string()> key;
qi::rule<Iterator, std::string()> quoted_string;
};
// Grammar (skipper: ascii::space) that parses a `frame={...}` record into a `frame`
// through attribute propagation; no semantic actions are used.
template <typename Iterator>
struct frameParser : qi::grammar<Iterator, frame(), ascii::space_type>
{
frameParser() : frameParser::base_type(frame_rule)
{
// Double-quoted text; at least one character is required between the quotes.
quoted_string = '"' >> +(qi::char_ - '"') >> '"';
// A field ends either right before '}' (look-ahead only, nothing consumed) or with ','.
delim = (&qi::lit('}')) | ',';
// Generic `<name>="value"` field; the field name arrives as inherited attribute _r1.
field_rule = qi::lexeme [ qi::lit(qi::_r1) >> '=' ] >> quoted_string >> delim;
kv_rule = arrTest >> delim;
// Operands of the permutation operator `^`; the unary + turns each string literal
// into the char const* expected by field_rule's inherited attribute.
frame_rule = "frame={" >>
(field_rule(+"addr") ^
field_rule(+"func") ^
field_rule(+"file") ^
field_rule(+"fullname") ^
field_rule(+"line") ^
kv_rule
) >> '}';
BOOST_SPIRIT_DEBUG_NODES((frame_rule)(field_rule))
}
private:
// delim, kv_rule and quoted_string are declared without a skipper.
qi::rule<Iterator> delim;
qi::rule<Iterator, std::string(char const*), ascii::space_type> field_rule;
qi::rule<Iterator, std::map<std::string, std::string>()> kv_rule;
qi::rule<Iterator, frame(), ascii::space_type> frame_rule;
qi::rule<Iterator, std::string()> quoted_string;
argsArray<Iterator> arrTest;
};
#include <iostream>
//#include "parser/frame.h"
int main()
{
std::string str = R"(frame={addr="0x0000000000414008",)"
R"(func="main",)"
R"(args=[{name="argc",value="1"},)"
R"({name="argv",value="0x7fffffffe1a8"}],)"
R"(file="/home/stiopa/development/gdbFront/main.cc",)"
R"(fullname="/home/stiopa/development/gdbFront/main.cc",)"
R"(line="90"}")";
str = "AAA" + str + "AAA";
typedef std::string::const_iterator It;
const frameParser<It> g;
It iter(str.begin()), end(str.end());
frame frame;
bool r = phrase_parse(iter, end, qir::seek[g], boost::spirit::ascii::space, frame);
if (iter != end)
std::cout << "Remaining unparsed: '" << std::string(iter,end) << "'\n";
assert(r == true);
assert(frame.addr == "0x0000000000414008");
assert(frame.func == "main");
std::map<std::string, std::string> kv{{"argc", "1"},
{"argv", "0x7fffffffe1a8"}};
assert(frame.kv == kv);
assert(frame.file == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.fullname == "/home/stiopa/development/gdbFront/main.cc");
assert(frame.line == "90");
}
Prints:
Remaining unparsed: '"AAA'
Tests still pass.
Note your original sample input had a trailing ", which you simply ignored.
I managed to parse a PGN file thanks to the Boost Spirit library, but parsing fails as soon as the input contains characters I did not anticipate.
Here is my Spirit grammar :
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    /// Qi grammar that parses one or more PGN games into std::vector<pgn_game>.
    /// No skipper is used inside the grammar; whitespace is consumed explicitly
    /// through omit[...] where it is allowed.
    template <typename Iterator>
    struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
    {
        pgn_parser() : pgn_parser::base_type(games)
        {
            using qi::lexeme;
            // qi::char_ rather than ascii::char_: the ascii encoding only accepts
            // 7-bit characters, so input containing bytes >= 0x80 (e.g. accented
            // player names inside a quoted tag value) could not be parsed.
            using qi::char_;
            using qi::uint_;
            using qi::alnum;
            using qi::space;
            using qi::omit;
            using qi::eol;
            using qi::lit;

            // "..." with the surrounding quotes dropped; may be empty.
            quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

            // [Key "value"] followed by at least one end-of-line.
            tag %=
                '['
                >> +alnum
                >> omit[+space]
                >> quoted_string
                >> ']'
                >> omit[+eol]
                ;

            header %= +tag;

            // "..." must be tried before "." so the longer alternative wins.
            move_turn %= qi::string("...") | qi::string(".");

            regular_move %=
                +char_("a-hNBRQK")
                >> +char_("a-h1-8x=NBRQK")
                >> -qi::string("e.p.")
                ;

            // "O-O-O" must be tried before "O-O" for the same reason.
            castle_move %= qi::string("O-O-O") | qi::string("O-O");

            // A move optionally followed by a check ('+') or mate ('#') marker.
            single_move %=
                (regular_move | castle_move) >> -(char_('+') | char_('#'))
                ;

            result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

            // number, "." or "...", first move, optional second move, optional result.
            full_move %=
                uint_
                >> move_turn
                >> omit[*space]
                >> single_move
                >> -(omit[+space] >> single_move)
                >> -(omit[+space] >> result)
                ;

            game_description %= full_move
                >> *(omit[*space] >> full_move);

            single_game %=
                -header
                >> game_description
                ;

            games %=
                single_game
                >> *(omit[*(space|eol)] >> single_game)
                ;
        }

        qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
        qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
        qi::rule<Iterator, std::string(), qi::unused_type> result;
        qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
        qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
        qi::rule<Iterator, std::string(), qi::unused_type> single_move;
        qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
        qi::rule<Iterator, game_move(), qi::unused_type> full_move;
        qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
        qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
        qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
    };
}
How can I simply consume any character I did not anticipate? In other words, how can I ignore any character that is not wanted by any of my grammar rules?
As for testing purposes :
here my parser header (pgn_games_extractor.hpp)
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

    /// One `[key "value"]` header entry of a game.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered move entry; every part is kept as text.
    struct game_move {
        unsigned move_number;
        std::string move_turn;
        std::string white_move;
        std::string black_move;
        std::string result;
    };

    /// A game: its header tags followed by its moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Reads PGN input at construction time and keeps the parsed games.
    class PgnGamesExtractor
    {
    public:
        /*
            Both constructors may throw PgnParsingException (if bad pgn format)
            and InputFileException (if missing file)
        */
        PgnGamesExtractor(std::string inputFilePath);
        PgnGamesExtractor(std::ifstream &inputFile);

        /// Returns a copy of the parsed games.
        std::vector<pgn_game> getGames() const
        {
            return games;
        }

        virtual ~PgnGamesExtractor();

    private:
        std::vector<pgn_game> games;

        void parseInput(std::ifstream &inputFile);
    };

    /// Raised when the input does not match the PGN grammar.
    class PgnParsingException : public std::runtime_error
    {
    public:
        PgnParsingException(std::string message)
            : std::runtime_error(message)
        {}
    };

    /// Raised when the input file cannot be opened or read.
    class InputFileException : public std::runtime_error
    {
    public:
        InputFileException(std::string message)
            : std::runtime_error(message)
        {}
    };

}
#endif // PGNGAMESEXTRACTOR_HPP
Here is my parser source (pgn_games_extractor.cpp) :
#include "pgn_games_extractor.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phoenix = boost::phoenix;

    /// Qi grammar that parses one or more PGN games into std::vector<pgn_game>.
    /// No skipper is used inside the grammar; whitespace is consumed explicitly
    /// through omit[...] where it is allowed.
    template <typename Iterator>
    struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
    {
        pgn_parser() : pgn_parser::base_type(games)
        {
            using qi::lexeme;
            // qi::char_ rather than ascii::char_: the ascii encoding only accepts
            // 7-bit characters, so input containing bytes >= 0x80 (e.g. accented
            // player names inside a quoted tag value) could not be parsed.
            using qi::char_;
            using qi::uint_;
            using qi::alnum;
            using qi::space;
            using qi::omit;
            using qi::eol;
            using qi::lit;

            // "..." with the surrounding quotes dropped; may be empty.
            quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];

            // [Key "value"] followed by at least one end-of-line.
            tag %=
                '['
                >> +alnum
                >> omit[+space]
                >> quoted_string
                >> ']'
                >> omit[+eol]
                ;

            header %= +tag;

            // "..." must be tried before "." so the longer alternative wins.
            move_turn %= qi::string("...") | qi::string(".");

            regular_move %=
                +char_("a-hNBRQK")
                >> +char_("a-h1-8x=NBRQK")
                >> -qi::string("e.p.")
                ;

            // "O-O-O" must be tried before "O-O" for the same reason.
            castle_move %= qi::string("O-O-O") | qi::string("O-O");

            // A move optionally followed by a check ('+') or mate ('#') marker.
            single_move %=
                (regular_move | castle_move) >> -(char_('+') | char_('#'))
                ;

            result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");

            // number, "." or "...", first move, optional second move, optional result.
            full_move %=
                uint_
                >> move_turn
                >> omit[*space]
                >> single_move
                >> -(omit[+space] >> single_move)
                >> -(omit[+space] >> result)
                ;

            game_description %= full_move
                >> *(omit[*space] >> full_move);

            single_game %=
                -header
                >> game_description
                ;

            games %=
                single_game
                >> *(omit[*(space|eol)] >> single_game)
                ;
        }

        qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
        qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
        qi::rule<Iterator, std::string(), qi::unused_type> result;
        qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
        qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
        qi::rule<Iterator, std::string(), qi::unused_type> single_move;
        qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
        qi::rule<Iterator, game_move(), qi::unused_type> full_move;
        qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
        qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
        qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
    };
}
// Opens the file named by `inputFilePath` and parses it via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream{inputFilePath};
    parseInput(pgnStream);
}
// Parses the games from an already constructed file stream via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
parseInput(inputFile);
}
// Out-of-line definition of the virtual destructor.
loloof64::PgnGamesExtractor::~PgnGamesExtractor()
{
// Intentionally empty: no explicit cleanup is performed here.
}
// Reads the whole stream into memory, parses it with pgn_parser and, on a
// complete match, stores the result in `games`.
//
// Throws InputFileException when the stream is not usable or cannot be read,
// and PgnParsingException (carrying the unparsed remainder) when the input
// does not match the grammar completely. Both are thrown by value so that a
// `catch (const std::exception&)` or a catch of the concrete type matches.
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Read up to the first NUL byte, i.e. the whole content of a text file.
    // The delimiter is spelled out instead of being derived from eof(), which
    // would silently become '\x01' if eofbit happened to be set already.
    std::string content;
    std::getline(inputFile, content, '\0');
    if (inputFile.fail()) throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;

    std::string::const_iterator iter = content.begin();
    std::string::const_iterator end = content.end();

    const bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);

    if (success && iter == end)
    {
        // Commit only after a full parse so a failure leaves `games` untouched.
        games.swap(temp_games);
        return;
    }

    const std::string error_fragment(iter, end);
    throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
I am asking this question because I could not parse the following pgn : ScotchGambitPgn.zip. I think it is because of an encoding issue with this file.
I am using Spirit 2 and C++ 11 (Gnu)
As requested the simple X3 translation.
fewer lines of code (10 lines)
compilation time down from 7.4s to 3.6s (clang)
compilation time down from 11.4s to 6.0s (gcc5)
runtime down from 0.80s to 0.55s (clang and gcc)
The outputs are identical (exactly).
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

    /// One `[key "value"]` header entry of a game.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered move entry with an optional game result.
    struct game_move {
        unsigned move_number;
        std::string white_move;
        std::string black_move;
        enum result_t { white_won, black_won, draw, undecided } result;
    };

    /// A game: its header tags followed by its moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Reads PGN input at construction time and keeps the parsed games.
    class PgnGamesExtractor
    {
    public:
        /*
            Both constructors may throw PgnParsingException (if bad pgn format)
            and InputFileException (if missing file)
        */
        PgnGamesExtractor(std::string inputFilePath);
        PgnGamesExtractor(std::istream &inputFile);

        /// Returns a copy of the parsed games.
        std::vector<pgn_game> getGames() const
        {
            return games;
        }

        virtual ~PgnGamesExtractor();

    private:
        std::vector<pgn_game> games;

        void parseInput(std::istream &inputFile);
    };

    /// Raised when the input does not match the PGN grammar.
    class PgnParsingException : public virtual std::runtime_error
    {
    public:
        PgnParsingException(std::string message)
            : std::runtime_error(message)
        {}
    };

    /// Raised when the input stream cannot be read.
    class InputFileException : public virtual std::runtime_error
    {
    public:
        InputFileException(std::string message)
            : std::runtime_error(message)
        {}
    };

}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
// X3 parsers for PGN input; `games` at the bottom is the entry point.
namespace pgn_parser {
using namespace boost::spirit::x3;
// Empty string used as the attribute value when one side has no move.
static std::string const no_move;
// Symbol table mapping the textual game result to game_move::result_t.
static auto const result = []{
symbols<game_move::result_t> table;
table.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
return table;
}();
// Double-quoted text (possibly empty), matched without skipping inside the quotes.
static auto const quoted_string = lexeme['"' >> *~char_('"') >> '"'];
static auto const tag = '[' >> +alnum >> quoted_string >> ']';
static auto const header = +tag;
// Castling (long form tried first) or a piece/square move, optionally followed by "e.p.".
static auto const regular_move = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched source text as the std::string attribute.
static auto const single_move = rule<struct single_move_, std::string> { "single_move" }
= raw [ lexeme [ regular_move >> -char_("+#")] ];
// number, then either "..." (first move slot gets no_move) or "." plus a move,
// then a second move or no_move, then an optional result.
static auto const full_move = rule<struct full_move_, game_move> { "full_move" }
= uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
static auto const game_description = +full_move;
static auto const single_game = rule<struct single_game_, pgn_game> { "single_game" }
= -header >> game_description;
// Zero or more games.
static auto const games = *single_game;
}
}
// Opens the file named by `inputFilePath` and parses it via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream{inputFilePath};
    parseInput(pgnStream);
}
// Parses the games from any input stream via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); }
// Out-of-line definition of the virtual destructor.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
// Intentionally empty: no explicit cleanup is performed here.
}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
#include <iostream>
// Parses "ScotchGambit.pgn" and prints the number of games followed by every
// move as "<number>.\t<white>\t<black>".
int main() {
    loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");

    // getGames() returns the vector by value; fetch it once instead of copying
    // every game again for the loop.
    auto const games = pge.getGames();

    std::cout << "Parsed " << games.size() << " games\n";
    for (auto const& g : games)
        for (auto const& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
For what it's worth, here's a significantly simplified version:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

    /// One `[key "value"]` header entry of a game.
    struct pgn_tag {
        std::string key;
        std::string value;
    };

    /// One numbered move entry with an optional game result.
    struct game_move {
        unsigned move_number;
        std::string white_move;
        std::string black_move;
        enum result_t { white_won, black_won, draw, undecided } result;
    };

    /// A game: its header tags followed by its moves.
    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    /// Reads PGN input at construction time and keeps the parsed games.
    class PgnGamesExtractor
    {
    public:
        /*
            Both constructors may throw PgnParsingException (if bad pgn format)
            and InputFileException (if missing file)
        */
        PgnGamesExtractor(std::string inputFilePath);
        PgnGamesExtractor(std::istream &inputFile);

        /// Returns a copy of the parsed games.
        std::vector<pgn_game> getGames() const
        {
            return games;
        }

        virtual ~PgnGamesExtractor();

    private:
        std::vector<pgn_game> games;

        void parseInput(std::istream &inputFile);
    };

    /// Raised when the input does not match the PGN grammar.
    class PgnParsingException : public virtual std::runtime_error
    {
    public:
        PgnParsingException(std::string message)
            : std::runtime_error(message)
        {}
    };

    /// Raised when the input stream cannot be read.
    class InputFileException : public virtual std::runtime_error
    {
    public:
        InputFileException(std::string message)
            : std::runtime_error(message)
        {}
    };

}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
// Qi grammar for PGN input; whitespace is handled by the qi::space skipper.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
pgn_parser() : pgn_parser::base_type(games) {
using namespace qi;
// Empty string used as the attribute value when one side has no move.
const std::string no_move;
// Symbol table mapping the textual game result to game_move::result_t.
result.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
// Double-quoted text, possibly empty.
quoted_string = '"' >> *~char_('"') >> '"';
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
// Castling (long form tried first) or a piece/square move, optionally followed by "e.p.".
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched source text as the std::string attribute.
single_move = raw [ regular_move >> -char_("+#") ];
// number, then either "..." (first move slot gets no_move) or "." plus a move,
// then a second move or no_move, then an optional result.
full_move = uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
single_game = -header >> game_description;
// Zero or more games.
games = *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, game_move(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;
qi::rule<Iterator, pgn_game, qi::space_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::space_type> games;
// lexemes: declared without a skipper
qi::symbols<char, game_move::result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
// Opens the file named by `inputFilePath` and parses it via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream{inputFilePath};
    parseInput(pgnStream);
}
// Parses the games from any input stream via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); }
// Out-of-line definition of the virtual destructor.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
// Intentionally empty: no explicit cleanup is performed here.
}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
loloof64::pgn_parser<It> parser;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
// Parses PGN text from standard input and prints the number of games followed
// by every move as "<number>.\t<white>\t<black>".
int main() {
    loloof64::PgnGamesExtractor pge(std::cin); // "ScotchGambit.pgn"

    // getGames() returns the vector by value; fetch it once instead of copying
    // every game again for the loop.
    auto const games = pge.getGames();

    std::cout << "Parsed " << games.size() << " games\n";
    for (auto const& g : games)
        for (auto const& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
Notes:
don't read full file in memory (boost::spirit::istream_iterator)
don't manually skip (use skippers)
don't explicitly lexeme (Boost spirit skipper issues)
don't use %= if not needed
don't synthesize unneeded attributes (use raw[])
treat optional parts of a move as optional; don't store asymmetric magic flags like "..." (look for no_move)
don't be overly specific (use istream& instead of ifstream&)
Probably some other things I forgot. Output is e.g.
Parsed 6166 games
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Qf6
5. O-O d6
6. Ng5 Nh6
7. f4 Be7
8. e5 Qg6
9. exd6 cxd6
10. c3 dxc3
11. Nxc3 O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3 f5
16. Bc4 Kh8
17. Nxe7 Nxe7
18. Qxb7 Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5 Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7 Rxb2
27. Ne6 Qxe6
28. Bxe6 Rxd1+
29. Kf2
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Bc5
5. Ng5 Ne5
6. Bxf7+ Nxf7
7. Nxf7 Bb4+
8. c3 dxc3
9. bxc3 Bxc3+
10. Nxc3 Kxf7
11. Qd5+ Kf8
12. Ba3+ d6
13. e5 Qg5
14. exd6 Qxd5
Indeed the problem is with Veronica. Or, actually, it's with Ver?nica. Where ? is the code unit <93> - which, lacking codepage/encoding information could mean anything really.
You're using ascii::char_, and this requires 7-bit (pure ASCII) characters only.
Easily fix it by changing
using ascii::char_;
into
using qi::char_;
Developing with Boost Spirit 2, I am trying to follow an example in order to track parsing progress (semantic actions will be added later) in my PGN parser (see also the related previous question), but I can't manage to avoid compilation errors. The cpp file:
#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <tuple>
#include <iostream>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
    namespace qi = boost::spirit::qi;

    // Attribute types that pair parsed data with a position value (std::size_t).
    typedef std::tuple<std::size_t, game_move> move_t;
    typedef std::tuple<std::vector<pgn_tag>, std::vector<move_t>> game_t;
    typedef std::tuple<std::size_t, std::vector<game_t>> pgn_t;

    // Qi grammar for PGN input that also records positions through CurrentPos.
    //
    // NOTE(review): the grammar is declared with attribute std::vector<pgn_game>
    // while its start rule `games` is declared with pgn_t(); these need to agree,
    // which also affects the attribute passed by the caller - confirm the
    // intended attribute type.
    template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
        pgn_parser() : pgn_parser::base_type(games) {
            using namespace qi;

            // Empty string used as the attribute value when one side has no move.
            const std::string no_move;

            result.add
                ("1-0", result_t::white_won)
                ("0-1", result_t::black_won)
                ("1/2-1/2", result_t::draw)
                ("*", result_t::undecided);

            quoted_string = '"' >> *~char_('"') >> '"';
            tag = '[' >> +alnum >> quoted_string >> ']';
            header = +tag;
            regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
            single_move = raw [ regular_move >> -char_("+#") ];

            // Position value first, then the move itself.
            full_move = filepos.current_pos >> uint_
                >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
                >> (single_move | attr(no_move))
                >> -result;

            game_description = +full_move;
            single_game = -header >> game_description;

            // Remember where the input starts before parsing any game.
            games = filepos.save_start_pos >> *single_game;

            BOOST_SPIRIT_DEBUG_NODES(
                (tag)(header)(quoted_string)(regular_move)(single_move)
                (full_move)(game_description)(single_game)(games)
            )
        }

    private:
        // Must be a member, not a local of the constructor: `full_move` and
        // `games` refer to filepos's rules by reference, so those rules have to
        // live as long as the grammar itself. As a constructor local they were
        // destroyed when the constructor returned, leaving dangling references.
        CurrentPos<Iterator> filepos;

        qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
        qi::rule<Iterator, move_t(), qi::space_type> full_move;
        qi::rule<Iterator, std::vector<move_t>, qi::space_type> game_description;
        qi::rule<Iterator, game_t(), qi::space_type> single_game;
        qi::rule<Iterator, pgn_t(), qi::space_type> games;

        // lexemes: declared without a skipper
        qi::symbols<char, result_t> result;
        qi::rule<Iterator, std::string()> quoted_string;
        qi::rule<Iterator> regular_move;
        qi::rule<Iterator, std::string()> single_move;
    };
}
// Opens the file named by `inputFilePath` and parses it via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream{inputFilePath};
    parseInput(pgnStream);
}
// Parses the games from any input stream via parseInput().
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); }
// Out-of-line definition of the virtual destructor.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
// Intentionally empty: no explicit cleanup is performed here.
}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
loloof64::pgn_parser<It> parser;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
//////////////////////////////////
std::cout << "About to parse the file" << std::endl;
//////////////////////////////////
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
//////////////////////////////////
std::cout << "Finished to parse the file" << std::endl;
//////////////////////////////////
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
And here is the header file:
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace loloof64 {
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
/*
* This class has been taken from http://marko-editor.com/articles/position_tracking/
*/
// Helper exposing two Qi rules for position tracking:
//  - save_start_pos stores the iterator at which it is invoked;
//  - current_pos yields std::distance from that stored iterator to the iterator at which it is invoked.
// Both semantic actions bind `this`, so the rules refer to this particular object.
template<typename Iterator>
struct CurrentPos {
CurrentPos() {
// iter_pos hands the current iterator to the semantic action as _1.
save_start_pos = qi::omit[boost::spirit::repository::qi::iter_pos[
phx::bind(&CurrentPos::setStartPos, this, qi::_1)]];
// The rule's attribute (_val) is set to the value returned by getCurrentPos.
current_pos = boost::spirit::repository::qi::iter_pos[
qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)];
}
qi::rule<Iterator> save_start_pos;
qi::rule<Iterator, std::size_t()> current_pos;
private:
// Remembers the iterator that later distances are measured from.
void setStartPos(const Iterator &iterator) {
start_pos_ = iterator;
}
// Returns the distance from the remembered start iterator to `iterator`.
std::size_t getCurrentPos(const Iterator &iterator) {
return std::distance(start_pos_, iterator);
}
Iterator start_pos_;
};
// Possible outcomes of a game.
enum result_t { white_won, black_won, draw, undecided };
// One PGN tag: a key and its string value.
struct pgn_tag {
std::string key;
std::string value;
};
// One numbered move entry: the move number, both players' moves as text, and a result value.
struct game_move {
unsigned move_number;
std::string white_move;
std::string black_move;
result_t result;
};
// A whole game: its tag header followed by its moves.
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
// Reads PGN games from a file path or from a stream and keeps the parsed result.
class PgnGamesExtractor {
public:
PgnGamesExtractor(std::string inputFilePath);
PgnGamesExtractor(std::istream &inputFile);
/*
Both constructors may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
*/
// Returns a copy of the parsed games.
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
// Parses the stream and fills `games`.
void parseInput(std::istream &inputFile);
};
// Error type for input that is not valid PGN; carries the message passed to the constructor.
class PgnParsingException : public virtual std::runtime_error {
public:
PgnParsingException(std::string message) : std::runtime_error(message) {}
};
// Error type for an input file that cannot be read; carries the message passed to the constructor.
class InputFileException : public virtual std::runtime_error {
public:
InputFileException(std::string message) : std::runtime_error(message) {}
};
}
#endif // PGNGAMESEXTRACTOR_HPP
I did not post the compilation errors because there are too many of them, and the files can easily be tested.
Of course, it's not gonna work well with a streaming interface. You can retain the start iterator, but
you won't know the stream length ahead of time (unless you get it out-of-band)
calculating the current position (distance from the start iterator) each time is going to be horrendously inefficient.
Since you mentioned in a comment you were parsing files, you should consider using memory mapping (boost::iostream::mapped_file_source or mmap e.g.). That way, the distance calculation is instantaneous, using pointer arithmetic on the random-access iterators.
Here's a working example, with the following changes/notes:
using memory mapped input data³
omit[] in save_start_pos is useless (there is no declared attribute)
getCurrentPos was horrifically inefficient (to the extent that just using omit[current_pos] in the full_move rule slowed the parsing down by several orders of magnitude).
This is because boost::spirit::istream_iterator holds on to all previously read state in a deque and traversing them doesn't come for free when doing std::distance
Your CurrentPos<Iterator> filepos; instance goes out of scope after construction! This means that invoking save_start_pos/current_pos is Undefined Behaviour¹. Move it out of the constructor.
A subtler point is to use full_move %= ... when you add the semantic action (see docs and blog)
You changed the types on some of the rules to include position information, alongside the AST types. That's both unnecessary and flawed: the AST types would not be compatible with the tuple<size_t, T> versions of the rules.
Besides, e.g. the games rule didn't even expose a position, because save_start_pos synthesizes unused_type (no attribute).
So, drop the whole tuple business, and just work with the state of the filepos member inside your semantic action:
// %= keeps automatic attribute propagation even though a semantic action is attached.
// The position is passed to reportProgress and then discarded by omit[], so it does not
// become part of the rule's attribute.
full_move %=
omit[filepos.current_pos [ reportProgress(_1) ]] >>
uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
Finally, as a demonstration on how to report strictly increasing progress indications², I included a simple phoenix actor:
/// Functor that writes a progress percentage to std::cerr, but only when the new
/// percentage exceeds the last printed one by more than 10 points.
struct reportProgress_f {
    size_t total_;            // position that corresponds to 100%
    mutable double pct = 0.0; // percentage printed most recently

    reportProgress_f(size_t total) : total_(total) {}

    /// @param pos current position, measured in the same unit as total_
    template <typename T>
    void operator()(T pos) const {
        double const current = pos * 100.0 / total_;
        // Ignore positions that are not more than 10 points past the last report;
        // this also swallows lower positions seen again after backtracking.
        if (!((current - pct) > 10))
            return;
        //sleep(1); // because it's way too fast otherwise...
        pct = current;
        std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << std::flush;
    }
};
phx::function<reportProgress_f> reportProgress;
Note reportProgress needs to be constructed with knowledge about start and end iterators, see the constructor for pgn_parser
¹ in the recorded live stream you can see I spotted the error on the first reading, then forgot about after I made it to compile. The program crashed, dutifully :) Then I remembered.
² even in the face of backtracking
³ (not strictly required, but I guess the goal wasn't to simply make it so slow you actually need the progress indicator?)
Live On Coliru
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace loloof64 {
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
/*
* This class has been taken from http://marko-editor.com/articles/position_tracking/
*/
// Helper exposing two Qi rules for position tracking:
//  - save_start_pos stores the iterator at which it is invoked;
//  - current_pos yields std::distance from that stored iterator to the iterator at which it is invoked.
// Both semantic actions bind `this`, so the rules refer to this particular object.
template<typename Iterator>
struct CurrentPos {
CurrentPos() {
// iter_pos hands the current iterator to the semantic action as _1.
save_start_pos = qr::iter_pos [phx::bind(&CurrentPos::setStartPos, this, qi::_1)] >> qi::eps;
// The rule's attribute (_val) is set to the value returned by getCurrentPos.
current_pos = qr::iter_pos [qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)] >> qi::eps;
}
qi::rule<Iterator> save_start_pos;
qi::rule<Iterator, std::size_t()> current_pos;
private:
// Remembers the iterator that later distances are measured from.
void setStartPos(const Iterator &iterator) {
start_pos_ = iterator;
}
// Returns the distance from the remembered start iterator to `iterator`.
std::size_t getCurrentPos(const Iterator &iterator) {
return std::distance(start_pos_, iterator);
}
Iterator start_pos_;
};
// Possible outcomes of a game.
enum result_t { white_won, black_won, draw, undecided };
// One PGN tag: a key and its string value.
struct pgn_tag {
std::string key;
std::string value;
};
// One numbered move entry: the move number, both players' moves as text, and a result value.
struct game_move {
unsigned move_number;
std::string white_move;
std::string black_move;
result_t result;
};
// A whole game: its tag header followed by its moves.
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
// Reads PGN games from the file at a given path and keeps the parsed result.
class PgnGamesExtractor {
public:
PgnGamesExtractor(std::string const& inputFilePath);
/*
The constructor is documented to throw PgnParsingException (if bad pgn format) and InputFileException (if missing file).
NOTE(review): confirm against parseInput that both are still thrown.
*/
// Returns a copy of the parsed games.
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
// Parses the file named by the argument and fills `games`.
void parseInput(std::string const&);
};
// Error type for input that is not valid PGN; carries the message passed to the constructor.
class PgnParsingException : public virtual std::runtime_error {
public:
PgnParsingException(std::string message) : std::runtime_error(message) {}
};
// Error type for an input file that cannot be read; carries the message passed to the constructor.
class InputFileException : public virtual std::runtime_error {
public:
InputFileException(std::string message) : std::runtime_error(message) {}
};
}
#endif // PGNGAMESEXTRACTOR_HPP
//#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <iomanip>
// Adapt the AST structs as Fusion sequences, listing their members in declaration order,
// so the grammar rules can use them as attributes.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
// Spirit Qi grammar that parses PGN text into std::vector<pgn_game>, using qi::space as skipper.
// The start rule `games` matches zero or more games; each game is an optional tag header
// followed by one or more full moves. Progress is reported to std::cerr while parsing.
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::space_type> {
// start/end delimit the whole input; their distance is the total used for progress percentages.
pgn_parser(Iterator start, Iterator end)
: pgn_parser::base_type(games),
reportProgress(std::distance(start, end))
{
using namespace qi;
// Empty string used as the attribute when one side of a move pair is absent.
const std::string no_move;
// Map the textual game results onto result_t values.
result.add
("1-0", result_t::white_won)
("0-1", result_t::black_won)
("1/2-1/2", result_t::draw)
("*", result_t::undecided);
// Any run of non-quote characters between two double quotes.
quoted_string = '"' >> *~char_('"') >> '"';
// One tag pair of the form [key "value"].
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
// Castling (O-O-O / O-O), or a piece/square sequence optionally followed by "e.p.".
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// raw[] exposes the matched input text, including an optional '+' or '#' suffix.
single_move = raw [ regular_move >> -char_("+#") ];
// %= keeps automatic attribute propagation despite the semantic action; the position is
// passed to reportProgress and then discarded by omit[].
full_move %=
omit[filepos.current_pos [ reportProgress(_1) ]] >>
uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
single_game = -header >> game_description;
// Record the start position once, then parse any number of games.
games = filepos.save_start_pos >> *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
// Functor that writes a progress percentage to std::cerr, but only when the new percentage
// exceeds the last printed one by more than 10 points.
struct reportProgress_f {
size_t total_;
mutable double pct = 0.0;
reportProgress_f(size_t total) : total_(total) {}
template<typename T>
void operator()(T pos) const {
double newpct = pos * 100.0 / total_;
if ((newpct - pct) > 10) {
//sleep(1); // because it's way too fast otherwise...
pct = newpct;
std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << " " << std::flush;
};
}
};
phx::function<reportProgress_f> reportProgress;
// Member (not a constructor local) so its rules stay alive as long as the grammar does.
CurrentPos<Iterator> filepos;
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, game_move(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;
qi::rule<Iterator, pgn_game(), qi::space_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>(), qi::space_type> games;
// lexemes
// These rules are declared without a skipper.
qi::symbols<char, result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
#include <boost/iostreams/device/mapped_file.hpp>
// Nothing to release by hand: the `games` vector cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {}
// Parses the PGN file located at inputFilePath; all the work happens in parseInput.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string const& inputFilePath)
{
    parseInput(inputFilePath);
}
void loloof64::PgnGamesExtractor::parseInput(std::string const& inputFilePath) {
boost::iostreams::mapped_file_source mf(inputFilePath);
//if (inputFile.fail() || inputFile.bad())
//throw new InputFileException("Could not read the input file !");
typedef char const* It;
std::vector<loloof64::pgn_game> temp_games;
/* It iter(inputFile >> std::noskipws), end; */
auto iter = mf.begin();
auto end = mf.end();
loloof64::pgn_parser<It> parser(iter, end);
//////////////////////////////////
//std::cout << "About to parse the file" << std::endl;
//////////////////////////////////
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
//////////////////////////////////
//std::cout << "Finished to parse the file" << std::endl;
//////////////////////////////////
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
int main() {
loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
std::cout << "Parsed " << pge.getGames().size() << " games\n";
for (auto& g : pge.getGames())
for (auto& m : g.moves)
std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
With sample output
Progress 32.6
Progress 44.5
Progress 55.5
Progress 67.2
Progress 77.2
Progress 89.1
Progress 100.0Parsed 1 games
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Qf6
5. O-O d6
6. Ng5 Nh6
7. f4 Be7
8. e5 Qg6
9. exd6 cxd6
10. c3 dxc3
11. Nxc3 O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3 f5
16. Bc4 Kh8
17. Nxe7 Nxe7
18. Qxb7 Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5 Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7 Rxb2
27. Ne6 Qxe6
28. Bxe6 Rxd1+
29. Kf2
Note that on a terminal, the progress indication will self-update using a carriage-return instead of printing separate lines
Solved the problem by following this Sehe video tutorial
Also, note that because he uses a boost::iostreams::mapped_file_source this time, instead of an ifstream as I did, the process is much faster! So the progress bar is no longer needed for this process.
Cpp file and Hpp file
I'm trying to parse a Newick grammar (which is defined here) using the boost::spirit library.
I have already written my own parser, which recognizes the grammar correctly. Here is the code:
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <vector>
namespace parser
{
// Forward declaration: the variant below refers to ptree before it is defined.
struct ptree;
// Variant with a single alternative, a recursive_wrapper around ptree; it is the element type of `children`.
typedef boost::variant<boost::recursive_wrapper<ptree>> ptree_recursive;
// One tree node: its child nodes, its name and a length value.
struct ptree
{
std::vector<ptree_recursive> children;
std::string name;
double length;
};
/* Used to cast ptree_recursive into ptree. */
// Visitor returning the ptree it is applied to, by value.
class ptree_visitor : public boost::static_visitor<ptree>
{
public:
    ptree operator()(ptree node) const { return node; }
};
}
// Adapt parser::ptree as a Fusion sequence (children, name, length, in that order).
BOOST_FUSION_ADAPT_STRUCT(
parser::ptree,
(std::vector<parser::ptree_recursive>, children)
(std::string, name)
(double, length)
)
namespace parser
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Spirit Qi grammar for the Newick tree format, producing a ptree and skipping ASCII whitespace.
// The surrounding question reports that this version recognises the input but yields only a partial tree.
template<typename Iterator>
struct newick_grammar : qi::grammar<Iterator, ptree(), ascii::space_type>
{
public:
newick_grammar() : newick_grammar::base_type(tree)
{
using qi::lexeme;
using qi::double_;
using ascii::char_;
/* This is the only grammar that works fine:
* http://evolution.genetics.washington.edu/phylip/newick_doc.html */
// A label is one or more characters other than ':', ')' and ','.
label = lexeme[+(char_ - ':' - ')' - ',')];
// A branch length is a ':' followed by a floating-point number.
branch_length = ':' >> double_;
// Every part of a subtree is optional: child list, label, branch length.
subtree =
-descendant_list
>> -label
>> -branch_length;
// Parenthesised, comma-separated list of subtrees.
// NOTE(review): the rule is declared below with a single ptree_recursive attribute although it
// matches a list of subtrees; possibly the reason only part of the tree is kept. Confirm.
descendant_list =
'('
>> subtree
>> *(',' >> subtree )
>> ')';
// A complete tree is a subtree terminated by ';'.
tree = subtree >> ';';
BOOST_SPIRIT_DEBUG_NODE(label);
BOOST_SPIRIT_DEBUG_NODE(branch_length);
BOOST_SPIRIT_DEBUG_NODE(subtree);
BOOST_SPIRIT_DEBUG_NODE(descendant_list);
BOOST_SPIRIT_DEBUG_NODE(tree);
}
private:
/* grammar rules */
qi::rule<Iterator, ptree(), ascii::space_type> tree, subtree;
qi::rule<Iterator, ptree_recursive(), ascii::space_type> descendant_list;
qi::rule<Iterator, double(), ascii::space_type> branch_length;
qi::rule<Iterator, std::string(), ascii::space_type> label;
};
}
The ptree instance, which is given to the parser, stores the newick tree.
The test string, used for this code, is the following one:
(((One:0.1,Two:0.2)Sub1:0.3,(Three:0.4,Four:0.5)Sub2:0.6)Sub3:0.7,Five:0.8)Root:0.9;
The parser correctly recognizes the grammar, but it generates a partial tree. In particular, the ptree instance that is returned contains only the "Root" node and its first child, "Sub3".
I also tried to use the push_at and at_c methods (explained here), with the same results.
Why does the grammar not seem to create and add all the nodes, even though it recognizes the input and traverses the whole tree?
Thanks in advance.
SOLUTION
// Revised Newick grammar posted as the solution: no skipper, and attribute types taken from a
// `base` namespace (base::ptree, base::children_ptree) that is not shown in this snippet.
template<typename Iterator>
struct newick_grammar : qi::grammar<Iterator, base::ptree()>
{
public:
newick_grammar() : newick_grammar::base_type(tree)
{
/* This is the only grammar that works fine:
* http://evolution.genetics.washington.edu/phylip/newick_doc.html */
// A label is one or more characters other than ':', ')' and ','.
label %= qi::lexeme[+(qi::char_ - ':' - ')' - ',')];
// A branch length is a ':' followed by a floating-point number.
branch_length %= ':' >> qi::double_;
// Every part of a subtree is optional: child list, label, branch length.
subtree =
-descendant_list
>> -label
>> -branch_length;
// Parenthesised, comma-separated list of subtrees.
descendant_list =
'('
>> subtree
>> *(',' >> subtree )
>> ')';
// A complete tree is a subtree terminated by ';'.
tree %= subtree >> ';';
BOOST_SPIRIT_DEBUG_NODE(label);
BOOST_SPIRIT_DEBUG_NODE(branch_length);
BOOST_SPIRIT_DEBUG_NODE(subtree);
BOOST_SPIRIT_DEBUG_NODE(descendant_list);
BOOST_SPIRIT_DEBUG_NODE(tree);
}
private:
/* grammar rules */
qi::rule<Iterator, base::ptree()> tree, subtree;
qi::rule<Iterator, base::children_ptree()> descendant_list;
qi::rule<Iterator, double()> branch_length;
qi::rule<Iterator, std::string()> label;
};
I think there is a lot of cargo-cult coding in your program. For example, the variant is completely useless. So I rewrote it a bit, adding comments to help you understand (I hope; if anything is unclear, do not hesitate to ask in a comment). I left the skipper specification out, as I think it was useless in your case.
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <vector>
#include <string>
#include <iostream>
namespace parser
{
// Forward declaration so children_vector can name ptree before it is complete
struct ptree;
// typedef to ease the writing
typedef std::vector<ptree> children_vector;
// The tree structure itself
struct ptree
{
    children_vector children;
    std::string name;
    // Zero by default so a node parsed without a ":length" part (or a tree printed after a
    // failed parse) never exposes an indeterminate value.
    double length = 0.0;
};
// Streaming operator for printing the result.
// Writes "(name: length { child,child }" recursively and returns the stream.
// NOTE: the opening '(' is never closed; the format is kept as-is so the output stays
// identical to the sample output shown for this program.
std::ostream& operator<<(std::ostream& stream, const ptree& tree)
{
    bool first = true;
    stream << "(" << tree.name << ": " << tree.length << " { ";
    // Iterate by const reference: copying each child would deep-copy its whole subtree
    // at every level of the recursion.
    for (const auto& child : tree.children)
    {
        stream << (first ? "" : ",") << child;
        first = false;
    }
    stream << " }";
    return stream;
}
}
// Adapt parser::ptree as a Fusion sequence (children, name, length, in that order);
// the grammar's at_c<N> accesses rely on this member order.
BOOST_FUSION_ADAPT_STRUCT(
parser::ptree,
(parser::children_vector, children)
(std::string, name)
(double, length)
)
namespace parser
{
// namespace aliasing to shorten the names
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
// This grammar parses a string (via std::string::const_iterator) into a ptree.
// It is declared without a skipper; attributes are filled explicitly through semantic actions.
struct newick_grammar : qi::grammar<std::string::const_iterator, ptree()>
{
public:
newick_grammar()
: newick_grammar::base_type(tree) // We try to parse the tree rule
{
using phoenix::at_c; // Access nth field of structure
using phoenix::push_back; // Push into vector
// For label use %= to assign the result of the parse to the string
label %= qi::lexeme[+(qi::char_ - ':' - ')' - ',')];
// For branch length use %= to assign the result of the parse to the
// double
branch_length %= ':' >> qi::double_;
// When parsing the subtree just assign the elements that have been
// built in the subrules
subtree =
// Assign vector of children to the first element of the struct
-descendant_list [at_c<0>(qi::_val) = qi::_1 ]
// Assign the label to the second element
>> -label [ at_c<1>(qi::_val) = qi::_1 ]
// Assign the branch length to the third element
>> -branch_length [ at_c<2>(qi::_val) = qi::_1 ];
// Descendant list is a vector of ptree, we just push back the
// created ptrees into the vector
descendant_list =
'(' >> subtree [ push_back(qi::_val, qi::_1) ]
>> *(',' >> subtree [ push_back(qi::_val, qi::_1) ])
>> ')';
// The tree receives the whole subtree using %=
tree %= subtree >> ';' ;
}
private:
// Here are the various grammar rules, typed by the element they
// generate
qi::rule<std::string::const_iterator, ptree()> tree, subtree;
qi::rule<std::string::const_iterator, children_vector()> descendant_list;
qi::rule<std::string::const_iterator, double()> branch_length;
qi::rule<std::string::const_iterator, std::string()> label;
};
}
int main(int argc, char const *argv[])
{
namespace qi = boost::spirit::qi;
std::string str;
while (getline(std::cin, str))
{
// Instantiate grammar and tree
parser::newick_grammar grammar;
parser::ptree tree;
// Parse
bool result = qi::phrase_parse(str.cbegin(), str.cend(), grammar, qi::space, tree);
// Print the result
std::cout << "Parsing result: " << std::boolalpha << result << std::endl;
std::cout << tree << std::endl;
}
return 0;
}
Here is the output of your sample:
$ ./a.exe
(((One:0.1,Two:0.2)Sub1:0.3,(Three:0.4,Four:0.5)Sub2:0.6)Sub3:0.7,Five:0.8)Root:0.9;
Parsing result: true
(Root: 0.9 { (Sub3: 0.7 { (Sub1: 0.3 { (One: 0.1 { },(Two: 0.2 { } },(Sub2: 0.6 { (Three: 0.4 { },(Four: 0.5 { } } },(Five: 0.8 { } }