I am continuing to learn the Boost Spirit library and have a compile issue with an example that I couldn't compile. You can find the source of the example here: source place.
Also you can look at this code and compile result on Coliru
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
//#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <string>
#include <iostream>
namespace lex = boost::spirit::lex;
// Phoenix-compatible functor that computes the distance between two
// iterators (used to add a token's length to the character count).
//
// FIX: the parameters are taken by const reference instead of by mutable
// reference. Newer versions of Spirit.Lex pass _start/_end as rvalues,
// which cannot bind to `Iterator1&` — this was the reported compile error.
// Also uses std::iterator_traits instead of boost::iterator_difference;
// the exposed `result<...>::type` is identical for any iterator type.
struct distance_func
{
    // Result-type protocol expected by boost::phoenix::function (V2).
    template <typename Iterator1, typename Iterator2>
    struct result
    {
        typedef typename std::iterator_traits<Iterator1>::difference_type type;
    };

    template <typename Iterator1, typename Iterator2>
    typename result<Iterator1, Iterator2>::type
    operator()(Iterator1 const& begin, Iterator2 const& end) const
    {
        return std::distance(begin, end);
    }
};
boost::phoenix::function<distance_func> const distance = distance_func();
//[wcl_token_definition
// Lexer for the word-count example: counts characters (c), words (w) and
// lines (l) directly in the lexer's semantic actions, so no parser is needed.
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
word_count_tokens()
: c(0), w(0), l(0)
, word("[^ \t\n]+") // define tokens
, eol("\n")
, any(".")
{
using boost::spirit::lex::_start;
using boost::spirit::lex::_end;
using boost::phoenix::ref;
// associate tokens with the lexer
this->self
// a word bumps the word count and adds its length to the char count
= word [++ref(w), ref(c) += distance(_start, _end)]
// a newline counts as one character and one line
| eol [++ref(c), ++ref(l)]
// any other single character only bumps the char count
| any [++ref(c)]
;
}
// running totals: characters, words, lines
std::size_t c, w, l;
// token_def<> omits the token value (tokens carry no attribute)
lex::token_def<> word, eol, any;
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wcl_main
// Drives the word-count lexer over an in-memory string and prints the
// accumulated line/word/character counts.
// NOTE(review): `str` below is default-constructed and never filled, so the
// lexer runs over empty input and all counts will be zero — presumably the
// original example read a file here; confirm against the upstream source.
int main(int argc, char* argv[])
{
typedef
lex::lexertl::token<char const*, lex::omit, boost::mpl::false_>
token_type;
/*< This defines the lexer type to use
>*/ typedef lex::lexertl::actor_lexer<token_type> lexer_type;
/*< Create the lexer object instance needed to invoke the lexical analysis
>*/ word_count_tokens<lexer_type> word_count_lexer;
/*< Read input from the given file, tokenize all the input, while discarding
all generated tokens
>*/ std::string str;
char const* first = str.c_str();
char const* last = &first[str.size()];
/*< Create a pair of iterators returning the sequence of generated tokens
>*/ lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
lexer_type::iterator_type end = word_count_lexer.end();
/*< Here we simply iterate over all tokens, making sure to break the loop
if an invalid token gets returned from the lexer
>*/ while (iter != end && token_is_valid(*iter))
++iter;
if (iter == end) {
std::cout << "lines: " << word_count_lexer.l
<< ", words: " << word_count_lexer.w
<< ", characters: " << word_count_lexer.c
<< "\n";
}
else {
std::string rest(first, last);
std::cout << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
When I try to compile it I receive a lot of errors, see full list on Coliru.
What is wrong with this example? What needs to be changed to make it compile, and why?
Apparently something changed in the internals of Lex, and the iterator(s) are now rvalues sometimes.
You need to adjust the distance_func to either read
operator()(Iterator1 begin, Iterator2 end) const
or
operator()(Iterator1 const& begin, Iterator2 const& end) const
Then it works. See Live On Coliru
Related
I'm having trouble getting member functions to bind inside grammar definitions. Compile errors result.
In short:
// Question code: a functor whose *named* member function `print` is bound
// as a semantic action — this is what fails to compile; the working
// alternative is the operator() form shown in the comment below.
struct my_functor_word
{
// This code
void print ( std::string const& s, qi::unused_type, qi::unused_type ) const
// Gives the compiler error seen below.
// This code works fine:
// void operator()( std::string const& s, qi::unused_type, qi::unused_type ) const
{
std::cout << "word:" << s << std::endl;
}
};
// Question code: grammar that tries to attach the member function above via
// boost::bind; the failing lines are kept with their compiler errors.
// NOTE(review): `mfw` is a local variable, so even a working bind would
// dangle — the semantic action fires during parse(), long after this
// constructor has returned (see the answer further down).
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef> bd_parse_grammar( TokenDef const& tok )
: bd_parse_grammar::base_type( start )
{
my_functor_word mfw;
start = *(
// This code
tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// gives:
// {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// function_apply;
// ^~~~~~~~~~~~~~
///usr/include/boost/spirit/home/phoenix/core/detail/function_eval.hpp:126:13: error: 'boost::remove_reference<void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const>::type {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// type;
// ^~~~
// This:
// tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// similarly gives:
// /usr/include/boost/bind/bind.hpp:69:37: error: 'void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const' is not a class, struct, or union type
// typedef typename F::result_type type;
// This works OK:
// tok.word [my_functor_word()]
) ;
}
qi::rule<Iterator> start;
};
Here's the whole program. It compiles and functions correctly with functors but not member functions:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/lambda/lambda.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/bind.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
// Lexer: a single token `word` matching the WORD pattern; the token's
// attribute is the matched text as a std::string.
template <typename Lexer>
struct bd_parse_tokens : lex::lexer<Lexer>
{
bd_parse_tokens()
{
// define patterns (lexer macros) to be used during token definition
this->self.add_pattern( "WORD", "[a-zA-Z._]+" );
// define tokens and associate them with the lexer
word = "{WORD}"; // reference the pattern 'WORD' as defined above
this->self.add ( word );
}
// the token 'word' exposes the matched string as its parser attribute
lex::token_def<std::string> word;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Question code (full program version): functor whose named member
// function `print` is bound as a semantic action — the bind fails to
// compile; the operator() form in the comment below works.
struct my_functor_word
{
// This code
void print ( std::string const& s, qi::unused_type, qi::unused_type ) const
// Gives the compiler error seen below.
// This code works fine:
// void operator()( std::string const& s, qi::unused_type, qi::unused_type ) const
{
std::cout << "word:" << s << std::endl;
}
};
// Question code (full program version): the grammar with the failing
// boost::bind semantic action; compiler errors kept inline for reference.
// NOTE(review): `mfw` is a local — it would dangle at parse time even if
// the bind compiled (see the answer below).
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef> bd_parse_grammar( TokenDef const& tok )
: bd_parse_grammar::base_type( start )
{
my_functor_word mfw;
start = *(
// This code
tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// gives:
// {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// function_apply;
// ^~~~~~~~~~~~~~
///usr/include/boost/spirit/home/phoenix/core/detail/function_eval.hpp:126:13: error: 'boost::remove_reference<void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const>::type {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// type;
// ^~~~
// This:
// tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// similarly gives:
// /usr/include/boost/bind/bind.hpp:69:37: error: 'void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const' is not a class, struct, or union type
// typedef typename F::result_type type;
// This works OK:
// tok.word [my_functor_word()]
) ;
}
qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
// Question code: wires lexer + grammar and runs tokenize_and_parse over a
// string built from the first command-line argument.
// NOTE(review): argv[1] is used unchecked — this crashes when the program
// is run without an argument (the answer below points this out). The
// "read in the file int memory" comment also does not match the code: no
// file is read, the argument text itself is parsed.
int main( int argc, char* argv[] )
{
// Define the token type to be used: `std::string` is available as the
// type of the token attribute
typedef lex::lexertl::token < char const*, boost::mpl::vector<std::string> > token_type;
// Define the lexer type to use implementing the state machine
typedef lex::lexertl::lexer<token_type> lexer_type;
// Define the iterator type exposed by the lexer type */
typedef bd_parse_tokens<lexer_type>::iterator_type iterator_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
bd_parse_tokens<lexer_type> bd_parse; // Our lexer
bd_parse_grammar<iterator_type> g( bd_parse ); // Our parser
// read in the file int memory
std::string str( argv[1] );
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize_and_parse( first, last, bd_parse, g );
if ( ! r )
{
std::string rest( first, last );
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
There are many aspects to this puzzle.
Firstly, to bind a member function you have to pass the extra leading instance parameter (the this object).
Secondly, semantic actions are Phoenix Actors, so deferred functors. boost::bind is likely not what you want: you cannot call my_functor_word::print with qi::_1_type anyway. Instead you might use phoenix::bind, in which case you don't even have to deal with the "magic" context parameters:
// Callback functor: echoes a parsed word to standard output,
// prefixed with "word:".
struct my_functor_word {
    // Write the matched word and flush the stream.
    void print(std::string const& s) const {
        std::cout << "word:" << s << '\n' << std::flush;
    }
};
And
start = *(tok.word[ //
boost::phoenix::bind(&my_functor_word::print, &mfw, qi::_1)]);
That Was It?
I won't let you go without some more observations.
Firstly, the bind is still broken! You bound mfw as the this instance, but mfw is a local variable. The nature of the semantic action (being a deferred actor, as said above) is that it will be called during parse: long after the constructor has finished. The mfw needs to be a member variable. Or better, not be a part of the grammar at all. I'd suggest
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Answer code: the functor is now a *member* of the grammar (so it outlives
// the constructor and is alive when the semantic action fires at parse
// time), and phoenix::bind replaces boost::bind.
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
bd_parse_grammar(TokenDef const& tok) : bd_parse_grammar::base_type(start)
{
using namespace qi::labels;
// _1 here is the qi::labels placeholder carrying the token attribute
start = *tok.word[px::bind(&my_functor_word::print, &mfw, _1)];
}
private:
struct my_functor_word {
void print(std::string const& s)
{
std::cout << "word:" << s << std::endl;
}
};
// mutable: the semantic action calls a non-const member through a const grammar
mutable my_functor_word mfw;
qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
I see a lot of old-fashioned and questionable style. E.g. why are you including boost/bind.hpp (or even boost/bind/bind.hpp) and even boost/lambda.hpp?
Why are you using Lex?
You're using using namespace liberally, which is a bad idea, especially when mixing all these libraries (that literally all have their own idea of placeholders named _1 etc.). Instead just make some aliases:
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
You have a comment
// read in the file int memory
That doesn't match the code given:
std::string str(argv[1]);
char const* first = str.c_str();
char const* last = &first[str.size()];
That is just weird all around. Why are you using raw char* with a string (it has proper iterators with begin() and end()?). Also, maybe you really wanted to read a file?
By the way, let's make sure argv[1] is actually valid:
for (std::string fname : std::vector(argv + 1, argv + argc)) {
std::ifstream ifs(fname);
// read in the file into memory
std::string const str(std::istreambuf_iterator<char>(ifs), {});
auto first = str.begin(), //
last = str.end();
So here's a demo Live On Coliru
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
// Lexer: one token `word` built from the WORD pattern macro; its attribute
// is the matched std::string.
template <typename Lexer> struct bd_parse_tokens : lex::lexer<Lexer> {
bd_parse_tokens() {
this->self.add_pattern("WORD", "[a-zA-Z._]+");
word = "{WORD}";
this->self.add(word);
}
// the token 'word' exposes the matched string as its parser attribute
lex::token_def<std::string> word;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Demo grammar: the print functor is a member (alive at parse time) and is
// attached with phoenix::bind; _1 carries the token's string attribute.
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
bd_parse_grammar(TokenDef const& tok) : bd_parse_grammar::base_type(start) {
using namespace qi::labels;
start = *tok.word[px::bind(&my_functor_word::print, &mfw, _1)];
}
private:
struct my_functor_word {
void print(std::string const& s) const { std::cout << "word:" << s << std::endl; }
};
mutable my_functor_word mfw;
qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
// Demo driver: reads each file named on the command line into memory,
// tokenizes + parses it, and reports success and the stop position.
// Iterating over argv+1..argv+argc means no unchecked argv[1] access.
int main(int argc, char* argv[])
{
// type of the token attribute
using token_type = lex::lexertl::token<std::string::const_iterator,
boost::mpl::vector<std::string>>;
using lexer_type = lex::lexertl::/*actor_*/lexer<token_type>;
using iterator_type = bd_parse_tokens<lexer_type>::iterator_type;
bd_parse_tokens<lexer_type> bd_parse;
bd_parse_grammar<iterator_type> g(bd_parse);
for (std::string fname : std::vector(argv + 1, argv + argc)) {
std::ifstream ifs(fname);
// read in the file into memory
std::string const str(std::istreambuf_iterator<char>(ifs), {});
auto first = str.begin();
auto last = str.end();
bool ok = lex::tokenize_and_parse(first, last, bd_parse, g);
std::cerr << "Parsing " << fname << " (length " << str.length() << ") "
<< (ok ? "succeeded" : "failed") << "\n";
// `first` is advanced by tokenize_and_parse; report where it stopped
if (first != last)
std::cerr << "Stopped at #" << std::distance(str.begin(), first)
<< "\n";
}
}
Prints
word:includeboostspiritincludelex_lexertl.hppincludeboostspiritincludephoenix.hppincludeboostspiritincludeqi.hppincludefstreamincludeiomanipnamespaceqiboostspiritqinamespacelexboostspiritlexnamespacepxboostphoenixtemplatetypenameLexerstructbd_parse_tokenslexlexerLexerbd_parse_tokensthisself.add_patternWORDazAZ._wordWORDthisself.addwordthetokenwordexposesthematchedstringasitsparserattributelextoken_defstdstringwordGrammardefinitiontemplatetypenameIteratorstructbd_parse_grammarqigrammarIteratortemplatetypenameTokenDefbd_parse_grammarTokenDefconsttokbd_parse_grammarbase_typestartusingnamespaceqilabelsstarttok.wordpxbindmy_functor_wordprintmfw_privatestructmy_functor_wordvoidprintstdstringconstsconststdcoutwordsstdendlmutablemy_functor_wordmfwqiruleIteratorstartintmainintargccharargvtypeofthetokenattributeusingtoken_typelexlexertltokenstdstringconst_iteratorboostmplvectorstdstringusinglexer_typelexlexertlactor_lexertoken_typeusingiterator_typebd_parse_tokenslexer_typeiterator_typebd_parse_tokenslexer_typebd_parsebd_parse_grammariterator_typegbd_parseforstdstringfnamestdvectorargvargvargcstdifstreamifsfnamereadinthefileintomemorystdstringconststrstdistreambuf_iteratorcharifsautofirststr.beginautolaststr.endbooloklextokenize_and_parsefirstlastbd_parsegstdcerrParsingfnamelengthstr.lengthoksucceededfailedniffirstlaststdcerrStoppedatstddistancestr.beginfirstn
Parsing input.txt (length 1367) succeeded
Parsing main.cpp (length 2479) succeeded
Stopped at #0
Without Lex
I think this would be strictly simpler:
Live On Coliru
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
// Lex-free variant: the `word` rule replaces the lexer token with a plain
// Qi character-class parser; everything else matches the Lex version.
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator> {
bd_parse_grammar() : bd_parse_grammar::base_type(start)
{
using namespace qi::labels;
word = +qi::char_("a-zA-Z_.");
start = *word[px::bind(&my_functor_word::print, &mfw, _1)];
}
private:
struct my_functor_word {
void print(std::string const& s) const { std::cout << "word:" << s << std::endl; }
};
// mutable so the const grammar object can invoke the callback at parse time
mutable my_functor_word mfw;
qi::rule<Iterator> start;
// exposes the matched text as a std::string attribute
qi::rule<Iterator, std::string()> word;
};
// Driver for the Lex-free variant: parse each named file with qi::parse
// and report success plus the stop offset.
int main(int argc, char* argv[]) {
bd_parse_grammar<std::string::const_iterator> const g;
for (std::string fname : std::vector(argv + 1, argv + argc)) {
std::ifstream ifs(fname);
// read the file into memory
std::string const str(std::istreambuf_iterator<char>(ifs), {});
auto first = str.begin();
auto last = str.end();
bool ok = qi::parse(first, last, g);
std::cerr << "Parsing " << fname << " (length " << str.length() << ") "
<< (ok ? "succeeded" : "failed") << "\n";
if (first != last)
std::cerr << "Stopped at #" << std::distance(str.begin(), first)
<< "\n";
}
}
Same output
Without Phoenix Bind
Using some C++17 CTAD and phoenix::function:
Live On Coliru
// Bind-free variant: a lambda wrapped in phoenix::function (C++17 CTAD
// deduces the template argument) serves as the lazy semantic action.
template <typename Iterator> struct Parser : qi::grammar<Iterator> {
Parser() : Parser::base_type(start) {
px::function print{[](std::string const& s) {
std::cout << "word:" << s << std::endl;
}};
word = +qi::char_("a-zA-Z_.");
// print(qi::_1) builds a deferred call; it fires per matched word
start = *word[print(qi::_1)];
}
qi::rule<Iterator> start;
qi::rule<Iterator, std::string()> word;
};
Only half the original code.
Using X3
If you're using C++14 anyways, consider slashing compile times:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <fstream>
namespace x3 = boost::spirit::x3;
// X3 variant: plain function objects in a namespace replace the grammar
// class; the action lambda receives the parse context and reads the
// matched attribute via _attr (found through ADL).
namespace Parser {
auto print = [](auto& ctx) {
std::cout << "word:" << _attr(ctx) << std::endl;
};
auto word = +x3::char_("a-zA-Z_.");
auto start = *word[print];
} // namespace Parser
// X3 driver: same file loop as before, parsing with x3::parse.
int main(int argc, char* argv[]) {
for (std::string fname : std::vector(argv + 1, argv + argc)) {
std::ifstream ifs(fname);
// read the file into memory
std::string const str(std::istreambuf_iterator<char>(ifs), {});
auto first = str.begin();
auto last = str.end();
bool ok = x3::parse(first, last, Parser::start);
std::cerr << "Parsing " << fname << " (length " << str.length() << ") "
<< (ok ? "succeeded" : "failed") << "\n";
if (first != last)
std::cerr << "Stopped at #" << std::distance(str.begin(), first) << "\n";
}
}
Still the same output.
I've reduced my code to the absolute minimum needed to reproduce the error (sadly that is still 60 lines, not quite Minimal, but its VCE at least).
I'm using Boost 1.56 in Visual Studio 2013 (Platform Toolset v120).
The code below gives me an Access Violation unless I uncomment the marked lines. By doing some tests, it seems boost::spirit doesn't like it if the enum starts at 0 (in my full code I have more values in the enum and I just set IntLiteral = 1 and it also got rid of the access violation error, although the names were wrong because ToString was off by one when indexing into the array).
Is this a bug in boost::spirit or did I do something wrong?
#include <iostream>
#include <string>
#include <vector>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
namespace lex = boost::spirit::lex;
typedef lex::lexertl::token<char const*> LexToken;
typedef lex::lexertl::actor_lexer<LexToken> LexerType;
typedef boost::iterator_range<char const*> IteratorRange;
// Question code: enum intended to name token ids.
// NOTE(review): Spirit.Lex assigns token ids starting at
// boost::spirit::lex::min_token_id, not 0, so indexing tokenTypeNames with
// a raw token id reads out of bounds — this is the bug the answer below
// explains; the "fix" of adding a dummy first entry only shifts the
// out-of-bounds read by accident.
enum TokenType
{
//Unused, // <-- Uncommenting this fixes the error (1/2)
IntLiteral,
};
std::string tokenTypeNames[] = {
//"unused", // <-- Uncommenting this line fixes the error (2/2)
"int literal",
};
// Maps a token id to its display name (unsafe: see the note above).
std::string ToString(TokenType t)
{
return tokenTypeNames[t];
}
// Lexer: registers the integer-literal pattern under TokenType::IntLiteral.
template <typename T>
struct Lexer : boost::spirit::lex::lexer < T >
{
    Lexer()
    {
        // FIX: `self` lives in the dependent base class lex::lexer<T>, so
        // standard two-phase name lookup requires `this->self`; the
        // unqualified `self` only compiled on MSVC. (The corrected program
        // further down already uses this->self.)
        this->self.add
            // Literals
            ("[1-9][0-9]*", TokenType::IntLiteral);
    }
};
// Question code: tokenizes "33" and prints each token's name and text.
// NOTE(review): the loop condition evaluates token->is_valid() BEFORE
// comparing with `end` — dereferencing the end iterator looks unsafe;
// compare with the `iter != end && token_is_valid(*iter)` order used in
// the first example above. Confirm against the Spirit.Lex iterator docs.
int main(int argc, char* argv[])
{
std::cout << "Boost version: " << BOOST_LIB_VERSION << std::endl;
std::string input = "33";
char const* inputIt = input.c_str();
char const* inputEnd = &input[input.size()];
Lexer<LexerType> tokens;
LexerType::iterator_type token = tokens.begin(inputIt, inputEnd);
LexerType::iterator_type end = tokens.end();
for (; token->is_valid() && token != end; ++token)
{
auto range = boost::get<IteratorRange>(token->value());
std::cout << ToString(static_cast<TokenType>(token->id())) << " (" << std::string(range.begin(), range.end()) << ')' << std::endl;
}
// keep the console window open (Windows-style pause)
std::cin.get();
return 0;
}
If I uncomment the lines I get:
Boost version: 1_56
int literal (33)
The fact that it "works" if you uncomment those lines, is pure accident.
From the docs spirit/lex/tutorials/lexer_quickstart2.html:
To ensure every token gets assigned a id the Spirit.Lex library internally assigns unique numbers to the token definitions, starting with the constant defined by boost::spirit::lex::min_token_id
See also this older answer:
Spirit Lex: Which token definition generated this token?
So you can just fix it using the offset, but I guess it will keep on being a brittle solution as it is very easy to let the enum go out of synch with actual token definitions in the lexer tables.
I'd suggest using the nameof() approach as given in the linked answer, which leverages named token_def<> objects.
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
namespace lex = boost::spirit::lex;
typedef lex::lexertl::token<char const*> LexToken;
typedef lex::lexertl::actor_lexer<LexToken> LexerType;
typedef boost::iterator_range<char const*> IteratorRange;
// Fixed version: anchor the enum at lex::min_token_id so the enum values
// match the ids Spirit.Lex actually assigns to the token definitions.
enum TokenType {
IntLiteral = boost::spirit::lex::min_token_id
};
// Translate a token id to its display name; the subtraction undoes the
// min_token_id offset applied in the enum above.
std::string const& ToString(TokenType t) {
static const std::string tokenTypeNames[] = {
"int literal",
};
return tokenTypeNames[t - boost::spirit::lex::min_token_id];
}
// Lexer registering the integer-literal pattern under TokenType::IntLiteral.
template <typename T>
struct Lexer : boost::spirit::lex::lexer<T> {
Lexer() {
this->self.add
// Literals
("[1-9][0-9]*", TokenType::IntLiteral);
}
};
// Tokenizes "33" and prints each token's name and matched text.
int main() {
    std::cout << "Boost version: " << BOOST_LIB_VERSION << std::endl;
    std::string input = "33";
    char const* inputIt = input.c_str();
    char const* inputEnd = &input[input.size()];
    Lexer<LexerType> tokens;
    LexerType::iterator_type token = tokens.begin(inputIt, inputEnd);
    LexerType::iterator_type end = tokens.end();
    // FIX: compare against `end` BEFORE dereferencing — evaluating
    // token->is_valid() on the end iterator is not safe. This matches the
    // `iter != end && token_is_valid(*iter)` order used by the word-count
    // example earlier in this file.
    for (; token != end && token->is_valid(); ++token)
    {
        // token->value() holds the matched iterator range
        auto range = boost::get<IteratorRange>(token->value());
        std::cout << ToString(static_cast<TokenType>(token->id())) << " (" << std::string(range.begin(), range.end()) << ')' << std::endl;
    }
}
I have the following code:
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
// Aggregate holding one parsed function signature: return type + name.
struct function
{
std::string ret_type;
std::string name;
};
// Adapt the struct as a Fusion sequence so Spirit can fill it like a tuple
// of (ret_type, name).
BOOST_FUSION_ADAPT_STRUCT(
::function,
(std::string, ret_type)
(std::string, name)
)
// Question code: grammar intended to parse "<ret_type> <name>".
// NOTE(review): the first +char_ greedily consumes the entire input (the
// space skipper runs between matches, but char_ matches spaces too once
// skipping yields), leaving nothing for the second +char_ — so the parse
// always fails, exactly as the answer below explains.
template <typename Iterator>
struct function_parser : boost::spirit::qi::grammar<Iterator, function(), boost::spirit::qi::ascii::space_type>
{
function_parser() : function_parser::base_type(start)
{
using boost::spirit::qi::ascii::char_;
using boost::spirit::qi::int_;
start %= +char_ >> +char_;
}
boost::spirit::qi::rule<Iterator, function(), boost::spirit::qi::ascii::space_type> start;
};
// Runs the function_parser over "void foo" with a whitespace skipper and
// prints the adapted struct as a Fusion tuple on success.
int main()
{
std::string input_data("void foo");
function fn;
auto itr = input_data.begin();
auto end = input_data.end();
function_parser<decltype(itr)> g;
bool res = boost::spirit::qi::phrase_parse(itr, end, g, boost::spirit::ascii::space, fn);
if (res && itr == end)
{
// configure Fusion's tuple I/O manipulators before printing
std::cout << boost::fusion::tuple_open('[');
std::cout << boost::fusion::tuple_close(']');
std::cout << boost::fusion::tuple_delimiter(", ");
std::cout << "Parsing succeeded\n";
std::cout << "got: " << boost::fusion::as_vector(fn) << std::endl;
}
else
{
std::cout << "Parsing failed \n";
}
}
Output
Parsing failed
What am I doing wrong? How can I fix it?
+char_
eats all input! Now, the next
+char_
requires at least a single character, which isn't there (the first kleen plus ate it) so the parse fails.
I suggest instead:
using namespace boost::spirit::qi;
start = lexeme[+graph] >> lexeme[+graph];
The documentation should be able to tell you what that does (I hope. No time to elaborate)
I am trying to use Boost Spirit to parse the following grammar:
sentence:
noun verb
sentence conjunction sentence
conjunction:
"and"
noun:
"birds"
"cats"
verb:
"fly"
"meow"
Parsing succeeds when the grammar only includes the noun >> verb rule.
When the grammar is modified to include the sentence >> conjunction >> sentence rule and I supply an invalid input such as "birds fly" instead of "birdsfly", I get an unhandled exception when the program runs.
here is the code which is modified from examples found on boost doc
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
// Lexer for the toy sentence grammar: three alternation-based tokens, each
// exposing the matched text as a std::string attribute.
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
// Question code: grammar for `sentence`.
// NOTE(review): the second alternative begins with `sentence` itself —
// left recursion, which a recursive-descent (PEG) parser like Qi cannot
// handle: it recurses without consuming input and overflows the stack.
// This is the "unhandled exception" reported; see the answer below for
// the right-recursive rewrite.
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
Grammar(TokenDef const& tok)
: Grammar::base_type(sentence)
{
sentence = (tok.noun>>tok.verb)
|
(sentence>>tok.conjunction>>sentence)>>eoi
;
}
qi::rule<Iterator> sentence;
};
// Question code driver: tokenizes and parses a fixed input string.
// NOTE(review): "birds fly" fails at the *lexer* stage because no token
// matches the space character. system("PAUSE") is Windows-only — on other
// shells it prints "sh: PAUSE: command not found", as noted below.
int main()
{
typedef lex::lexertl::token<char const*, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g (word_count);
std::string str = "birdsfly";
//std::string str = "birds fly"; this input caused unhandled exception
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
system("PAUSE");
return 0;
}
You have left-recursion in the second branch of the sentence rule.
sentence = sentence >> ....
will always recurse on sentence, so you're seeing a stackoverflow.
I suggest writing the rule like, e.g:
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
Now the result reads
g++ -Wall -pedantic -std=c++0x -g -O0 test.cpp -o test
Parsing failed
stopped at: " fly"
(and the inevitable "sh: PAUSE: command not found" of course...)
PS. Don't using namespace please. Instead:
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
Here's a cleaned up version with some other stuff removed/fixed: http://coliru.stacked-crooked.com/view?id=1fb26ca3e8c207979eaaf4592c319316-e223fd4a885a77b520bbfe69dda8fb91
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
// #include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
// Cleaned-up lexer: identical token set (noun/verb/conjunction), each
// carrying the matched std::string as its attribute.
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
// Fixed grammar: the left recursion is rewritten right-recursively —
// a sentence is "noun verb" followed by zero or more
// "conjunction sentence" tails, anchored at end-of-input.
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
Grammar(TokenDef const& tok) : Grammar::base_type(sentence)
{
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
}
qi::rule<Iterator> sentence;
};
// Cleaned-up driver: std::string iterators instead of raw char pointers,
// no system("PAUSE"); "birds fly" now fails gracefully at tokenization.
int main()
{
typedef std::string::const_iterator It;
typedef lex::lexertl::token<It, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g(word_count);
//std::string str = "birdsfly";
const std::string str = "birds fly";
It first = str.begin();
It last = str.end();
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \"" << rest << "\"\n";
}
}
I have a very simple path construct that I am trying to parse with boost spirit.lex.
We have the following grammar:
token := [a-z]+
path := (token : path) | (token)
So we're just talking about colon separated lower-case ASCII strings here.
I have three examples "xyz", "abc:xyz", "abc:xyz:".
The first two should be deemed valid. The third one, which has a trailing colon, should not be deemed valid. Unfortunately the parser I have recognizes all three as being valid. The grammar should not allow an empty token, but apparently spirit is doing just that. What am I missing to get the third one rejected?
Also, if you read the code below, in comments there is another version of the parser that demands that all paths end with semi-colons. I can get appropriate behavior when I activate those lines, (i.e. rejection of "abc:xyz:;"), but this is not really what I want.
Anyone have any ideas?
Thanks.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using boost::phoenix::val;
// Lexer for the colon-separated path grammar: lower-case identifiers, the
// ':' separator (both exposing their text), and a literal ';' token.
template<typename Lexer>
struct PathTokens : boost::spirit::lex::lexer<Lexer>
{
PathTokens()
{
identifier = "[a-z]+";
separator = ":";
this->self.add
(identifier)
(separator)
(';')
;
}
boost::spirit::lex::token_def<std::string> identifier, separator;
};
// Question code: path := (token ':' path) | token.
// The semantic actions are lazy Phoenix stream expressions that echo the
// matched identifier to stderr during the parse.
// NOTE(review): the grammar itself never accepted "abc:xyz:" — the
// misleading result came from checking first==last after
// tokenize_and_parse, which only proves *tokenization* consumed the input
// (see the answer below).
template <typename Iterator>
struct PathGrammar
: boost::spirit::qi::grammar<Iterator>
{
template <typename TokenDef>
PathGrammar(TokenDef const& tok)
: PathGrammar::base_type(path)
{
using boost::spirit::_val;
path
=
(token >> tok.separator >> path)[std::cerr << _1 << "\n"]
|
//(token >> ';')[std::cerr << _1 << "\n"]
(token)[std::cerr << _1 << "\n"]
;
token
= (tok.identifier) [_val=_1]
;
}
boost::spirit::qi::rule<Iterator> path;
boost::spirit::qi::rule<Iterator, std::string()> token;
};
// Question code driver: runs the path grammar over a set of test strings
// and prints the parse result plus whether the input was fully consumed.
// NOTE(review): first==last here only tells you the *lexer* consumed all
// input; it says nothing about whether the grammar matched — that is the
// misleading signal discussed in the answer below.
int main()
{
typedef std::string::iterator BaseIteratorType;
typedef boost::spirit::lex::lexertl::token<BaseIteratorType, boost::mpl::vector<std::string> > TokenType;
typedef boost::spirit::lex::lexertl::lexer<TokenType> LexerType;
typedef PathTokens<LexerType>::iterator_type TokensIterator;
typedef std::vector<std::string> Tests;
Tests paths;
paths.push_back("abc");
paths.push_back("abc:xyz");
paths.push_back("abc:xyz:");
/*
paths.clear();
paths.push_back("abc;");
paths.push_back("abc:xyz;");
paths.push_back("abc:xyz:;");
*/
for ( Tests::iterator iter = paths.begin(); iter != paths.end(); ++iter )
{
std::string str = *iter;
std::cerr << "*****" << str << "*****\n";
PathTokens<LexerType> tokens;
PathGrammar<TokensIterator> grammar(tokens);
BaseIteratorType first = str.begin();
BaseIteratorType last = str.end();
bool r = boost::spirit::lex::tokenize_and_parse(first, last, tokens, grammar);
std::cerr << r << " " << (first==last) << "\n";
}
}
I addition to to what llonesmiz already said, here's a trick using qi::eoi that I sometimes use:
path = (
(token >> tok.separator >> path) [std::cerr << _1 << "\n"]
| token [std::cerr << _1 << "\n"]
) >> eoi;
This makes the grammar require eoi (end-of-input) at the end of a successful match. This leads to the desired result:
http://liveworkspace.org/code/23a7adb11889bbb2825097d7c553f71d
*****abc*****
abc
1 1
*****abc:xyz*****
xyz
abc
1 1
*****abc:xyz:*****
xyz
abc
0 1
The problem lies in the meaning of first and last after your call to tokenize_and_parse. first==last checks if your string has been completely tokenized, you can't infer anything about grammar. If you isolate the parsing like this, you obtain the expected result:
PathTokens<LexerType> tokens;
PathGrammar<TokensIterator> grammar(tokens);
BaseIteratorType first = str.begin();
BaseIteratorType last = str.end();
LexerType::iterator_type lexfirst = tokens.begin(first,last);
LexerType::iterator_type lexlast = tokens.end();
bool r = parse(lexfirst, lexlast, grammar);
std::cerr << r << " " << (lexfirst==lexlast) << "\n";
This is what I finally ended up with. It uses the suggestions from both #sehe and #llonesmiz. Note the conversion to std::wstring and the use of actions in the grammar definition, which were not present in the original post.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/bind.hpp>
#include <iostream>
#include <string>
//
// This example uses boost spirit to parse a simple
// colon-delimited grammar.
//
// The grammar we want to recognize is:
// identifier := [a-z]+
// separator = :
// path= (identifier separator path) | identifier
//
// From the boost spirit perspective this example shows
// a few things I found hard to come by when building my
// first parser.
// 1. How to flag an incomplete token at the end of input
// as an error. (use of boost::spirit::eoi)
// 2. How to bind an action on an instance of an object
// that is taken as input to the parser.
// 3. Use of std::wstring.
// 4. Use of the lexer iterator.
//
// This using directive will cause issues with boost::bind
// when referencing placeholders such as _1.
// using namespace boost::spirit;
//! A class that tokenizes our input.
//! Tokenizer for the path grammar: lower-case identifiers and ':' separators.
template<typename Lexer>
struct Tokens : boost::spirit::lex::lexer<Lexer>
{
    Tokens()
    {
        // Wide-character token patterns.
        identifier = L"[a-z]+";
        separator  = L":";

        // Register both token definitions with the lexer in one chain.
        this->self.add(identifier)(separator);
    }

    //! Token definitions; each exposes its matched text as std::wstring.
    boost::spirit::lex::token_def<std::wstring, wchar_t> identifier;
    boost::spirit::lex::token_def<std::wstring, wchar_t> separator;
};
//! This class provides a callback that echoes strings to stderr.
//! Callback target: writes each matched identifier to stderr.
struct Echo
{
    //! Prints the single wstring attribute carried in the fusion vector.
    void echo(boost::fusion::vector<std::wstring> const& t) const
    {
        std::wcerr << boost::fusion::at_c<0>(t) << "\n";
    }
};
//! The definition of our grammar, as described above.
//! The definition of our grammar, as described above:
//!   path := (identifier ':' path) | identifier, followed by end of input.
template <typename Iterator>
struct Grammar : boost::spirit::qi::grammar<Iterator>
{
    //! \param tok  token definitions (provides identifier/separator tokens)
    //! \param e    callback whose echo() fires for every matched identifier;
    //!             it is captured by pointer, so it must outlive the grammar.
    template <typename TokenDef>
    Grammar(TokenDef const& tok, Echo const& e)
        : Grammar::base_type(path)
    {
        using boost::spirit::_val;
        // FIX: bind the callback by pointer (&e) in BOTH alternatives.
        // The original bound a *copy* of e in the first alternative and a
        // pointer in the second; a stateful Echo would have diverged between
        // the two branches. Binding &e keeps both actions on the same object.
        path
            =
            ((token >> tok.separator >> path)[boost::bind(&Echo::echo, &e, ::_1)]
            |
            (token)[boost::bind(&Echo::echo, &e, ::_1)]
            ) >> boost::spirit::eoi; // Require end of input for a full match.
        token
            = (tok.identifier) [_val = boost::spirit::qi::_1]
            ;
    }
    boost::spirit::qi::rule<Iterator> path;                  // start rule
    boost::spirit::qi::rule<Iterator, std::wstring()> token; // yields matched text
};
int main()
{
// A set of typedefs to make things a little clearer. This stuff is
// well described in the boost spirit documentation/examples.
typedef std::wstring::iterator BaseIteratorType;
typedef boost::spirit::lex::lexertl::token<BaseIteratorType, boost::mpl::vector<std::wstring> > TokenType;
typedef boost::spirit::lex::lexertl::lexer<TokenType> LexerType;
typedef Tokens<LexerType>::iterator_type TokensIterator;
typedef LexerType::iterator_type LexerIterator;
// Define some paths to parse.
typedef std::vector<std::wstring> Tests;
Tests paths;
paths.push_back(L"abc");
paths.push_back(L"abc:xyz");
paths.push_back(L"abc:xyz:");
paths.push_back(L":");
// Parse 'em.
for ( Tests::iterator iter = paths.begin(); iter != paths.end(); ++iter )
{
std::wstring str = *iter;
std::wcerr << L"*****" << str << L"*****\n";
Echo e;
Tokens<LexerType> tokens;
Grammar<TokensIterator> grammar(tokens, e);
BaseIteratorType first = str.begin();
BaseIteratorType last = str.end();
// Have the lexer consume our string.
LexerIterator lexFirst = tokens.begin(first, last);
LexerIterator lexLast = tokens.end();
// Have the parser consume the output of the lexer.
bool r = boost::spirit::qi::parse(lexFirst, lexLast, grammar);
// Print the status and whether or note all output of the lexer
// was processed.
std::wcerr << r << L" " << (lexFirst==lexLast) << L"\n";
}
}