Performance issue with parser written with Boost::spirit - c++

I want to parse a file that looks like this (FASTA-like text format):
>InfoHeader
"Some text sequence that has a line break after every 80 characters"
>InfoHeader
"Some text sequence that has a line break after every 80 characters"
...
e.g.:
>gi|31563518|ref|NP_852610.1| microtubule-associated proteins 1A/1B light chain 3A isoform b [Homo sapiens]
MKMRFFSSPCGKAAVDPADRCKEVQQIRDQHPSKIPVIIERYKGEKQLPVLDKTKFLVPDHVNMSELVKI
IRRRLQLNPTQAFFLLVNQHSMVSVSTPIADIYEQEKDEDGFLYMVYASQETFGFIRENE
I wrote a parser for this with boost::spirit. The parser correctly stores the header line and the following text sequence in a std::vector< std::pair< string, string >> but it takes kind of long for bigger files (17sec for a 100MB file). As comparison I wrote a program without boost::spirit (just STL functions) that simply copies each line of that 100MB file in a std::vector. The whole process takes less than a second. The "program" used for the comparison is not serving the purpose but I don't think the parser should take that much longer...
I know there are plenty of other FASTA parsers around but I'm rather curious why my code is slow.
The .hpp file:
#include <boost/filesystem/path.hpp>
namespace fs = boost::filesystem;
// Reads a FASTA-like file and keeps every record as a (header, sequence) pair.
class FastaReader {
public:
// One element per record: first = header line text, second = sequence text.
typedef std::vector< std::pair<std::string, std::string> > fastaVector;
private:
// Records collected by parse().
fastaVector fV;
// Path of the input file handed to the constructor.
fs::path file;
public:
// Remembers the path; the definition also runs parse() right away.
FastaReader(const fs::path & f);
~FastaReader();
// Path this reader was constructed with.
const fs::path & getFile() const;
// Read-only iteration over the parsed records.
// NOTE(review): the top-level const on a by-value return type has no effect for callers.
const fastaVector::const_iterator getBeginIterator() const;
const fastaVector::const_iterator getEndIterator() const;
private:
// Validates the path, opens the file and fills fV.
void parse();
};
And the .cpp file:
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/qi.hpp>
#include "fastaReader.hpp"
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
// Qi grammar for the FASTA-like format: a sequence of records, each made of a
// '>'-prefixed header line followed by sequence text, terminated by end of input.
// The synthesized attribute is the vector of (header, sequence) pairs; one
// std::string local (_a) carries the header until the sequence has been parsed.
template <typename Iterator, typename Skipper>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::fastaVector(), qi::locals<string>, Skipper> {
// Skipper-less helper rules.
qi::rule<Iterator> infoLineStart;
qi::rule<Iterator> inputEnd;
// NOTE(review): lineEnd is declared but never assigned or used in this grammar.
qi::rule<Iterator> lineEnd;
// Rules that expose the matched text as a std::string.
qi::rule<Iterator, string(), Skipper> infoLine;
qi::rule<Iterator, string(), Skipper> seqLine;
// Start rule.
qi::rule<Iterator, FastaReader::fastaVector(), qi::locals<string>, Skipper> fasta;
FastaGrammar() : FastaGrammar::base_type(fasta, "fasta") {
using boost::spirit::standard::char_;
using boost::phoenix::bind;
using qi::eoi;
using qi::eol;
using qi::lexeme;
using qi::_1;
using qi::_val;
using namespace qi::labels;
// A record starts with a literal '>'.
infoLineStart = char_('>');
// The whole input must be consumed.
inputEnd = eoi;
/* grammar */
// Header: every character up to (not including) the end of line; lexeme[]
// switches the skipper off so the header text is kept verbatim.
infoLine = lexeme[*(char_ - eol)];
// Sequence: every character up to the next '>'. This rule is NOT wrapped in
// lexeme[], so whatever the Skipper matches is dropped from the result.
seqLine = *(char_ - infoLineStart);
// Zero or more records followed by end of input. The header is stashed in the
// local _a, then addValue() appends (header, sequence) to the result vector.
// '>' is Qi's expectation operator rather than the plain sequence operator '>>'.
fasta = *(infoLineStart > infoLine[_a = _1]
> seqLine[bind(&FastaGrammar::addValue, _val, _a, _1)]
)
> inputEnd
;
// Rule names (used by Qi in diagnostics).
infoLineStart.name(">");
infoLine.name("sequence identifier");
seqLine.name("sequence");
}
// Semantic-action helper: appends one (header, sequence) record to fa.
// Both strings are copied into the new pair.
static void addValue(FastaReader::fastaVector & fa, const string & info, const string & seq) {
fa.push_back(make_pair(info, seq));
}
};
FastaReader::FastaReader(const fs::path & f) {
this->file = f;
this->parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::fastaVector::const_iterator FastaReader::getBeginIterator() const {
return this->fV.cbegin();
}
const FastaReader::fastaVector::const_iterator FastaReader::getEndIterator() const {
return this->fV.cend();
}
// Parses this->file into this->fV and prints the elapsed parsing time to std::cerr.
// Throws std::string (not a std::exception subclass) when the path is empty,
// is not a regular file, or the stream cannot be opened.
void FastaReader::parse() {
if ( this->file.empty() ) throw string("FastaReader: No file specified.");
if ( ! fs::is_regular_file(this->file) ) throw (string("FastaReader: File not found: ") + this->file.string());
// Iterator stack: an istream_iterator over the file stream, wrapped in a
// position_iterator2 that is labelled with the file name.
typedef boost::spirit::istream_iterator iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type, boost::spirit::ascii::space_type> fastaGr;
fs::ifstream fin(this->file);
if ( ! fin.is_open() ) {
throw (string("FastaReader: Access denied: ") + this->file.string());
}
// Keep whitespace in the stream; skipping is left to the Qi skipper below.
fin.unsetf(ios::skipws);
iterator_type begin(fin);
iterator_type end;
pos_iterator_type pos_begin(begin, end, this->file.string());
pos_iterator_type pos_end;
fastaGr fG;
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
// NOTE(review): the bool returned by phrase_parse is discarded, so a failed
// match is not reported here.
qi::phrase_parse(pos_begin, pos_end, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
// NOTE(review): only std::string is caught (by value); any other exception type
// raised while parsing propagates to the caller.
} catch (std::string str) {
cerr << "error message: " << str << endl;
}
}
So the grammar does the following:
It looks for a ">" sign and then stores all following characters until an EOL is detected. After the EOL the text sequence starts and ends when a ">" sign is detected. Both strings (header line and text sequence) are then stored in a std::vector by calling FastaReader::addValue().
I compiled my program using g++ version 4.8.2 with -O2 and -std=c++11 flags.
So where is the performance issue in my code?

Previous: Step 3: MOAR FASTER WITH ZERO-COPY Return to Step 1. Cleaning up + Profiling
Step 4: Dropping the position iterator
Since you're not using it, we can drop the stateful iterator, which is likely to inhibit quite a lot of optimizations (and was indirectly visible in the profiler output)
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/filesystem/path.hpp>
#include <boost/utility/string_ref.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
namespace io = boost::iostreams;
namespace fs = boost::filesystem;
// Zero-copy FASTA reader: the file is memory-mapped and every record is stored
// as a pair of boost::string_ref views instead of owning strings.
class FastaReader {
public:
// first = header line, second = sequence text.
// NOTE(review): these views appear to point into the mapping held by `mmap`,
// so they are presumably only valid while that mapping stays open - confirm.
typedef std::pair<boost::string_ref, boost::string_ref> Entry;
typedef std::vector<Entry> Data;
private:
// Records collected by parse().
Data fV;
// Path of the input file handed to the constructor.
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
// Path this reader was constructed with.
const fs::path & getFile() const;
// begin()/end() make the reader usable in a range-based for loop.
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
// Read-only memory mapping of `file`. Declared after `file`, so it is
// initialized after it.
io::mapped_file_source mmap;
// Runs the grammar over the mapped bytes and fills fV.
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
#include <boost/iostreams/device/mapped_file.hpp>
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
namespace io = boost::iostreams;
namespace boost { namespace spirit { namespace traits {
    // Customization point: lets Qi store a matched iterator range (e.g. from
    // raw[]) into a boost::string_ref attribute without copying characters.
    // The view starts at `first` and spans distance(first, last) characters.
    template <typename It>
    struct assign_to_attribute_from_iterators<boost::string_ref, It, void> {
        static void call(It first, It last, boost::string_ref& attr) {
            const size_t length = size_t(std::distance(first, last));
            attr = boost::string_ref(first, length);
        }
    };
} } }
// Skipper-less Qi grammar for FASTA-like input.
//
//   entry := '>' header-up-to-newline '\n' sequence-up-to-next-'>'
//   fasta := entry* eol* end-of-input
//
// Both parts of an entry are wrapped in raw[], so the attribute is built from
// the matched iterator range rather than from individually collected characters.
// Line breaks inside the sequence part are kept as-is.
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
    FastaGrammar() : FastaGrammar::base_type(fasta) {
        // The unused boost::phoenix using-declarations (construct, begin, size)
        // were dropped: nothing in this grammar refers to them.
        using namespace qi;

        entry = ('>' >> raw[ *~char_('\n') ] >> '\n' >> raw[ *~char_('>') ]);
        fasta = *entry >> *eol >> eoi ;

        BOOST_SPIRIT_DEBUG_NODES((fasta)(entry));
    }
private:
    qi::rule<Iterator, FastaReader::Data()> fasta;   // start rule: all records
    qi::rule<Iterator, FastaReader::Entry()> entry;  // one (header, sequence) record
};
// Validates the path, maps the file into memory and parses it.
//
// The mapping is opened in the body instead of the initializer list: opening it
// in the initializer list would make the mapped_file_source constructor throw
// its own low-level error for a missing or empty path before any of the
// descriptive "FastaReader: ..." checks could run.
//
// Throws std::runtime_error when the path is empty or is not a regular file;
// errors raised by the mapping itself propagate unchanged.
FastaReader::FastaReader(const fs::path & f) : file(f) {
    if (file.empty()) throw std::runtime_error("FastaReader: No file specified.");
    if (!fs::is_regular_file(file)) throw std::runtime_error(std::string("FastaReader: File not found: ") + file.string());
    mmap.open(file.c_str());
    parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
// Runs the FASTA grammar over the memory-mapped file, fills this->fV and prints
// the elapsed parsing time to std::cerr.
//
// Throws std::runtime_error when the path is empty or is not a regular file.
// A grammar mismatch is reported on std::cerr (it used to be silently ignored);
// whatever entries were collected before the mismatch are left in fV.
void FastaReader::parse() {
    if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
    if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());

    typedef char const* iterator_type;
    typedef FastaGrammar<iterator_type> fastaGr;
    // The grammar is stateless, so it is built once and shared between calls.
    static const fastaGr fG{};

    try {
        std::cerr << "Measuring: Parsing." << std::endl;
        const pt::ptime startMeasurement = pt::microsec_clock::universal_time();

        // Plain pointers into the mapping serve as the parser's iterators.
        iterator_type first(mmap.data()), last(mmap.end());
        const bool matched = qi::phrase_parse(first, last, fG, boost::spirit::ascii::space, this->fV);

        const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
        pt::time_duration duration (endMeasurement - startMeasurement);
        std::cerr << duration << std::endl;

        // The grammar ends in eoi, so a successful match implies the whole
        // input was consumed; anything else is a malformed file.
        if (!matched) {
            throw std::runtime_error(string("FastaReader: Parse failed: ") + this->file.string());
        }
    } catch (std::exception const& e) {
        cerr << "error message: " << e.what() << endl;
    }
}
int main() {
FastaReader reader("input.txt");
for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
Now it's 74.8x faster.
$ time ./test | head -n4
Measuring: Parsing.
00:00:00.194432

Next: Step 2. Faster with mmap
Step 1. Cleaning up + Profiling
You should avoid the many rules: they introduce type erasure.
If your input is sane, you can do without the skipper (anyway, line ends were significant, so it made no sense to skip them).
Use fusion adaptation instead of a helper to construct new pairs:
This is not optimal, yet, but a lot cleaner:
$ ./test1
Measuring: Parsing.
00:00:22.681605
Slightly more efficient by reducing moving parts and indirections:
Live On Coliru
#include <boost/filesystem/path.hpp>
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<std::string, std::string> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
qi::rule<Iterator, FastaReader::Data()> fasta;
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
fasta = *('>' >> *~char_('\n') >> '\n'
>> *~char_('>'))
>> *eol
>> eoi
;
BOOST_SPIRIT_DEBUG_NODES((fasta));
}
};
FastaReader::FastaReader(const fs::path & f) : file(f) {
parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef boost::spirit::istream_iterator iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type> fastaGr;
fs::ifstream fin(this->file);
if (!fin) {
throw std::runtime_error(string("FastaReader: Access denied: ") + this->file.string());
}
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
pos_iterator_type first(iterator_type{fin >> std::noskipws}, {}, file.string());
qi::phrase_parse<pos_iterator_type>(first, {}, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
std::ios::sync_with_stdio(false);
FastaReader reader("input.txt");
//for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
This is still slow. Let's see what takes so long:
That's pretty, but hardly tells us what we need to know. This however does: top-N time consumers are
So most time is spent in istream iteration and the multi-pass adaptor. You could argue that the multi-pass adaptor could be optimized by flushing it once in a while (each line?) but really, we would prefer not to be tied to the whole stream and operate on the (stream) buffer instead.
So, I thought: let's use a mapped file instead:
Next: Step 2. Faster with mmap

Previous: Step 2. Faster with mmap
Next: Step 4: Dropping the position iterator
Step 3: MOAR FASTER WITH ZERO-COPY
Let's avoid allocations! If we move the file mapping into the FastaReader class, we can directly point to data in the mapping instead of copying strings all the time.
Using boost::string_ref as e.g. described here: C++: Fast way to read mapped file into a matrix you can do
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/filesystem/path.hpp>
#include <boost/utility/string_ref.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
namespace io = boost::iostreams;
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<boost::string_ref, boost::string_ref> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
io::mapped_file_source mmap;
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
#include <boost/iostreams/device/mapped_file.hpp>
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
namespace io = boost::iostreams;
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<boost::string_ref, It, void> {
static void call(It f, It l, boost::string_ref& attr) { attr = boost::string_ref { f.base(), size_t(std::distance(f.base(),l.base())) }; }
};
} } }
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
using boost::phoenix::construct;
using boost::phoenix::begin;
using boost::phoenix::size;
entry = ('>' >> raw[ *~char_('\n') ] >> '\n' >> raw[ *~char_('>') ]);
fasta = *entry >> *eol >> eoi ;
BOOST_SPIRIT_DEBUG_NODES((fasta)(entry));
}
private:
qi::rule<Iterator, FastaReader::Data()> fasta;
qi::rule<Iterator, FastaReader::Entry()> entry;
};
// Validates the path, maps the file into memory and parses it.
//
// The mapping is opened in the body instead of the initializer list: opening it
// in the initializer list would make the mapped_file_source constructor throw
// its own low-level error for a missing or empty path before any of the
// descriptive "FastaReader: ..." checks could run.
//
// Throws std::runtime_error when the path is empty or is not a regular file;
// errors raised by the mapping itself propagate unchanged.
FastaReader::FastaReader(const fs::path & f) : file(f) {
    if (file.empty()) throw std::runtime_error("FastaReader: No file specified.");
    if (!fs::is_regular_file(file)) throw std::runtime_error(std::string("FastaReader: File not found: ") + file.string());
    mmap.open(file.c_str());
    parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef char const* iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type> fastaGr;
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
pos_iterator_type first(iterator_type{mmap.data()}, iterator_type{mmap.end()}, file.string());
qi::phrase_parse<pos_iterator_type>(first, {}, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
FastaReader reader("input.txt");
for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
This is indeed already 4.8x faster:
$ ./test3 | head -n4
Measuring: Parsing.
00:00:04.577123
>gi|31563518|ref|NP_852610.1| microtubule-associated proteins 1A/1B light chain 3A isoform b [Homo sapiens]
MKMRFFSSPCGKAAVDPADRCKEVQQIRDQHPSKIPVIIERYKGEKQLPVLDKTKFLVPDHVNMSELVKI
IRRRLQLNPTQAFFLLVNQHSMVSVSTPIADIYEQEKDEDGFLYMVYASQETFGFIRENE
Next: Step 4: Dropping the position iterator

Previous: Step 1. Cleaning up + Profiling
Next: Step 3: MOAR FASTER WITH ZERO-COPY
Step 2. Faster with mmap
Live On Coliru
#include <boost/filesystem/path.hpp>
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<std::string, std::string> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
#include <boost/iostreams/device/mapped_file.hpp>
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
namespace io = boost::iostreams;
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
qi::rule<Iterator, FastaReader::Data()> fasta;
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
fasta = *('>' >> *~char_('\n') >> '\n'
>> *~char_('>'))
>> *eol
>> eoi
;
BOOST_SPIRIT_DEBUG_NODES((fasta));
}
};
FastaReader::FastaReader(const fs::path & f) : file(f) {
parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef char const* iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type> fastaGr;
io::mapped_file_source mmap(file.c_str());
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
pos_iterator_type first(iterator_type{mmap.data()}, iterator_type{mmap.end()}, file.string());
qi::phrase_parse<pos_iterator_type>(first, {}, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
FastaReader reader("input.txt");
//for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
Indeed on my system it's roughly 3x faster (input is 229 MiB):
$ ./mapped_file_source
Measuring: Parsing.
00:00:07.385787
Next: Step 3: MOAR FASTER WITH ZERO-COPY

Related

Need help using member functions as action with Boost Spirit QI

I'm having trouble getting member functions to bind inside grammar definitions. Compile errors result.
In short:
struct my_functor_word
{
// This code
void print ( std::string const& s, qi::unused_type, qi::unused_type ) const
// Gives the compiler error seen below.
// This code works fine:
// void operator()( std::string const& s, qi::unused_type, qi::unused_type ) const
{
std::cout << "word:" << s << std::endl;
}
};
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef> bd_parse_grammar( TokenDef const& tok )
: bd_parse_grammar::base_type( start )
{
my_functor_word mfw;
start = *(
// This code
tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// gives:
// {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// function_apply;
// ^~~~~~~~~~~~~~
///usr/include/boost/spirit/home/phoenix/core/detail/function_eval.hpp:126:13: error: 'boost::remove_reference<void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const>::type {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// type;
// ^~~~
// This:
// tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// similarly gives:
// /usr/include/boost/bind/bind.hpp:69:37: error: 'void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const' is not a class, struct, or union type
// typedef typename F::result_type type;
// This works OK:
// tok.word [my_functor_word()]
) ;
}
qi::rule<Iterator> start;
};
Here's the whole program. It compiles and functions correctly with functors but not member functions:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/lambda/lambda.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <boost/bind.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
template <typename Lexer>
struct bd_parse_tokens : lex::lexer<Lexer>
{
bd_parse_tokens()
{
// define patterns (lexer macros) to be used during token definition
this->self.add_pattern( "WORD", "[a-zA-Z._]+" );
// define tokens and associate them with the lexer
word = "{WORD}"; // reference the pattern 'WORD' as defined above
this->self.add ( word );
}
// the token 'word' exposes the matched string as its parser attribute
lex::token_def<std::string> word;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
struct my_functor_word
{
// This code
void print ( std::string const& s, qi::unused_type, qi::unused_type ) const
// Gives the compiler error seen below.
// This code works fine:
// void operator()( std::string const& s, qi::unused_type, qi::unused_type ) const
{
std::cout << "word:" << s << std::endl;
}
};
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef> bd_parse_grammar( TokenDef const& tok )
: bd_parse_grammar::base_type( start )
{
my_functor_word mfw;
start = *(
// This code
tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// gives:
// {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// function_apply;
// ^~~~~~~~~~~~~~
///usr/include/boost/spirit/home/phoenix/core/detail/function_eval.hpp:126:13: error: 'boost::remove_reference<void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const>::type {aka void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const}' is not a class, struct, or union type
// type;
// ^~~~
// This:
// tok.word [boost::bind(&my_functor_word::print, &mfw, qi::_1)]
// similarly gives:
// /usr/include/boost/bind/bind.hpp:69:37: error: 'void (my_functor_word::*)(const std::basic_string<char>&, boost::spirit::unused_type, boost::spirit::unused_type) const' is not a class, struct, or union type
// typedef typename F::result_type type;
// This works OK:
// tok.word [my_functor_word()]
) ;
}
qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
int main( int argc, char* argv[] )
{
// Define the token type to be used: `std::string` is available as the
// type of the token attribute
typedef lex::lexertl::token < char const*, boost::mpl::vector<std::string> > token_type;
// Define the lexer type to use implementing the state machine
typedef lex::lexertl::lexer<token_type> lexer_type;
// Define the iterator type exposed by the lexer type */
typedef bd_parse_tokens<lexer_type>::iterator_type iterator_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
bd_parse_tokens<lexer_type> bd_parse; // Our lexer
bd_parse_grammar<iterator_type> g( bd_parse ); // Our parser
// read in the file int memory
std::string str( argv[1] );
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize_and_parse( first, last, bd_parse, g );
if ( ! r )
{
std::string rest( first, last );
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
There are many aspects to this puzzle.
Firstly, to bind a member function you have to pass the extra leading instance parameter (the this object).
Secondly, semantic actions are Phoenix Actors, so deferred functors. boost::bind is likely not what you want: you cannot call my_functor_word::print with qi::_1_type anyway. Instead you might use phoenix::bind, in which case you don't even have to deal with the "magic" context parameters:
struct my_functor_word {
void print(std::string const& s) const {
std::cout << "word:" << s << std::endl;
}
};
And
start = *(tok.word[ //
boost::phoenix::bind(&my_functor_word::print, &mfw, qi::_1)]);
That Was It?
I won't let you go without some more observations.
Firstly, the bind is still broken! You bound mfw as the this instance, but mfw is a local variable. The nature of the semantic action (being a deferred actor as said above) is that it will be called during parse: long after the constructor is finished. The mfw needs to be a member variable. Or better, not be a part of the grammar at all. I'd suggest
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Grammar that prints every `word` token it sees.
//
// The functor is a data member, not a local of the constructor: the semantic
// action only stores a pointer to it and is invoked later, during parsing, so
// the object has to live as long as the grammar does.
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
    // tok: token definitions; only tok.word is referenced here.
    template <typename TokenDef>
    bd_parse_grammar(TokenDef const& tok) : bd_parse_grammar::base_type(start)
    {
        using namespace qi::labels;
        // Deferred call: mfw.print(<attribute of the matched word>).
        start = *tok.word[px::bind(&my_functor_word::print, &mfw, _1)];
    }

private:
    struct my_functor_word {
        // const, matching the complete demos: printing does not modify the functor.
        void print(std::string const& s) const
        {
            std::cout << "word:" << s << std::endl;
        }
    };

    mutable my_functor_word mfw;
    qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
I see a lot of old-fashioned and questionable style. E.g. why are you including boost/bind.hpp (or even boost/bind/bind.hpp) and even boost/lambda.hpp?
Why are you using Lex?
You're using using namespace liberally, which is a bad idea, especially when mixing all these libraries (that literally all have their own idea of placeholders named _1 etc). Instead just make some aliases:
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
You have a comment
// read in the file int memory
That doesn't match the code given:
std::string str(argv[1]);
char const* first = str.c_str();
char const* last = &first[str.size()];
That is just weird all around. Why are you using raw char* with a string (it has proper iterators with begin() and end()?). Also, maybe you really wanted to read a file?
By the way, let's make sure argv[1] is actually valid:
for (std::string fname : std::vector(argv + 1, argv + argc)) {
std::ifstream ifs(fname);
// read in the file into memory
std::string const str(std::istreambuf_iterator<char>(ifs), {});
auto first = str.begin(), //
last = str.end();
So here's a demo Live On Coliru
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
template <typename Lexer> struct bd_parse_tokens : lex::lexer<Lexer> {
bd_parse_tokens() {
this->self.add_pattern("WORD", "[a-zA-Z._]+");
word = "{WORD}";
this->self.add(word);
}
// the token 'word' exposes the matched string as its parser attribute
lex::token_def<std::string> word;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
bd_parse_grammar(TokenDef const& tok) : bd_parse_grammar::base_type(start) {
using namespace qi::labels;
start = *tok.word[px::bind(&my_functor_word::print, &mfw, _1)];
}
private:
struct my_functor_word {
void print(std::string const& s) const { std::cout << "word:" << s << std::endl; }
};
mutable my_functor_word mfw;
qi::rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
// Tokenizes and parses every file named on the command line and reports, per
// file, whether parsing succeeded and where it stopped if input was left over.
int main(int argc, char* argv[])
{
    // type of the token attribute
    using token_type = lex::lexertl::token<std::string::const_iterator,
                                           boost::mpl::vector<std::string>>;
    using lexer_type = lex::lexertl::/*actor_*/lexer<token_type>;
    using iterator_type = bd_parse_tokens<lexer_type>::iterator_type;

    bd_parse_tokens<lexer_type> bd_parse;
    bd_parse_grammar<iterator_type> g(bd_parse);

    for (std::string fname : std::vector(argv + 1, argv + argc)) {
        std::ifstream ifs(fname);
        // An unreadable file would otherwise be treated as empty input and
        // reported as a successful parse, so report it and move on.
        if (!ifs) {
            std::cerr << "Cannot open " << fname << "\n";
            continue;
        }

        // read in the file into memory
        std::string const str(std::istreambuf_iterator<char>(ifs), {});

        auto first = str.begin();
        auto last = str.end();
        bool ok = lex::tokenize_and_parse(first, last, bd_parse, g);

        std::cerr << "Parsing " << fname << " (length " << str.length() << ") "
                  << (ok ? "succeeded" : "failed") << "\n";
        // tokenize_and_parse advances `first`; leftover input means it stopped early.
        if (first != last)
            std::cerr << "Stopped at #" << std::distance(str.begin(), first)
                      << "\n";
    }
}
Prints
word:includeboostspiritincludelex_lexertl.hppincludeboostspiritincludephoenix.hppincludeboostspiritincludeqi.hppincludefstreamincludeiomanipnamespaceqiboostspiritqinamespacelexboostspiritlexnamespacepxboostphoenixtemplatetypenameLexerstructbd_parse_tokenslexlexerLexerbd_parse_tokensthisself.add_patternWORDazAZ._wordWORDthisself.addwordthetokenwordexposesthematchedstringasitsparserattributelextoken_defstdstringwordGrammardefinitiontemplatetypenameIteratorstructbd_parse_grammarqigrammarIteratortemplatetypenameTokenDefbd_parse_grammarTokenDefconsttokbd_parse_grammarbase_typestartusingnamespaceqilabelsstarttok.wordpxbindmy_functor_wordprintmfw_privatestructmy_functor_wordvoidprintstdstringconstsconststdcoutwordsstdendlmutablemy_functor_wordmfwqiruleIteratorstartintmainintargccharargvtypeofthetokenattributeusingtoken_typelexlexertltokenstdstringconst_iteratorboostmplvectorstdstringusinglexer_typelexlexertlactor_lexertoken_typeusingiterator_typebd_parse_tokenslexer_typeiterator_typebd_parse_tokenslexer_typebd_parsebd_parse_grammariterator_typegbd_parseforstdstringfnamestdvectorargvargvargcstdifstreamifsfnamereadinthefileintomemorystdstringconststrstdistreambuf_iteratorcharifsautofirststr.beginautolaststr.endbooloklextokenize_and_parsefirstlastbd_parsegstdcerrParsingfnamelengthstr.lengthoksucceededfailedniffirstlaststdcerrStoppedatstddistancestr.beginfirstn
Parsing input.txt (length 1367) succeeded
Parsing main.cpp (length 2479) succeeded
Stopped at #0
Without Lex
I think this would be strictly simpler:
Live On Coliru
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
/// Qi grammar that matches zero or more consecutive "words" and prints each one.
///
/// A word is one or more characters from the set [a-zA-Z_.]. The grammar itself
/// exposes no attribute; its only visible effect is the semantic action on `word`.
template <typename Iterator>
struct bd_parse_grammar : qi::grammar<Iterator> {
bd_parse_grammar() : bd_parse_grammar::base_type(start)
{
// Brings the _1 placeholder used in the semantic action into scope.
using namespace qi::labels;
// One or more characters from the set, exposed as the rule's std::string attribute.
word = +qi::char_("a-zA-Z_.");
// For every matched word, call mfw.print with that word's attribute (_1).
start = *word[px::bind(&my_functor_word::print, &mfw, _1)];
}
private:
/// Callback holder: print() writes "word:" followed by the text and std::endl to std::cout.
struct my_functor_word {
void print(std::string const& s) const { std::cout << "word:" << s << std::endl; }
};
// The semantic action above is bound to the address of this member (&mfw).
mutable my_functor_word mfw;
qi::rule<Iterator> start; // top-level rule; no attribute
qi::rule<Iterator, std::string()> word; // a single word as std::string
};
int main(int argc, char* argv[]) {
bd_parse_grammar<std::string::const_iterator> const g;
for (std::string fname : std::vector(argv + 1, argv + argc)) {
std::ifstream ifs(fname);
// read the file into memory
std::string const str(std::istreambuf_iterator<char>(ifs), {});
auto first = str.begin();
auto last = str.end();
bool ok = qi::parse(first, last, g);
std::cerr << "Parsing " << fname << " (length " << str.length() << ") "
<< (ok ? "succeeded" : "failed") << "\n";
if (first != last)
std::cerr << "Stopped at #" << std::distance(str.begin(), first)
<< "\n";
}
}
Same output
Without Phoenix Bind
Using some C++17 CTAD and phoenix::function:
Live On Coliru
/// Qi grammar that matches zero or more words ([a-zA-Z_.]+) and prints each one.
template <typename Iterator> struct Parser : qi::grammar<Iterator> {
Parser() : Parser::base_type(start) {
// Phoenix function wrapping a lambda that writes "word:" + text + std::endl to
// std::cout; the wrapper's template argument is deduced from the lambda.
px::function print{[](std::string const& s) {
std::cout << "word:" << s << std::endl;
}};
// One or more characters from the set, exposed as std::string.
word = +qi::char_("a-zA-Z_.");
// Invoke `print` with the attribute of every matched word.
start = *word[print(qi::_1)];
}
qi::rule<Iterator> start; // top-level rule; no attribute
qi::rule<Iterator, std::string()> word; // a single word as std::string
};
Only half the original code.
Using X3
If you're using C++14 anyways, consider slashing compile times:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <fstream>
namespace x3 = boost::spirit::x3;
/// X3 parsers for a run of words; every matched word is printed to std::cout.
namespace Parser {
// Semantic action: writes "word:" followed by the attribute found in the context.
auto print = [](auto& ctx) {
std::cout << "word:" << _attr(ctx) << std::endl;
};
// One or more characters from the set [a-zA-Z_.].
auto word = +x3::char_("a-zA-Z_.");
// Zero or more words, with `print` attached to each one.
auto start = *word[print];
} // namespace Parser
/// Runs Parser::start over every file named on the command line and reports,
/// on std::cerr, whether parsing succeeded and where it stopped if input remains.
int main(int argc, char* argv[]) {
    for (int i = 1; i < argc; ++i) {
        std::string const path = argv[i];
        std::ifstream input(path);

        // Slurp the whole file into memory before parsing.
        std::string const text(std::istreambuf_iterator<char>(input), {});

        std::string::const_iterator cursor = text.begin();
        std::string::const_iterator const end = text.end();
        bool const matched = x3::parse(cursor, end, Parser::start);

        std::cerr << "Parsing " << path << " (length " << text.length() << ") "
                  << (matched ? "succeeded" : "failed") << "\n";

        // x3::parse advances `cursor`; anything left over was not consumed.
        if (cursor != end) {
            std::cerr << "Stopped at #" << (cursor - text.begin()) << "\n";
        }
    }
}
Still the same output.

Spirit X3 composed attributes

I am trying to compose spirit rules but I cannot figure out what the attribute of this new rule would be.
The following code is working as I would expect it.
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/tuple.hpp>
/// AST types used as attributes by the rules in namespace `parser`.
namespace ast{
/// One parsed record: an integer id and a name.
struct Record{
int id;
std::string name;
};
/// The whole input: two records followed by a trailing name.
struct Document{
Record rec;
Record rec2;
//std::vector<Record> rec;
std::string name;
};
// Makes Fusion's stream insertion operator visible in this namespace.
using boost::fusion::operator<<;
}
// Adapt Record as a Fusion sequence. The members are listed as (name, id), which
// is not their declaration order, so the sequence yields name first and id second.
BOOST_FUSION_ADAPT_STRUCT(ast::Record,
name, id
)
// Adapt Document as a Fusion sequence of (rec, rec2, name).
BOOST_FUSION_ADAPT_STRUCT(ast::Document,
rec, rec2,
//rec,
name
)
/// X3 rules that parse the record/document syntax into the ast:: types.
namespace parser{
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
using x3::lit;
using x3::int_;
using ascii::char_;
// One or more lowercase letters a-z.
const auto identifier = +char_("a-z");
const x3::rule<class record, ast::Record> record = "record";
// Shape: record <identifier> { <int> }
const auto record_def = lit("record") >> identifier >> lit("{") >> int_ >> lit("}");
const x3::rule<class document, ast::Document> document = "document";
// Exactly two records followed by an identifier.
const auto document_def =
record >> record
//+record // This should generate a sequence
>> identifier
;
// Associates each rule with its corresponding `_def` definition.
BOOST_SPIRIT_DEFINE(document, record);
}
namespace{
// Sample input: two records followed by a bare identifier, each on its own line
// (the raw string also begins and ends with a newline).
constexpr char g_input[] = R"input(
record foo{42}
record bar{73}
foobar
)input";
}
/// Parses g_input as a document (skipping ASCII whitespace) and prints the AST.
///
/// @return 0 when the parser matched, 1 when it did not.
int main(){
    using boost::spirit::x3::ascii::space;
    std::string str = g_input;
    ast::Document unit;
    bool r = phrase_parse(str.begin(), str.end(), parser::document, space, unit);
    if (!r) {
        // Do not present a partially filled AST as if the parse had succeeded.
        std::cerr << "Parse failed\n";
        return 1;
    }
    std::cout << "Got: " << unit << "\n";
    return 0;
}
But when I change the rule to parse multiple records (instead of exactly two), I would expect it to have a std::vector<Record> as its attribute. All I get, however, is a long compiler error that does not help me very much.
Can someone point me to what I am doing wrong in order to compose the attributes correctly?
I think the whole reason it didn't compile is because you tried to print the result... and std::vector<Record> doesn't know how to be streamed:
namespace ast {
using boost::fusion::operator<<;
static inline std::ostream& operator<<(std::ostream& os, std::vector<Record> const& rs) {
os << "{ ";
for (auto& r : rs) os << r << " ";
return os << "}";
}
}
Some more notes:
adding lexemes where absolutely required (!)
simplifying (no need to BOOST_SPIRIT_DEFINE unless recursive rules/separate TUs)
dropping redundant lit
I arrived at
Live On Coliru
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
/// AST types used as attributes by the rules in namespace `parser`.
namespace ast {
/// One parsed record: an integer id and a name.
struct Record{
int id;
std::string name;
};
/// The whole input: a list of records followed by a trailing name.
struct Document{
std::vector<Record> rec;
std::string name;
};
}
// Adapt Record as a Fusion sequence; members are listed as (name, id), which is
// not their declaration order, so the sequence yields name first and id second.
BOOST_FUSION_ADAPT_STRUCT(ast::Record, name, id)
// Adapt Document as a Fusion sequence of (rec, name).
BOOST_FUSION_ADAPT_STRUCT(ast::Document, rec, name)
namespace ast {
using boost::fusion::operator<<;
static inline std::ostream& operator<<(std::ostream& os, std::vector<Record> const& rs) {
os << "{ ";
for (auto& r : rs) os << r << " ";
return os << "}";
}
}
/// X3 parsers for the record/document syntax, defined inline with their rules.
namespace parser {
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
// One or more lowercase letters; wrapped in lexeme[] so the letters must be contiguous.
const auto identifier = x3::lexeme[+x3::char_("a-z")];
// Shape: record <identifier> { <int> }
const auto record = x3::rule<class record, ast::Record> {"record"}
= x3::lexeme["record"] >> identifier >> "{" >> x3::int_ >> "}";
// One or more records followed by an identifier.
const auto document = x3::rule<class document, ast::Document> {"document"}
= +record
>> identifier
;
}
int main(){
std::string const str = "record foo{42} record bar{73} foobar";
auto f = str.begin(), l = str.end();
ast::Document unit;
if (phrase_parse(f, l, parser::document, parser::ascii::space, unit)) {
std::cout << "Got: " << unit << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
}
Prints
Got: ({ (foo 42) (bar 73) } foobar)

reuse parsed variable with boost karma

I have a code base which is quite equivalent to the code below.
I am trying to generate a text file that contains the content of a variable twice.
I feel that the answer is in semantic actions and _a and _val but cannot manage to get through even with the documentation.
What would you do in order to have
"toto" in str
and get the output
toto some stuff toto
i.e., how can a parsed variable be reused in karma?
// Value to generate text from: a single string member.
struct data
{
std::string str;
};
// Expose `data` as a Fusion sequence with one std::string element (str).
BOOST_FUSION_ADAPT_STRUCT(
data,
(std::string, str)
)
// Karma grammar intended to emit: <string> "some stuff" <string>.
// NOTE(review): this class template has the same name as the struct `data` that it
// uses as its attribute — presumably the two need distinct names or namespaces
// for this to compile; confirm.
template <typename Iterator>
struct data: karma::grammar<Iterator, data() >
{
data():data::base_type(start)
{
start = karma::string << karma::lit("some stuff") << karma::string; //Second string is in fact the first one
}
karma::rule<Iterator, data()> start;
};
Solution (according to posts below :)
#include <iostream>
#include <string>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/spirit/include/support_iso8859_1.hpp>
namespace ast
{
// Value to generate text from: a single string member.
struct data
{
std::string str;
};
}
// Expose ast::data as a Fusion sequence with one std::string element (str).
BOOST_FUSION_ADAPT_STRUCT(
ast::data,
(std::string, str)
)
namespace karma = boost::spirit::karma;
namespace parser
{
/// Karma grammar for ast::data: emits the str member, then the literal
/// "some stuff", then the str member again.
template <typename Iterator>
struct data: karma::grammar<Iterator, ast::data() >
{
data():data::base_type(start)
{
// Each string generator gets its value (_1) assigned explicitly from element 0
// of the rule's attribute (_val), i.e. ast::data::str, so the same member is
// emitted on both sides of the literal.
start =
karma::string[karma::_1 = boost::phoenix::at_c<0>(karma::_val)] <<
karma::lit("some stuff") <<
karma::string[karma::_1 = boost::phoenix::at_c<0>(karma::_val)]
;
}
karma::rule<Iterator, ast::data()> start;
};
}
/// Generates text for a sample ast::data (str == "toto") into a std::string
/// and prints it.
///
/// @return 0 on success, 1 if the generator reported failure.
int main()
{
    ast::data d;
    d.str = "toto";

    std::string generated;
    typedef std::back_insert_iterator<std::string> iterator_type;
    parser::data<iterator_type> d_p;
    iterator_type sink(generated);

    // karma::generate returns false when generation fails; do not print a
    // half-written buffer in that case.
    if (!karma::generate(sink, d_p, d)) {
        std::cerr << "Generation failed" << std::endl;
        return 1;
    }
    std::cout << generated << std::endl;
    return 0;
}
This should do the trick:
// Both string generators take their value (_1) from the rule's own attribute
// (_val), so the same value is emitted before and after the literal.
start = karma::string[karma::_1 = karma::_val]
<< karma::lit("some stuff")
<< karma::string[karma::_1 = karma::_val];

How can I extract std::string object via boost spirit

I have the following code:
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
// Parse result: two strings, a return type and a name.
struct function
{
std::string ret_type;
std::string name;
};
// Expose ::function as a Fusion sequence of (ret_type, name).
BOOST_FUSION_ADAPT_STRUCT(
::function,
(std::string, ret_type)
(std::string, name)
)
/// Qi grammar (ASCII space skipper) that fills a `function` from two runs of characters.
template <typename Iterator>
struct function_parser : boost::spirit::qi::grammar<Iterator, function(), boost::spirit::qi::ascii::space_type>
{
function_parser() : function_parser::base_type(start)
{
using boost::spirit::qi::ascii::char_;
using boost::spirit::qi::int_;
// NOTE(review): +char_ is greedy and char_ matches any character, so the first
// +char_ presumably consumes all remaining input and leaves nothing for the
// second one, which needs at least one character — confirm.
start %= +char_ >> +char_;
}
boost::spirit::qi::rule<Iterator, function(), boost::spirit::qi::ascii::space_type> start;
};
int main()
{
std::string input_data("void foo");
function fn;
auto itr = input_data.begin();
auto end = input_data.end();
function_parser<decltype(itr)> g;
bool res = boost::spirit::qi::phrase_parse(itr, end, g, boost::spirit::ascii::space, fn);
if (res && itr == end)
{
std::cout << boost::fusion::tuple_open('[');
std::cout << boost::fusion::tuple_close(']');
std::cout << boost::fusion::tuple_delimiter(", ");
std::cout << "Parsing succeeded\n";
std::cout << "got: " << boost::fusion::as_vector(fn) << std::endl;
}
else
{
std::cout << "Parsing failed \n";
}
}
Output
Parsing failed
What am I doing wrong? How can I fix it?
+char_
eats all input! Now, the next
+char_
requires at least a single character, which isn't there (the first Kleene plus ate it), so the parse fails.
I suggest instead:
using namespace boost::spirit::qi;
start = lexeme[+graph] >> lexeme[+graph];
The documentation should be able to tell you what that does (I hope. No time to elaborate)

How do you use a variable stored in a boost spirit closure as input to a boost spirit loop parser?

I would like to use a parsed value as the input to a loop parser.
The grammar defines a header that specifies the (variable) size of the following string. For example, say the following string is the input to some parser.
12\r\nTest Payload
The parser should extract the 12, convert it to an unsigned int and then read twelve characters. I can define a boost spirit grammar that compiles, but an assertion in the boost spirit code fails at runtime.
#include <iostream>
#include <boost/spirit.hpp>
using namespace boost::spirit;
// Closure with a single std::size_t member, accessible under the name `size`.
struct my_closure : public closure<my_closure, std::size_t> {
member1 size;
};
/// Spirit Classic grammar for "<length>\r\n<payload>"; the matched payload is
/// meant to be stored in the std::string passed to the constructor.
struct my_grammar : public grammar<my_grammar> {
template <typename ScannerT>
struct definition {
typedef rule<ScannerT> rule_type;
// Rule type that carries a my_closure context (gives the rule a `size` member).
typedef rule<ScannerT, my_closure::context_t> closure_rule_type;
closure_rule_type header;
rule_type payload;
rule_type top;
definition(const my_grammar &self)
{
using namespace phoenix;
// Parse an unsigned integer and store it in the header rule's closure member.
header = uint_p[header.size = arg1];
// NOTE(review): header.size() is invoked here, while the definition is being
// constructed rather than during parsing — presumably the source of the
// runtime assertion; confirm.
payload = repeat_p(header.size())[anychar_p][assign_a(self.result)];
top = header >> str_p("\r\n") >> payload;
}
const rule_type &start() const { return top; }
};
my_grammar(std::string &p_) : result(p_) {}
// Reference to the caller's string, the target of assign_a above.
std::string &result;
};
/// Parses a fixed length-prefixed message with my_grammar and prints the payload.
/// Returns 0 on a full match and -1 otherwise.
int
main(int argc, char **argv)
{
    const std::string content = "12\r\nTest Payload";
    std::string payload;
    my_grammar g(payload);

    // `.full` is true only when the entire input was consumed.
    const bool matched_all = parse(content.begin(), content.end(), g).full;
    if (matched_all) {
        std::cout << "Payload: " << payload << std::endl;
        return 0;
    }

    std::cerr << "there was a parsing error!\n";
    return -1;
}
Is it possible to tell spirit that the closure variable should be evaluated lazily? Is this behaviour supported by boost spirit?
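This is much easier with the new qi parser available in Spirit 2. The following code snippet provides a full example that mostly works. An unexpected character can end up in the final result if the attribute of uint_ is allowed to propagate into the string (the parsed number is then appended as a character); wrapping that parser in qi::omit[] prevents this.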
#include <iostream>
#include <string>
#include <boost/version.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_repeat.hpp>
#include <boost/spirit/include/qi_grammar.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
using boost::spirit::qi::repeat;
using boost::spirit::qi::uint_;
using boost::spirit::ascii::char_;
using boost::spirit::ascii::alpha;
using boost::spirit::qi::_1;
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
/// Parses the NUL-terminated `input` with parser `p` into `attr` (no skipper)
/// and prints "ok" or "fail" to std::cout.
///
/// @param input       text to parse
/// @param p           Qi parser expression
/// @param attr        receives the parser's attribute
/// @param full_match  when true, "ok" also requires that all input was consumed
template <typename P, typename T>
void test_parser_attr(
    char const* input, P const& p, T& attr, bool full_match = true)
{
    using boost::spirit::qi::parse;

    char const* cursor = input;
    char const* const end = input + strlen(input);

    bool const parsed = parse(cursor, end, p, attr);
    bool const accepted = parsed && (!full_match || cursor == end);

    std::cout << (accepted ? "ok" : "fail") << std::endl;
}
static void
straight_forward()
{
std::string str;
int n;
test_parser_attr("12\r\nTest Payload",
uint_[phx::ref(n) = _1] >> "\r\n" >> repeat(phx::ref(n))[char_],
str);
std::cout << "str.length() == " << str.length() << std::endl;
std::cout << n << "," << str << std::endl; // will print "12,Test Payload"
}
/// Parses the NUL-terminated `input` with parser `p` into `attr`, using the
/// ASCII space skipper, and prints "ok" or "fail" to std::cout.
///
/// @param input       text to parse
/// @param p           Qi parser expression or grammar
/// @param attr        receives the parser's attribute
/// @param full_match  when true, "ok" also requires that all input was consumed
template <typename P, typename T>
void
test_phrase_parser(char const* input, P const& p,
                   T& attr, bool full_match = true)
{
    using boost::spirit::qi::phrase_parse;
    using boost::spirit::qi::ascii::space;

    char const* cursor = input;
    char const* const end = input + strlen(input);

    bool const parsed = phrase_parse(cursor, end, p, space, attr);
    bool const accepted = parsed && (!full_match || cursor == end);

    std::cout << (accepted ? "ok" : "fail") << std::endl;
}
/// Qi grammar for "<length>\r\n<payload>": reads an unsigned length into a rule
/// local, then exactly that many characters, and exposes only those characters
/// as its std::string attribute.
template <typename Iterator>
struct test_grammar
    : qi::grammar<Iterator, std::string(), qi::locals<unsigned> > {
    test_grammar()
        : test_grammar::base_type(my_rule)
    {
        using boost::spirit::qi::_a;
        // `%=` propagates every component's attribute into the string, so the
        // length parser is wrapped in omit[]: its semantic action still stores
        // the value in _a, but the number is no longer appended to the result
        // as a stray character.
        my_rule %= qi::omit[uint_[_a = _1]] >> "\r\n" >> repeat(_a)[char_];
    }
    qi::rule<Iterator, std::string(), qi::locals<unsigned> > my_rule;
};
static void
with_grammar_local_variable()
{
std::string str;
test_phrase_parser("12\r\nTest Payload", test_grammar<const char*>(), str);
std::cout << str << std::endl; // will print "Test Payload"
}
/// Prints the Boost library version, then runs both demonstrations in order.
int
main(int argc, char **argv)
{
    std::cout << "boost version: ";
    std::cout << BOOST_LIB_VERSION;
    std::cout << std::endl;

    straight_forward();
    with_grammar_local_variable();

    return 0;
}
What you are looking for is lazy_p, check the example here: http://www.boost.org/doc/libs/1_35_0/libs/spirit/doc/the_lazy_parser.html