reuse parsed variable with boost karma - c++

I have a code base which is quite equivalent to the code below.
I try to generate a text file with two times the content of a variable.
I feel that the answer is in semantic actions and _a and _val but cannot manage to get through even with the documentation.
How will you do to have :
"toto" in str
and output :
toto some stuff toto
i.e how to reuse a parsed variable in karma ?
struct data
{
std::string str;
};
BOOST_FUSION_ADAPT_STRUCT(
data,
(std::string, str)
)
template <typename Iterator>
struct data: karma::grammar<Iterator, data() >
{
data():data::base_type(start)
{
start = karma::string << karma::lit("some stuff") << karma::string; //Second string is in fact the first one
}
karma::rule<Iterator, data()> start;
};
Solution (according to posts below :)
#include <iostream>
#include <string>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/spirit/include/support_iso8859_1.hpp>
namespace ast
{
struct data
{
std::string str;
};
}
BOOST_FUSION_ADAPT_STRUCT(
ast::data,
(std::string, str)
)
namespace karma = boost::spirit::karma;
namespace parser
{
template <typename Iterator>
struct data: karma::grammar<Iterator, ast::data() >
{
data():data::base_type(start)
{
start =
karma::string[karma::_1 = boost::phoenix::at_c<0>(karma::_val)] <<
karma::lit("some stuff") <<
karma::string[karma::_1 = boost::phoenix::at_c<0>(karma::_val)]
;
}
karma::rule<Iterator, ast::data()> start;
};
}
main()
{
ast::data d;
d.str = "toto";
std::string generated;
typedef std::back_insert_iterator<std::string> iterator_type;
parser::data<iterator_type> d_p;
iterator_type sink(generated);
karma::generate(sink, d_p, d);
std::cout << generated << std::endl;
}

This should do the trick:
start = karma::string[karma::_1 = karma::_val]
<< karma::lit("some stuff")
<< karma::string[karma::_1 = karma::_val];

Related

Performance issue with parser written with Boost::spirit

I want to parse a file that looks like this (FASTA-like text format):
>InfoHeader
"Some text sequence that has a line break after every 80 characters"
>InfoHeader
"Some text sequence that has a line break after every 80 characters"
...
e.g.:
>gi|31563518|ref|NP_852610.1| microtubule-associated proteins 1A/1B light chain 3A isoform b [Homo sapiens]
MKMRFFSSPCGKAAVDPADRCKEVQQIRDQHPSKIPVIIERYKGEKQLPVLDKTKFLVPDHVNMSELVKI
IRRRLQLNPTQAFFLLVNQHSMVSVSTPIADIYEQEKDEDGFLYMVYASQETFGFIRENE
I wrote a parser for this with boost::spirit. The parser correctly stores the header line and the following text sequence in a std::vector< std::pair< string, string >> but it takes kind of long for bigger files (17sec for a 100MB file). As comparison I wrote a program without boost::spirit (just STL functions) that simply copies each line of that 100MB file in a std::vector. The whole process takes less than a second. The "program" used for the comparison is not serving the purpose but I don't think the parser should take that much longer...
I know there are plenty of other FASTA parsers around but I'm rather curious why my code is slow.
The .hpp file:
#include <boost/filesystem/path.hpp>
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::vector< std::pair<std::string, std::string> > fastaVector;
private:
fastaVector fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const fastaVector::const_iterator getBeginIterator() const;
const fastaVector::const_iterator getEndIterator() const;
private:
void parse();
};
And the .cpp file:
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/qi.hpp>
#include "fastaReader.hpp"
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
template <typename Iterator, typename Skipper>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::fastaVector(), qi::locals<string>, Skipper> {
qi::rule<Iterator> infoLineStart;
qi::rule<Iterator> inputEnd;
qi::rule<Iterator> lineEnd;
qi::rule<Iterator, string(), Skipper> infoLine;
qi::rule<Iterator, string(), Skipper> seqLine;
qi::rule<Iterator, FastaReader::fastaVector(), qi::locals<string>, Skipper> fasta;
FastaGrammar() : FastaGrammar::base_type(fasta, "fasta") {
using boost::spirit::standard::char_;
using boost::phoenix::bind;
using qi::eoi;
using qi::eol;
using qi::lexeme;
using qi::_1;
using qi::_val;
using namespace qi::labels;
infoLineStart = char_('>');
inputEnd = eoi;
/* grammar */
infoLine = lexeme[*(char_ - eol)];
seqLine = *(char_ - infoLineStart);
fasta = *(infoLineStart > infoLine[_a = _1]
> seqLine[bind(&FastaGrammar::addValue, _val, _a, _1)]
)
> inputEnd
;
infoLineStart.name(">");
infoLine.name("sequence identifier");
seqLine.name("sequence");
}
static void addValue(FastaReader::fastaVector & fa, const string & info, const string & seq) {
fa.push_back(make_pair(info, seq));
}
};
FastaReader::FastaReader(const fs::path & f) {
this->file = f;
this->parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::fastaVector::const_iterator FastaReader::getBeginIterator() const {
return this->fV.cbegin();
}
const FastaReader::fastaVector::const_iterator FastaReader::getEndIterator() const {
return this->fV.cend();
}
void FastaReader::parse() {
if ( this->file.empty() ) throw string("FastaReader: No file specified.");
if ( ! fs::is_regular_file(this->file) ) throw (string("FastaReader: File not found: ") + this->file.string());
typedef boost::spirit::istream_iterator iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type, boost::spirit::ascii::space_type> fastaGr;
fs::ifstream fin(this->file);
if ( ! fin.is_open() ) {
throw (string("FastaReader: Access denied: ") + this->file.string());
}
fin.unsetf(ios::skipws);
iterator_type begin(fin);
iterator_type end;
pos_iterator_type pos_begin(begin, end, this->file.string());
pos_iterator_type pos_end;
fastaGr fG;
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
qi::phrase_parse(pos_begin, pos_end, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::string str) {
cerr << "error message: " << str << endl;
}
}
So the grammar does the folloing:
It looks for a ">" sign and then stores all following characters until an EOL is detected. After the EOL the text sequence starts and ends when a ">" sign is detected. Both strings (header line and text sequence) are then stored in a std::vector by calling FastaReader::addValue().
I compiled my program using g++ version 4.8.2 with -O2 and -std=c++11 flags.
So where is the the performance issue in my code?
Previous: Step 3: MOAR FASTER WITH ZERO-COPY Return to Step 1. Cleaning up + Profiling
Step 4: Dropping the position iterator
Since you're not using it, we can drop the stateful iterator, which is likely to inhibit quite a lot of optimizations (and was indirectly visible in the profiler output)
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/filesystem/path.hpp>
#include <boost/utility/string_ref.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
namespace io = boost::iostreams;
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<boost::string_ref, boost::string_ref> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
io::mapped_file_source mmap;
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
#include <boost/iostreams/device/mapped_file.hpp>
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
namespace io = boost::iostreams;
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<boost::string_ref, It, void> {
static void call(It f, It l, boost::string_ref& attr) { attr = boost::string_ref { f, size_t(std::distance(f,l)) }; }
};
} } }
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
using boost::phoenix::construct;
using boost::phoenix::begin;
using boost::phoenix::size;
entry = ('>' >> raw[ *~char_('\n') ] >> '\n' >> raw[ *~char_('>') ]);
fasta = *entry >> *eol >> eoi ;
BOOST_SPIRIT_DEBUG_NODES((fasta)(entry));
}
private:
qi::rule<Iterator, FastaReader::Data()> fasta;
qi::rule<Iterator, FastaReader::Entry()> entry;
};
FastaReader::FastaReader(const fs::path & f) : file(f), mmap(file.c_str()) {
parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef char const* iterator_type;
typedef FastaGrammar<iterator_type> fastaGr;
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
iterator_type first(mmap.data()), last(mmap.end());
qi::phrase_parse(first, last, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
FastaReader reader("input.txt");
for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
Now it's 74.8x faster.
$ time ./test | head -n4
Measuring: Parsing.
00:00:00.194432
Next: Step 2. Faster with mmap
Step 1. Cleaning up + Profiling
You should avoid the many rules they introduce type erasure.
If you input is sane, you can do without the skipper (anyways, line ends were significant, so it made no sense to skip them).
Use fusion adaptation instead of a helper to construct new pairs:
This is not optimal, yet, but a lot cleaner:
$ ./test1
Measuring: Parsing.
00:00:22.681605
Slightly more efficient by reducing moving parts and indirections:
Live On Coliru
#include <boost/filesystem/path.hpp>
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<std::string, std::string> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
qi::rule<Iterator, FastaReader::Data()> fasta;
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
fasta = *('>' >> *~char_('\n') >> '\n'
>> *~char_('>'))
>> *eol
>> eoi
;
BOOST_SPIRIT_DEBUG_NODES((fasta));
}
};
FastaReader::FastaReader(const fs::path & f) : file(f) {
parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef boost::spirit::istream_iterator iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type> fastaGr;
fs::ifstream fin(this->file);
if (!fin) {
throw std::runtime_error(string("FastaReader: Access denied: ") + this->file.string());
}
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
pos_iterator_type first(iterator_type{fin >> std::noskipws}, {}, file.string());
qi::phrase_parse<pos_iterator_type>(first, {}, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
std::ios::sync_with_stdio(false);
FastaReader reader("input.txt");
//for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
This is still slow. Let's see what takes so long:
That's pretty, but hardly tells us what we need to know. This however does: top-N time consumers are
So most time is spent in istream iteration and the multi-pass adaptor. You could argue that the multipass adaptor could be optimized for by flushing it once in a while (each line?) but really, we would prefer not to be tied to the whole stream and operator on the (stream) buffer instead.
So, I though let's use a mapped file instead:
Next: Step 2. Faster with mmap
Previous: Step 2. Faster with mmap
Next: Step 4: Dropping the position iterator
Step 3: MOAR FASTER WITH ZERO-COPY
Let's avoid allocations! If we move the file mapping into the FastaReader class, we can directly point to data in the mapping instead of copying strings all the time.
Using boost::string_ref as e.g. described here: C++: Fast way to read mapped file into a matrix you can do
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/filesystem/path.hpp>
#include <boost/utility/string_ref.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
namespace io = boost::iostreams;
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<boost::string_ref, boost::string_ref> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
io::mapped_file_source mmap;
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
#include <boost/iostreams/device/mapped_file.hpp>
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
namespace io = boost::iostreams;
namespace boost { namespace spirit { namespace traits {
template <typename It>
struct assign_to_attribute_from_iterators<boost::string_ref, It, void> {
static void call(It f, It l, boost::string_ref& attr) { attr = boost::string_ref { f.base(), size_t(std::distance(f.base(),l.base())) }; }
};
} } }
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
using boost::phoenix::construct;
using boost::phoenix::begin;
using boost::phoenix::size;
entry = ('>' >> raw[ *~char_('\n') ] >> '\n' >> raw[ *~char_('>') ]);
fasta = *entry >> *eol >> eoi ;
BOOST_SPIRIT_DEBUG_NODES((fasta)(entry));
}
private:
qi::rule<Iterator, FastaReader::Data()> fasta;
qi::rule<Iterator, FastaReader::Entry()> entry;
};
FastaReader::FastaReader(const fs::path & f) : file(f), mmap(file.c_str()) {
parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef char const* iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type> fastaGr;
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
pos_iterator_type first(iterator_type{mmap.data()}, iterator_type{mmap.end()}, file.string());
qi::phrase_parse<pos_iterator_type>(first, {}, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
FastaReader reader("input.txt");
for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
This is indeed already 4.8x faster:
$ ./test3 | head -n4
Measuring: Parsing.
00:00:04.577123
>gi|31563518|ref|NP_852610.1| microtubule-associated proteins 1A/1B light chain 3A isoform b [Homo sapiens]
MKMRFFSSPCGKAAVDPADRCKEVQQIRDQHPSKIPVIIERYKGEKQLPVLDKTKFLVPDHVNMSELVKI
IRRRLQLNPTQAFFLLVNQHSMVSVSTPIADIYEQEKDEDGFLYMVYASQETFGFIRENE
Next: Step 4: Dropping the position iterator
Previous: Step 1. Cleaning up + Profiling
Next: Step 3: MOAR FASTER WITH ZERO-COPY
Step 2. Faster with mmap
Live On Coliru
#include <boost/filesystem/path.hpp>
namespace fs = boost::filesystem;
class FastaReader {
public:
typedef std::pair<std::string, std::string> Entry;
typedef std::vector<Entry> Data;
private:
Data fV;
fs::path file;
public:
FastaReader(const fs::path & f);
~FastaReader();
const fs::path & getFile() const;
const Data::const_iterator begin() const;
const Data::const_iterator end() const;
private:
void parse();
};
#include <iomanip>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
//#include "fastaReader.hpp"
#include <boost/iostreams/device/mapped_file.hpp>
using namespace std;
namespace fs = boost::filesystem;
namespace qi = boost::spirit::qi;
namespace pt = boost::posix_time;
namespace io = boost::iostreams;
template <typename Iterator>
struct FastaGrammar : qi::grammar<Iterator, FastaReader::Data()> {
qi::rule<Iterator, FastaReader::Data()> fasta;
FastaGrammar() : FastaGrammar::base_type(fasta) {
using namespace qi;
fasta = *('>' >> *~char_('\n') >> '\n'
>> *~char_('>'))
>> *eol
>> eoi
;
BOOST_SPIRIT_DEBUG_NODES((fasta));
}
};
FastaReader::FastaReader(const fs::path & f) : file(f) {
parse();
}
FastaReader::~FastaReader() {}
const fs::path & FastaReader::getFile() const {
return this->file;
}
const FastaReader::Data::const_iterator FastaReader::begin() const {
return this->fV.cbegin();
}
const FastaReader::Data::const_iterator FastaReader::end() const {
return this->fV.cend();
}
void FastaReader::parse() {
if (this->file.empty()) throw std::runtime_error("FastaReader: No file specified.");
if (! fs::is_regular_file(this->file)) throw std::runtime_error(string("FastaReader: File not found: ") + this->file.string());
typedef char const* iterator_type;
typedef boost::spirit::classic::position_iterator2<iterator_type> pos_iterator_type;
typedef FastaGrammar<pos_iterator_type> fastaGr;
io::mapped_file_source mmap(file.c_str());
static const fastaGr fG{};
try {
std::cerr << "Measuring: Parsing." << std::endl;
const pt::ptime startMeasurement = pt::microsec_clock::universal_time();
pos_iterator_type first(iterator_type{mmap.data()}, iterator_type{mmap.end()}, file.string());
qi::phrase_parse<pos_iterator_type>(first, {}, fG, boost::spirit::ascii::space, this->fV);
const pt::ptime endMeasurement = pt::microsec_clock::universal_time();
pt::time_duration duration (endMeasurement - startMeasurement);
std::cerr << duration << std::endl;
} catch (std::exception const& e) {
cerr << "error message: " << e.what() << endl;
}
}
int main() {
FastaReader reader("input.txt");
//for (auto& e : reader) std::cout << '>' << e.first << '\n' << e.second << "\n\n";
}
Indeed on my system it's roughly 3x faster (input is 229 MiB):
$ ./mapped_file_source
Measuring: Parsing.
00:00:07.385787
Next: Step 3: MOAR FASTER WITH ZERO-COPY

How can I extract std::string object via boost spirit

I have the following code:
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
struct function
{
std::string ret_type;
std::string name;
};
BOOST_FUSION_ADAPT_STRUCT(
::function,
(std::string, ret_type)
(std::string, name)
)
template <typename Iterator>
struct function_parser : boost::spirit::qi::grammar<Iterator, function(), boost::spirit::qi::ascii::space_type>
{
function_parser() : function_parser::base_type(start)
{
using boost::spirit::qi::ascii::char_;
using boost::spirit::qi::int_;
start %= +char_ >> +char_;
}
boost::spirit::qi::rule<Iterator, function(), boost::spirit::qi::ascii::space_type> start;
};
int main()
{
std::string input_data("void foo");
function fn;
auto itr = input_data.begin();
auto end = input_data.end();
function_parser<decltype(itr)> g;
bool res = boost::spirit::qi::phrase_parse(itr, end, g, boost::spirit::ascii::space, fn);
if (res && itr == end)
{
std::cout << boost::fusion::tuple_open('[');
std::cout << boost::fusion::tuple_close(']');
std::cout << boost::fusion::tuple_delimiter(", ");
std::cout << "Parsing succeeded\n";
std::cout << "got: " << boost::fusion::as_vector(fn) << std::endl;
}
else
{
std::cout << "Parsing failed \n";
}
}
Output
Parsing failed
What am I doing wrong? How can I fix it?
+char_
eats all input! Now, the next
+char_
requires at least a single character, which isn't there (the first kleen plus ate it) so the parse fails.
I suggest instead:
using namespace boost::spirit::qi;
start = lexeme[+graph] >> lexeme[+graph];
The documentation should be able to tell you what that does (I hope. No time to elaborate)

boost spirit parse with the source

I would like to be able to parse a Number, to store its original source and to track its position in the source preserving it in the structure itself.
This is what I have so far:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <iomanip>
#include <ios>
#include <string>
#include <complex>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
struct Position
{
Position()
: line(-1)
{
}
size_t line;
};
struct Number : public Position
{
Number()
: Position()
, value(-1)
, source()
{
}
unsigned value;
std::string source;
};
using namespace boost::spirit;
BOOST_FUSION_ADAPT_STRUCT(Number,
(unsigned, value)
(std::string, source)
(size_t, line)
);
template <typename Iterator>
struct source_hex : qi::grammar<Iterator, Number()>
{
source_hex() : source_hex::base_type(start)
{
using qi::eps;
using qi::hex;
using qi::lit;
using qi::raw;
using qi::_val;
using qi::_1;
using ascii::char_;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
start = raw[ (lit("0x") | lit("0X"))
>> hex [at_c<0>(_val) = _1]
][at_c<2>(_val) = get_line(begin(_1))]
[at_c<1>(_val) = construct<std::string>(begin(_1), end(_1))]
;
}
qi::rule<Iterator, Number()> start;
};
and the test code is:
typedef line_pos_iterator<std::string::const_iterator> Iterator;
source_hex<Iterator> g;
Iterator iter(str.begin());
Iterator end(str.end());
Number number;
bool r = parse(iter, end, g, number);
if (r && iter == end) {
std::cout << number.line << ": 0x" << std::setw(8) << std::setfill('0') << std::hex << number.value << " // " << number.source << "\n";
} else
std::cout << "Parsing failed\n";
what I am not getting is why the iterator on line:
[at_c<2>(_val) = get_line(begin(_1))]
is not a line_pos_iterator even this is the one I am using for the parser.
I will appreciate explanation as well as ideas how to solve the problem - in whatever way.
Have a look at
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
This defines a parser that directly exposes the position as an attribute. Let me add an example in a few minutes.
Edit I found it hard to shoe-horn iter_pos into your sample without "assuming" things and changing your data type layout. I'd very much favour this (I'd strive to lose the semantic actions all the way.). However, time's limited.
Here's a little helper that you can use to fix your problem:
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
^ The polymorphic actor, use as such:
start = raw[ qi::no_case["0x"] >> hex [at_c<0>(_val) = _1] ]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1))
]
;
// with
boost::phoenix::function<get_line_f> get_line_;
Note I changed a few minor points.
Fully running demo with output: Live On Coliru

unhandled exception using Boost Spirit to parse grammar

I am trying to use Boost Spirit to parse the following grammar:
sentence:
noun verb
sentence conjunction sentence
conjunction:
"and"
noun:
"birds"
"cats"
verb:
"fly"
"meow"
parsing succeeds when the grammar only includes noun >> verb rule.
when grammar is modified to include sentence>>conjunction>>sentence rule and i supply an invalid input such as "birds fly" instead of "birdsfly" i get an unhandled exception when the program runs.
here is the code which is modified from examples found on boost doc
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
Grammar(TokenDef const& tok)
: Grammar::base_type(sentence)
{
sentence = (tok.noun>>tok.verb)
|
(sentence>>tok.conjunction>>sentence)>>eoi
;
}
qi::rule<Iterator> sentence;
};
int main()
{
typedef lex::lexertl::token<char const*, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g (word_count);
std::string str = "birdsfly";
//std::string str = "birds fly"; this input caused unhandled exception
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
system("PAUSE");
return 0;
}
You have left-recursion in the second branch of the sentence rule.
sentence = sentence >> ....
will always recurse on sentence, so you're seeing a stackoverflow.
I suggest writing the rule like, e.g:
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
Now the result reads
g++ -Wall -pedantic -std=c++0x -g -O0 test.cpp -o test
Parsing failed
stopped at: " fly"
(and the inevitable "sh: PAUSE: command not found" of course...)
PS. Don't using namespace please. Instead:
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
Here's a cleaned up version with some other stuff removed/fixed: http://coliru.stacked-crooked.com/view?id=1fb26ca3e8c207979eaaf4592c319316-e223fd4a885a77b520bbfe69dda8fb91
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
// #include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
Grammar(TokenDef const& tok) : Grammar::base_type(sentence)
{
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
}
qi::rule<Iterator> sentence;
};
int main()
{
typedef std::string::const_iterator It;
typedef lex::lexertl::token<It, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g(word_count);
//std::string str = "birdsfly";
const std::string str = "birds fly";
It first = str.begin();
It last = str.end();
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \"" << rest << "\"\n";
}
}

Error with Phoenix placeholder _val in Boost.Spirit.Lex :(

I'm newbie in Boost.Spirit.Lex.
Some strange error appears every time I try to use lex::_val in semantics actions in my simple lexer:
#ifndef _TOKENS_H_
#define _TOKENS_H_
#include <iostream>
#include <string>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;
enum tokenids
{
ID_IDENTIFICATOR = 1,
ID_CONSTANT,
ID_OPERATION,
ID_BRACKET,
ID_WHITESPACES
};
template <typename Lexer>
struct mega_tokens
: lex::lexer<Lexer>
{
mega_tokens()
: identifier(L"[a-zA-Z_][a-zA-Z0-9_]*", ID_IDENTIFICATOR)
, constant (L"[0-9]+(\\.[0-9]+)?", ID_CONSTANT )
, operation (L"[\\+\\-\\*/]", ID_OPERATION )
, bracket (L"[\\(\\)\\[\\]]", ID_BRACKET )
{
using lex::_tokenid;
using lex::_val;
using phx::val;
this->self
= operation [ std::wcout
<< val(L'<') << _tokenid
// << val(L':') << lex::_val
<< val(L'>')
]
| identifier [ std::wcout
<< val(L'<') << _tokenid
<< val(L':') << _val
<< val(L'>')
]
| constant [ std::wcout
<< val(L'<') << _tokenid
// << val(L':') << _val
<< val(L'>')
]
| bracket [ std::wcout
<< val(L'<') << _tokenid
// << val(L':') << lex::_val
<< val(L'>')
]
;
}
lex::token_def<wchar_t, wchar_t> operation;
lex::token_def<std::wstring, wchar_t> identifier;
lex::token_def<double, wchar_t> constant;
lex::token_def<wchar_t, wchar_t> bracket;
};
#endif // _TOKENS_H_
and
#include <cstdlib>
#include <iostream>
#include <locale>
#include <boost/spirit/include/lex_lexertl.hpp>
#include "tokens.h"
int main()
{
setlocale(LC_ALL, "Russian");
namespace lex = boost::spirit::lex;
typedef std::wstring::iterator base_iterator;
typedef lex::lexertl::token <
base_iterator,
boost::mpl::vector<wchar_t, std::wstring, double, wchar_t>,
boost::mpl::true_
> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef mega_tokens<lexer_type>::iterator_type iterator_type;
mega_tokens<lexer_type> mega_lexer;
std::wstring str = L"alfa+x1*(2.836-x2[i])";
base_iterator first = str.begin();
bool r = lex::tokenize(first, str.end(), mega_lexer);
if (r) {
std::wcout << L"Success" << std::endl;
}
else {
std::wstring rest(first, str.end());
std::wcerr << L"Lexical analysis failed\n" << L"stopped at: \""
<< rest << L"\"\n";
}
return EXIT_SUCCESS;
}
This code causes an error in Boost header 'boost/spirit/home/lex/argument.hpp' on line 167 while compiling:
return: can't convert 'const
boost::variant' to
'boost::variant &'
When I don't use lex::_val program compiles with no errors.
Obviously, I use _val in wrong way, but I do not know how to do this correctly. Help, please! :)
P.S. And sorry for my terrible English…
I believe this is a problem in the current Phoenix related to using iostreams. As a workaround I suggest to define a custom (Phoenix) function doing the actual output:
struct output_operation_impl
{
template <typename TokenId, typename Val>
struct result { typedef void type; };
template <typename TokenId, typename Val>
void operator()(T1 const& tokenid, T2 const& val) const
{
std::wcout << L'<' << tokenid << L':' << val << L'>';
}
};
boost::phoenix::function<output_operation_impl> const output_operation =
output_operation_impl();
calling it as:
this->self = operation[ output_operation(_tokenid, _val) ] ... ;
Regards Hartmut