Using Boost.Spirit.Lex and stream iterators - c++

I want use Boost.Spirit.Lex to lex a binary file; for this purpose I wrote the following program (here is an extract):
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <fstream>
#include <iterator>
#include <string>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
#define X 1
#define Y 2
#define Z 3
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
word_count_tokens () {
this->self.add
("[^ \t\n]+", X)
("\n", Y)
(".", Z);
}
};
class counter
{
public:
typedef bool result_type;
template<typename T>
bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
switch (t.id ()) {
case X:
++w; c += t.value ().size ();
break;
case Y:
++l; ++c;
break;
case Z:
++c;
break;
}
return true;
}
};
int main (int argc, char **argv)
{
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
auto first = spirit::make_default_multi_pass (std::istream_iterator<char> (ifs));
auto last = spirit::make_default_multi_pass (std::istream_iterator<char> ());
size_t w, c, l;
word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
w = c = l = 0;
bool r = lex::tokenize (first, last, word_count_functor, boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
return 0;
}
The build returns the following error:
lexer.hpp:390:46: error: non-const lvalue reference to type 'const char *' cannot bind to a value of unrelated type
Now, the error is due to definition of concrete lexer, lex::lexer<>; in fact its first parameter is defaulted to const char *. I obtain the same error also if I use spirit::istream_iterator or spirit::make_default_multi_pass (.....).
But if I specify the correct template parameters of lex::lexer<> I obtain a plethora of errors!
Solutions?
Update
I have putted all source file; it's the word_counter site's example.

Okay, since the question was changed, here's a new answer, addressing some points with the complete code sample.
Firstly, you need to use a custom token type. I.e.
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
// instead of:
// word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
Obviously, it's customary to typedef lex::lexertl::token<boost::spirit::istream_iterator>
You need to use min_token_id instead of token IDs 1,2,3. Also, make it an enum for ease of maintenance:
enum token_ids {
X = lex::min_token_id + 1,
Y,
Z,
};
You can no longer just use .size() on the default token value() since the iterator range is not RandomAccessRange anymore. Instead, employ boost::distance() which is specialized for iterator_range:
++w; c += boost::distance(t.value()); // t.value ().size ();
Combining these fixes: Live On Coliru
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/bind.hpp>
#include <fstream>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
enum token_ids {
X = lex::min_token_id + 1,
Y,
Z,
};
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
word_count_tokens () {
this->self.add
("[^ \t\n]+", X)
("\n" , Y)
("." , Z);
}
};
struct counter
{
typedef bool result_type;
template<typename T>
bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
switch (t.id ()) {
case X:
++w; c += boost::distance(t.value()); // t.value ().size ();
break;
case Y:
++l; ++c;
break;
case Z:
++c;
break;
}
return true;
}
};
int main (int argc, char **argv)
{
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
ifs >> std::noskipws;
boost::spirit::istream_iterator first(ifs), last;
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
size_t w = 0, c = 0, l = 0;
bool r = lex::tokenize (first, last, word_count_functor,
boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
}
When run on itself, prints
65, 183, 1665

I think the real problem is not shown. You don't show first or last and I have a feeling you might have temporaries there.
Here's a sample I came up with to verify, perhaps you can see what it is you're doing ---wrong--- differently :)
Live on Coliru (memory mapped an byte-vector, via const char*)
And this alternative (using spirit::istream_iterator)
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#ifdef MEMORY_MAPPED
# include <boost/iostreams/device/mapped_file.hpp>
#endif
namespace /*anon*/
{
namespace qi =boost::spirit::qi;
namespace lex=boost::spirit::lex;
template <typename Lexer>
struct mylexer_t : lex::lexer<Lexer>
{
mylexer_t()
{
fileheader = "hello";
this->self = fileheader
| space [ lex::_pass = lex::pass_flags::pass_ignore ];
}
lex::token_def<lex::omit>
fileheader, space;
};
template <typename Iterator> struct my_grammar_t
: public qi::grammar<Iterator>
{
template <typename TokenDef>
my_grammar_t(TokenDef const& tok)
: my_grammar_t::base_type(header)
{
header = tok.fileheader;
BOOST_SPIRIT_DEBUG_NODE(header);
}
private:
qi::rule<Iterator> header;
};
}
namespace /* */ {
std::string safechar(char ch) {
switch (ch) {
case '\t': return "\\t"; break;
case '\0': return "\\0"; break;
case '\r': return "\\r"; break;
case '\n': return "\\n"; break;
}
return std::string(1, ch);
}
template <typename It>
std::string showtoken(const boost::iterator_range<It>& range)
{
std::ostringstream oss;
oss << '[';
std::transform(range.begin(), range.end(), std::ostream_iterator<std::string>(oss), safechar);
oss << ']';
return oss.str();
}
}
bool parsefile(const std::string& spec)
{
#ifdef MEMORY_MAPPED
typedef char const* It;
boost::iostreams::mapped_file mmap(spec.c_str(), boost::iostreams::mapped_file::readonly);
char const *first = mmap.const_data();
char const *last = first + mmap.size();
#else
typedef char const* It;
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
std::string v(std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());
It first = &v[0];
It last = first+v.size();
#endif
typedef lex::lexertl::token<It /*, boost::mpl::vector<char, unsigned int, std::string> */> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef mylexer_t<lexer_type>::iterator_type iterator_type;
try
{
static mylexer_t<lexer_type> mylexer;
static my_grammar_t<iterator_type> parser(mylexer);
auto iter = mylexer.begin(first, last);
auto end = mylexer.end();
bool r = qi::parse(iter, end, parser);
r = r && (iter == end);
if (!r)
std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
return r;
}
catch (const qi::expectation_failure<iterator_type>& e)
{
std::cerr << "FIXME: expected " << e.what_ << ", got '";
for (auto it=e.first; it!=e.last; it++)
std::cerr << showtoken(it->value());
std::cerr << "'" << std::endl;
return false;
}
}
int main()
{
if (parsefile("input.bin"))
return 0;
return 1;
}
For the variant:
typedef boost::spirit::istream_iterator It;
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
It first(in), last;

Related

Boost Spirit X3: Parsing (some) whitespace into an enum

I have a parser in which I want to capture certain types of whitespace as enum values and preserve the spaces for the "text" values.
My whitespace parser is pretty basic (Note: I've only added the pipe character here for test/dev purposes):
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
And I want to capture everything either into my enum or into std::strings:
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
And to parse my input I use something like this:
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
The problem though is that the parser stops at the first tab or newline it hits.
Code: http://coliru.stacked-crooked.com/a/d2cda4ce721279a4
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
enum Whitespace
{
NEWLINE,
TAB,
PIPE
};
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
struct print_visitor
: public boost::static_visitor<std::string>
{
std::string operator()(const Whitespace& ws) const
{
if (ws == Whitespace::NEWLINE)
{
return "newline";
}
else if (ws == Whitespace::PIPE)
{
return "pipe";
}
else
{
return "tab";
}
}
std::string operator()(const std::string& str) const
{
return str;
}
};
int main()
{
const std::string text = "Hello \n World";
std::string::const_iterator start = std::begin(text);
const std::string::const_iterator stop = std::end(text);
Elements elements{};
bool result =
phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);
if (!result)
{
std::cout << "failed to parse!\n";
}
else if (start != stop)
{
std::cout << "unparsed: " << std::string{start, stop} << '\n';
}
else
{
for (const auto& e : elements)
{
std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
}
}
}
If I parse the text Hello | World then I get the results I'm expecting. But if I instead use Hello \n World the whitespace after the \n is swallowed and the World is never parsed. Ideally I'd like to see this output:
element: [Hello ]
element: [newline]
element: [ World]
How can I accomplish this? Thank you!
My goto reference on skipper issues: Boost spirit skipper issues
In this case you made it work with no_skip[]. That's correct.
no_skip is like lexeme except it doesn't pre-skip, from the source (boost/spirit/home/x3/directive/no_skip.hpp):
// same as lexeme[], but does not pre-skip
Alternative Take
In your case I would flip the logic: just adjust the skipper itself.
Also, don't supply the skipper with phrase_parse, because your grammar is highly sensitive to the correct value of the skipper.
Your whole grammar could be:
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Here's a Live Demo On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
enum Whitespace { NEWLINE, TAB, PIPE };
struct whitespace_p : x3::symbols<Whitespace> {
whitespace_p() {
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} static const whitespace;
struct Element : x3::variant<Whitespace, std::string> {
using base_type::base_type;
using base_type::operator=;
};
using Elements = std::vector<Element>;
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(Whitespace ws) const {
switch(ws) {
case Whitespace::NEWLINE: return os << "[newline]";
case Whitespace::PIPE: return os << "[pipe]";
case Whitespace::TAB: return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text), stop = end(text);
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Elements elements;
if (!parse(start, stop, p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
Prints
[tab]
"Hello"
[newline]
"World"
Even Simpler?
It doesn't seem like you'd need any skipper here at all. Why not:
const auto p = *(+~x3::char_("\n\t|") | whitespace);
While we're at it, there's no need for symbols to map enums:
struct Element : x3::variant<char, std::string> {
// ...
};
using Elements = std::vector<Element>;
And then
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
struct Element : x3::variant<char, std::string> {
using variant = x3::variant<char, std::string>;
using variant::variant;
using variant::operator=;
friend std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(char ws) const {
switch(ws) {
case '\n': return os << "[newline]";
case '\t': return os << "[pipe]";
case '|': return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
};
using Elements = std::vector<Element>;
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text);
auto const stop = end(text);
Elements elements;
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
if (!parse(start, stop, *p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
Prints
[pipe]
"Hello "
[newline]
" World"
The problems are that you are using a phrase_parser instead of a parser at line 76.
Try to use something like
bool result =
parse(start, stop, elementsParser, elements);
Your phrase_parser was instructed to skip spaces, what you really don't want.
Look the first answer of How to use boost::spirit to parse a sequence of words into a vector?

boost::spirit::qi strange attribute print out

#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace test {
template< typename Rng,typename Expr >
bool parse(Rng const& rng,Expr const& expr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr);
} catch(qi::expectation_failure<decltype(itBegin)> const& exfail) {
exfail;
return false;
}
}
template< typename Rng,typename Expr,typename Attr >
bool parse(Rng const& rng,Expr const& expr,Attr& attr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr,attr);
} catch(qi::expectation_failure<decltype(itBegin)> const&) {
return false;
}
}
}
void print1(std::string const& s) {
std::cout<<"n1 = "<<s<<std::endl;
}
void print2(std::string const& s) {
std::cout<<"n2 = "<<s<<std::endl;
}
int main() {
qi::rule<std::string::const_iterator, std::string()> number = +qi::digit;
std::string input = "1+2";
std::string result;
if( test::parse(input, number[print1] >> *( qi::char_("+-") >> number[print2]) >> qi::eoi, result) ) {
std::cout<<"Match! result = "<<result<<std::endl;
} else {
std::cout<<"Not match!"<<std::endl;
}
return 0;
}
I'm expecting the output of this program is,
n1 = 1
n2 = 2
Match! result = 1+2
But the output is actually very strange for n2,
n1 = 1
n2 = 1+2
Match! result = 1+2
Why is the second number's attribute "1+2" instead of just "2"?
I know there are some other ways to parse this expression such as using qi::int_. I'm just wondering why do I get this strange attribute from it. Thanks!
Backtracking doesn't undo changes to container attributes. Adjacent compatible parser expressions bind to the same container attribute.
Both attributes get bound to the same container attribute (result) which means you are printing the same variable twice.
If you didn't want that, be explicit, e.g.
Live On Coliru
std::string result;
std::vector<std::string> v;
if( test::parse(input, number[px::bind(print1, qi::_1)] >> *qi::as_string[qi::char_("+-") >> number[print2]] >> qi::eoi, result, v) ) {
std::cout<<"Match! result = "<<result<<std::endl;
for (auto s : v)
std::cout << s << "\n";
} else {
Prints
n1 = 1
n2 = +2
Match! result = 1
+2
Now I don't know what you want to achieve, but this could be close:
if (test::parse(input, qi::raw [number[print_("n1",_1)] > *(qi::char_("+-") >> number[print_("n2",_1)]) ] >> qi::eoi, result)) {
Live On Coliru
#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace test {
template< typename Rng,typename Expr,typename... Attr >
bool parse(Rng const& rng,Expr const& expr,Attr&... attr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr,attr...);
} catch(qi::expectation_failure<decltype(itBegin)> const&) {
return false;
}
}
}
void printn(std::string const& label, std::string const& s) {
std::cout << label << " = " << s << std::endl;
}
BOOST_PHOENIX_ADAPT_FUNCTION(void, print_, printn, 2)
int main() {
qi::rule<std::string::const_iterator, std::string()> number = +qi::digit;
std::string input = "1+2";
std::string result;
using qi::_1;
if (test::parse(input, qi::raw [number[print_("n1",_1)] > *(qi::char_("+-") >> number[print_("n2",_1)]) ] >> qi::eoi, result)) {
std::cout<<"Match! result = "<<result<<std::endl;
} else {
std::cout<<"Not match!"<<std::endl;
}
return 0;
}
Prints
n1 = 1
n2 = 2
Match! result = 1+2
BONUS
A little more separation of concern:
Live On Coliru
void printn(int n, std::string const& s) {
std::cout << "n" << n << " = " << s << std::endl;
}
BOOST_PHOENIX_ADAPT_FUNCTION(void, print_, printn, 2)
int main() {
qi::rule<std::string::const_iterator, std::string()> number;
int n = 1;
number %= qi::as_string[+qi::digit] [print_(px::ref(n)++, qi::_1)];
std::string input = "1+2";
std::string result;
if (test::parse(input, qi::raw [number > *(qi::char_("+-") >> number) ] >> qi::eoi, result)) {
std::cout << "Match! result = " << result << std::endl;
} else {
std::cout << "Not match!" << std::endl;
}
return 0;
}

How to stop string concatenation in Spirit Qi 'repeat' parser?

I would like to split a string into parts:
input = "part1/part2/part3/also3"
and fill the structure that consist of three std::string with these parts.
struct strings
{
std::string a; // <- part1
std::string b; // <- part2
std::string c; // <- part3/also3
};
However my parser seems to merge the parts together and store it into the first std::string.
Here is the code on coliru
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
struct strings
{
std::string a;
std::string b;
std::string c;
};
BOOST_FUSION_ADAPT_STRUCT(strings,
(std::string, a) (std::string, b) (std::string, c))
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/'] > last_part;
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
}
The output is:
Parsed: 1
a:onetwo
b:three/four
c:
while I expected:
Parsed: 1
a:one
b:two
c:three/four
I'd like not to modify the grammar heavily and leave "repeat" statement in it, because the "real" grammar is much more complex of course and I will need to have it there. Just need to find the way to disable the concatenations. I tried
repeat (parts-1) [as_string[part] > '/']
but that does not compile.
The trouble here is specifically that qi::repeat is documented to expose a container of element-types.
Now, because the exposed attribute type of the rule (strings) is not a container-type, Spirit "knows" how to flatten the values.
Of course it's not what you wanted in this case, but usually this heuristic makes for really convenient accumulation of string values.
Fix 1: use a container attribute
You could witness the reverse fix by getting rid of the non-container (sequence) target attribute:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
using strings = std::vector<std::string>;
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/']
> last_part
;
part = +(~char_ ("/"))
;
last_part = +char_
;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
for(auto i = 0ul; i<ss.size(); ++i)
std::cout << static_cast<char>('a'+i) << ":" << ss[i] << "\n";
}
}
What you really wanted:
Of course you want to keep the struct/sequence adaptation (?); In this case that's really tricky because as soon as you use any kind of Kleene operator (*,%) or qi::repeat you'll have the attribute transformation rules as outlined above, ruining your mood.
Luckily, I just remembered I have a hacky solution based on the auto_ parser. Note the caveat in this older answer though:
Read empty values with boost::spirit
CAVEAT Specializing for std::string directly like this might not be the best idea (it might not always be appropriate and might interact badly with other parsers).
By default create_parser<std::string> is not defined, so you might decide this usage is good enough for your case:
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
struct strings {
std::string a;
std::string b;
std::string c;
};
namespace boost { namespace spirit { namespace traits {
template <> struct create_parser<std::string> {
typedef proto::result_of::deep_copy<
BOOST_TYPEOF(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
)
>::type type;
static type call() {
return proto::deep_copy(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
);
}
};
}}}
BOOST_FUSION_ADAPT_STRUCT(strings, (std::string, a)(std::string, b)(std::string, c))
template <typename Iterator>
struct google_parser : qi::grammar<Iterator, strings()> {
google_parser() : google_parser::base_type(entry, "contacts") {
using namespace qi;
entry =
skip('/') [auto_]
;
}
private:
qi::rule<Iterator, strings()> entry;
};
int main() {
using It = std::string::const_iterator;
google_parser<It> p;
std::string const input = "part1/part2/part3/also3";
It f = input.begin(), l = input.end();
strings ss;
bool ok = qi::parse(f, l, p >> *qi::char_, ss, ss.c);
if (ok)
{
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
Prints
a:part1
b:part2
c:part3/also3
Update/Bonus
In reponse to the OP's own answer I wanted to challenge myself to write it more generically indeed.
The main thing is to to write set_field_ in such a way that it doesn't know/assume more than required about the destination sequence type.
With a bit of Boost Fusion magic that became:
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
It has the added flexibility of applying Spirit's attribute compatibility rules¹. So, you can use the same grammar with both the following types:
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
To drive the point home, I made the adaptation of the second struct reverse the field order:
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
Without further ado, the demo program:
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/algorithm/iteration.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace fus = boost::fusion;
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
// output helpers for demo:
namespace {
inline std::ostream& operator<<(std::ostream& os, strings const& data) {
return os
<< "a:\"" << data.a << "\" "
<< "b:\"" << data.b << "\" "
<< "c:\"" << data.c << "\" ";
}
inline std::ostream& operator<<(std::ostream& os, alternative const& data) {
os << "first: vector<char> { \""; os.write(&data.first[0], data.first.size()); os << "\" } ";
os << "second: \"" << data.second << "\" ";
os << "third: \"" << data.third << "\" ";
return os;
}
}
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
boost::phoenix::function<set_field_> const set_field = {};
template <typename It, typename Target>
struct split_string_grammar: qi::grammar<It, Target(), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
_a_type _current; // custom placeholder
split_string =
eps [ _current = 0u ]
> repeat (parts-1)
[part [ set_field(_val, _1, _current++) ] > '/']
> last_part [ set_field(_val, _1, _current++) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, Target(), qi::locals<unsigned> > split_string;
qi::rule<It, std::string()> part, last_part;
};
template <size_t N = 3, typename Target>
void run_test(Target target) {
using It = std::string::const_iterator;
std::string const input { "one/two/three/four" };
It first = input.begin(), last = input.end();
split_string_grammar<It, Target> split_string(N);
bool ok = qi::parse (first, last, split_string, target);
if (ok) {
std::cout << target << '\n';
} else {
std::cout << "Parse failed\n";
}
if (first != last)
std::cout << "Remaining input left unparsed: '" << std::string(first, last) << "'\n";
}
int main ()
{
run_test(strings {});
run_test(alternative {});
}
Output:
a:"one" b:"two" c:"three/four"
first: vector<char> { "three/four" } second: "two" third: "one"
¹ as with BOOST_SPIRIT_ACTIONS_ALLOW_ATTR_COMPAT
Besides sehe's suggestions one more possible way is to use semantic actions (coliru):
struct set_field_
{
void operator() (strings& dst, std::string const& src, unsigned& idx) const
{
assert (idx < 3);
switch (idx++) {
case 0: dst.a = src; break;
case 1: dst.b = src; break;
case 2: dst.c = src; break;
}
}
};
boost::phoenix::function<set_field_> const set_field { set_field_ {} };
template <typename It>
struct split_string_grammar: qi::grammar<It, strings (), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
split_string = eps [ _a = val (0) ]
> repeat (parts-1) [part [ set_field (_val, _1, _a) ] > '/']
> last_part [ set_field (_val, _1, _a) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings (), qi::locals<unsigned> > split_string;
qi::rule<It, std::string ()> part, last_part;
};

How to split a string and get I want in c++?

There is a string like this: M90I4D7
I need to push it in to this kind of struct:
struct CigarOp {
char Type; //!< CIGAR operation type (MIDNSHPX=)
uint32_t Length; //!< CIGAR operation length (number of bases)
//! constructor
CigarOp(const char type = '\0',
const uint32_t& length = 0)
: Type(type)
, Length(length)
{ }
};
which means I need to split it into 3 groups and each of them is a CigarOp( 'M' ,90 'I', 4 'D' ,7 )
Assuming that the string is of the form ([A-Z][0-9]+)*, you could quite simply do something like this:
#include <sstream>
...
std::vector<CigarOp> cigars;
std::istringstream parser("M90I4D7");
char c;
std::uint32_t l;
while(parser >> c >> l) {
cigars.push_back(CigarOp(c, l));
}
Note that this code doesn't do any sort of validation. If validation is necessary, one way to achieve it is to use Boost.Spirit (found on http://boost.org):
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <cstdint>
#include <iostream>
struct CigarOp {
char Type;
std::uint32_t Length;
};
BOOST_FUSION_ADAPT_STRUCT(CigarOp, (char, Type) (std::uint32_t, Length))
int main() {
using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::char_;
using boost::spirit::qi::uint_;
using boost::spirit::qi::standard::space;
std::vector<CigarOp> cigars;
std::string s = "M90I4D7";
std::string::const_iterator first = s.begin(), last = s.end();
bool r = phrase_parse(first, last, *(char_ >> uint_), space, cigars);
if(r && first == last) {
// string was well-formed
for(auto const &cigar : cigars) {
std::cout << cigar.Type << ", " << cigar.Length << '\n';
}
}
}
how about:
#include <cstdio>
#include <cctype>
#include <vector>
#include <iostream>
#include <cstdlib>
using namespace std;
struct CigarOp {
char op; //!< CIGAR operation type (MIDNSHPX=)
int size; //!< CIGAR operation length (number of bases)
static int parse(const char* s,vector<CigarOp>& v)
{
char* p=(char*)(s);
while(*p!=0)
{
char* endptr;
CigarOp c;
c.op = *p;
if(!isalpha(c.op)) return -1;
++p;
if(!isdigit(*p)) return -1;
c.size =strtol(p,&endptr,10);
if(c.size<=0) return -1;
v.push_back(c);
p=endptr;
}
return 0;
}
};
int main(int argc,char** argv)
{
vector<CigarOp> cigar;
if(CigarOp::parse("M90I4D7",cigar)!=0) return -1;
for(size_t i=0;i< cigar.size();++i)
{
cout << cigar[i].op << ":" << cigar[i].size << endl;
}
return 0;
}
btw , for bioinformatics, you should ask biostars.org.

Why msgpack-c++ do not support vector<float> or vector<double> in my case?

See my code below:(you can compile the code with: g++-4.7 demo.cpp -std=c++11 -lmsgpack)
#include <iostream>
#include <vector>
#include <sstream>
#include <string>
#include <msgpack.hpp>
using namespace std;
template<class T>
void pack(T &t, string &str)
{
using namespace msgpack;
sbuffer buff;
pack(buff, t);
ostringstream is;
is << buff.size() << buff.data();
str = string(is.str());
}
template<class T>
T unpack(string &str)
{
using namespace msgpack;
unpacked result;
istringstream ss(str);
int len;
string buff;
ss >> len >> buff;
unpack(&result, buff.c_str(), len);
auto obj = result.get();
return obj.as<T>();
}
int main(int argc, char *argv[]) {
vector<float> t = {1., 2., 3., 4., 5.};
string s;
pack(t, s);
auto r = unpack<vector<float> >(s);
for(auto & v : r)
std::cout << v << std::endl;
// vector<int> is right
/*
vector<int> t = {1, 2, 3, 4, 5};
string s;
pack(t, s);
auto r = unpack<vector<int> >(s);
for(auto & v : r)
std::cout << v << std::endl;
*/
return 0;
}
There is a strange bug that 'vector', 'vector', 'int', 'double' can work in the above encapsulation, while 'vector', 'vector' can not.
runtime error shows below:
terminate called after throwing an instance of 'msgpack::type_error'
what(): std::bad_cast
but in the below encapsulation, any type can work well:
template<class T>
void pack(T &t, msgpack::sbuffer sbuf) {
pack(&sbuf, t);
}
template<class T>
T unpack(const msgpack::sbuffer & sbuf) {
msgpack::unpacked msg;
msgpack::unpack(&msg, sbuf.data(), sbuf.size());
auto obj = msg.get();
return obj.as<t>();
}
What's is the problem in my first code?
Thanks!
msgpack binary output can include null bytes, like it happens in your case. is […] << buff.data(); will only output first 4 bytes in your case.
To fix the error replace is << buff.size() << buff.data(); with is << buff.size() << string(buff.data(), buff.size());.