#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace test {
template< typename Rng,typename Expr >
bool parse(Rng const& rng,Expr const& expr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr);
} catch(qi::expectation_failure<decltype(itBegin)> const& exfail) {
exfail;
return false;
}
}
template< typename Rng,typename Expr,typename Attr >
bool parse(Rng const& rng,Expr const& expr,Attr& attr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr,attr);
} catch(qi::expectation_failure<decltype(itBegin)> const&) {
return false;
}
}
}
void print1(std::string const& s) {
std::cout<<"n1 = "<<s<<std::endl;
}
void print2(std::string const& s) {
std::cout<<"n2 = "<<s<<std::endl;
}
int main() {
qi::rule<std::string::const_iterator, std::string()> number = +qi::digit;
std::string input = "1+2";
std::string result;
if( test::parse(input, number[print1] >> *( qi::char_("+-") >> number[print2]) >> qi::eoi, result) ) {
std::cout<<"Match! result = "<<result<<std::endl;
} else {
std::cout<<"Not match!"<<std::endl;
}
return 0;
}
I'm expecting the output of this program is,
n1 = 1
n2 = 2
Match! result = 1+2
But the output is actually very strange for n2,
n1 = 1
n2 = 1+2
Match! result = 1+2
Why is the second number's attribute "1+2" instead of just "2"?
I know there are some other ways to parse this expression such as using qi::int_. I'm just wondering why do I get this strange attribute from it. Thanks!
Backtracking doesn't undo changes to container attributes. Adjacent compatible parser expressions bind to the same container attribute.
Both attributes get bound to the same container attribute (result) which means you are printing the same variable twice.
If you didn't want that, be explicit, e.g.
Live On Coliru
std::string result;
std::vector<std::string> v;
if( test::parse(input, number[px::bind(print1, qi::_1)] >> *qi::as_string[qi::char_("+-") >> number[print2]] >> qi::eoi, result, v) ) {
std::cout<<"Match! result = "<<result<<std::endl;
for (auto s : v)
std::cout << s << "\n";
} else {
Prints
n1 = 1
n2 = +2
Match! result = 1
+2
Now I don't know what you want to achieve, but this could be close:
if (test::parse(input, qi::raw [number[print_("n1",_1)] > *(qi::char_("+-") >> number[print_("n2",_1)]) ] >> qi::eoi, result)) {
Live On Coliru
#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace test {
template< typename Rng,typename Expr,typename... Attr >
bool parse(Rng const& rng,Expr const& expr,Attr&... attr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr,attr...);
} catch(qi::expectation_failure<decltype(itBegin)> const&) {
return false;
}
}
}
void printn(std::string const& label, std::string const& s) {
std::cout << label << " = " << s << std::endl;
}
BOOST_PHOENIX_ADAPT_FUNCTION(void, print_, printn, 2)
int main() {
qi::rule<std::string::const_iterator, std::string()> number = +qi::digit;
std::string input = "1+2";
std::string result;
using qi::_1;
if (test::parse(input, qi::raw [number[print_("n1",_1)] > *(qi::char_("+-") >> number[print_("n2",_1)]) ] >> qi::eoi, result)) {
std::cout<<"Match! result = "<<result<<std::endl;
} else {
std::cout<<"Not match!"<<std::endl;
}
return 0;
}
Prints
n1 = 1
n2 = 2
Match! result = 1+2
BONUS
A little more separation of concern:
Live On Coliru
void printn(int n, std::string const& s) {
std::cout << "n" << n << " = " << s << std::endl;
}
BOOST_PHOENIX_ADAPT_FUNCTION(void, print_, printn, 2)
int main() {
qi::rule<std::string::const_iterator, std::string()> number;
int n = 1;
number %= qi::as_string[+qi::digit] [print_(px::ref(n)++, qi::_1)];
std::string input = "1+2";
std::string result;
if (test::parse(input, qi::raw [number > *(qi::char_("+-") >> number) ] >> qi::eoi, result)) {
std::cout << "Match! result = " << result << std::endl;
} else {
std::cout << "Not match!" << std::endl;
}
return 0;
}
Related
I am trying to parse a sequence of characters separated by a "," into an std::map<char,int> of pairs where the key is the character and the value just the a count of parsed characters.
For example, if the input is
a,b,c
The map should contain the pairs:
(a,1) , (b,2) , (c,3)
Here's the code I am using :
namespace myparser
{
std::map<int, std::string> mapping;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
int i = 0;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, std::map<char,int>& v)
{
using qi::double_;
using qi::char_;
using qi::phrase_parse;
using qi::_1;
using ascii::space;
using phoenix::push_back;
bool r = phrase_parse(first, last,
// Begin grammar
(
char_[v.insert(std::make_pair(_1,0)]
>> *(',' >> char_[v.insert(std::make_pair(_1,0)])
)
,
// End grammar
space);
if (first != last) // fail if we did not get a full match
return false;
return r;
}
//]
}
Then I try to print the pair in main like this:
int main() {
std::string str;
while (getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
std::map<char,int> v;
std::map<std::string, int>::iterator it = v.begin();
if (myparser::parse_numbers(str.begin(), str.end(), v))
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << str << " Parses OK: " << std::endl;
while (it != v.end())
{
// Accessing KEY from element pointed by it.
std::string word = it->first;
// Accessing VALUE from element pointed by it.
int count = it->second;
std::cout << word << " :: " << count << std::endl;
// Increment the Iterator to point to next entry
it++;
}
std::cout << "\n-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
}
return 0;
}
I am a beginner and I don't know how to fix this code . I also want to use strings instead of characters so I enter a sequence of strings separated by a "," and store them in a map similar to the one mentioned above. I would appreciate any help !
You cannot use Phoenix place holders outside Phoenix deferred actors. E.g. the type of std::make_pair(qi::_1, 0) is std::pair<boost::phoenix::actor<boost::phoenix::argument<0>>, int>.
Nothing interoperates with such a thing. Certainly not std::map<>::insert.
What you'd need to do is wrap all the operations in semantic actions as Phoenix actors.
#include <boost/phoenix.hpp>
namespace px = boost::phoenix;
Then you can:
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace myparser {
using Map = std::map<char, int>;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = px::insert(px::ref(m), px::end(px::ref(m)),
px::construct<std::pair<char, int>>(qi::_1, 0));
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action] >> *(',' >> qi::char_[action]),
// End grammar
qi::space);
return r && first == last;
}
} // namespace myparser
See it Live
Easy peasy. Right.
I spent half an hour on that thing debugging why it wouldn't work. Why is this so hard?
It's because someone invented a whole meta-DSL to write "normal C++" but with defferred execution. Back when that happened it was pretty neat, but it is the mother of all leaky abstractions, with razor sharp edges.
So, what's new? Using C++11 you could:
Live
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
struct action_f {
Map& m_;
void operator()(char ch) const { m_.emplace(ch, 0); }
};
px::function<action_f> action{{m}};
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action(qi::_1)] >> *(',' >> qi::char_[action(qi::_1)]),
// End grammar
qi::space);
return r && first == last;
}
Or using c++17:
Live
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
px::function action{[&m](char ch) { m.emplace(ch, 0); }};
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action(qi::_1)] >> *(',' >> qi::char_[action(qi::_1)]),
// End grammar
qi::space);
return r && first == last;
}
On a tangent, you probably wanted to count things, so, maybe use
Live
px::function action{[&m](char ch) { m[ch] += 1; }};
By this time, you could switch to Spirit X3 (which requires C++14):
Live
#include <boost/spirit/home/x3.hpp>
#include <map>
namespace x3 = boost::spirit::x3;
namespace myparser {
using Map = std::map<char, int>;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
return x3::phrase_parse( //
first, last,
// Begin grammar
x3::char_[action] >> *(',' >> x3::char_[action]) >> x3::eoi,
// End grammar
x3::space);
}
} // namespace myparser
Now finally, let's simplify. p >> *(',' >> p) is just a clumsy way of saying p % ',':
Live
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
return x3::phrase_parse( //
first, last, //
x3::char_[action] % ',', //
x3::space);
}
And you wanted words, not characters:
Live
#include <boost/spirit/home/x3.hpp>
#include <map>
namespace x3 = boost::spirit::x3;
namespace myparser {
using Map = std::map<std::string, int>;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
auto word_ = (*~x3::char_(','))[action];
return phrase_parse(first, last, word_ % ',', x3::space);
}
} // namespace myparser
#include <iomanip>
#include <iostream>
int main() {
for (std::string const str : {"foo,c++ is strange,bar,qux,foo,c++ is strange ,cuz"}) {
std::map<std::string, int> m;
std::cout << "Parsing " << std::quoted(str) << std::endl;
if (myparser::parse_numbers(str.begin(), str.end(), m)) {
std::cout << m.size() << " words:\n";
for (auto& [word,count]: m)
std::cout << " - " << std::quoted(word) << " :: " << count << std::endl;
} else {
std::cerr << "Parsing failed\n";
}
}
}
Prints
Parsing "foo,c++ is strange,bar,qux,foo,c++ is strange ,cuz"
5 words:
- "bar" :: 1
- "c++isstrange" :: 2
- "cuz" :: 1
- "foo" :: 2
- "qux" :: 1
Note the behaviour of the x3::space (like qi::space and qi::ascii::space above).
I tried to run some simple parser that will parse [ 1, 11, 3, 6-4]. Basically, integer list with range notation.
I want to put everything into AST without semantic action. So I use x3::variant. My code 'seems' very similar to the expression example. However, it can't compile under g++ 6.2. It indeed compile ok with clang++ 6.0 but yield wrong result.
The boost version is 1.63.
It seems that I have some 'move' or initialization issue.
#include <iostream>
#include <list>
#include <vector>
#include <utility>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
namespace ns
{
namespace ast
{
namespace x3 = boost::spirit::x3;
// forward definition
class uintObj;
struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
class uintObj : public x3::variant <
unsigned int,
uintPair_t
>
{
public:
using base_type::base_type;
using base_type::operator=;
};
struct varVec
{
uintVec_t valVector;
};
}
}
BOOST_FUSION_ADAPT_STRUCT(
ns::ast::varVec,
valVector
)
namespace ns
{
namespace parser
{
// namespace x3 = boost::spirit::x3;
// using namespace x3;
using namespace boost::spirit::x3;
// definition of the range pair:
rule<class uintPair, ast::uintPair_t> const uintPair = "uintPair";
auto const uintPair_def =
uint_
>> '-'
>> uint_
;
rule<class uintObj, ast::uintObj> const uintObj = "uintObj";
auto const uintObj_def =
uint_
| uintPair
;
// define rule definition : rule<ID, attrib>
// more terse definition :
// struct varVec_class;
// using varVec_rule_t = x3::rule<varVec_class, ast::varVec>;
// varVec_rule_t const varVec = "varVec";
// varVec is the rule, "varVec" is the string name of the rule.
rule<class varVec, ast::varVec> const varVec = "varVec";
auto const varVec_def =
'['
>> uintObj % ','
>> ']'
;
BOOST_SPIRIT_DEFINE(
varVec,
uintObj,
uintPair
);
}
}
int main()
{
std::string input ("[1, 11, 3, 6-4]\n");
std::string::const_iterator begin = input.begin();
std::string::const_iterator end = input.end();
ns::ast::varVec result; // ast tree
using ns::parser::varVec; // grammar
using boost::spirit::x3::ascii::space;
bool success = phrase_parse(begin, end, varVec, space, result);
if (success && begin == end)
std::cout << "good" << std::endl;
else
std::cout << "bad" << std::endl;
return 0;
}
Swap the alternative order for the uintObj_def
auto const uintObj_def =
uintPair
| uint_
;
The formulation you have now will always match on a uint_ because the uintPair begins with a valid uint_.
mjcaisse's answer calls out the main problem I think you had. There were a few missing pieces, so I decided to make a simplified version that shows parsing results:
Live On Wandbox
#include <iostream>
#include <iomanip>
//#include <boost/fusion/adapted.hpp>
//#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ns { namespace ast {
// forward definition
struct uintObj;
//struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
struct uintObj : x3::variant<unsigned int, uintPair_t> {
using base_type::base_type;
using base_type::operator=;
friend std::ostream& operator<<(std::ostream& os, uintObj const& This) {
struct {
std::ostream& os;
void operator()(unsigned int v) const { os << v; }
void operator()(uintPair_t v) const { os << v.first << "-" << v.second; }
} vis { os };
boost::apply_visitor(vis, This);
return os;
}
};
using varVec = uintVec_t;
} }
namespace ns { namespace parser {
using namespace boost::spirit::x3;
template <typename T> auto as = [](auto p) { return rule<struct _, T> {} = p; };
auto const uintPair = as<ast::uintPair_t> ( uint_ >> '-' >> uint_ );
auto const uintObj = as<ast::uintObj> ( uintPair | uint_ );
auto const varVec = as<ast::varVec> ( '[' >> uintObj % ',' >> ']' );
} }
int main() {
using namespace ns;
std::string const input("[1, 11, 3, 6-4]\n");
auto begin = input.begin(), end = input.end();
ast::varVec result; // ast tree
bool success = phrase_parse(begin, end, parser::varVec, x3::ascii::space, result);
if (success) {
std::cout << "good\n";
for (auto& r : result)
std::cout << r << "\n";
}
else
std::cout << "bad\n";
if (begin != end)
std::cout << "Remaining unparsed: " << std::quoted(std::string(begin, end)) << std::endl;
}
Prints
good
1
11
3
6-4
In Boost Spirit QI it was easy to template the parser so that it could be instantiated for various attribute types. It is unclear to me how to do this with X3. Consider this stripped down version of the roman numerals parser example:
#include <iostream>
#include <iterator>
#include <string>
#include <boost/spirit/home/x3.hpp>
namespace parser {
namespace x3 = boost::spirit::x3;
struct numbers_ : x3::symbols<unsigned> {
numbers_() {
add
("I" , 1)
("II" , 2)
("III" , 3)
("IV" , 4)
("V" , 5)
("VI" , 6)
("VII" , 7)
("VIII" , 8)
("IX" , 9)
;
}
} numbers;
x3::rule<class roman, unsigned> const roman = "roman";
auto init = [](auto &x) { x3::_val(x) = 0; };
auto add = [](auto &x) { x3::_val(x) += x3::_attr(x); };
auto const roman_def = x3::eps [init] >> numbers [add];
BOOST_SPIRIT_DEFINE(roman);
}
int main()
{
std::string input = "V";
auto iter = input.begin();
auto end = input.end();
unsigned result;
bool r = parse(iter, end, parser::roman, result);
if (r && iter == end) {
std::cout << "Success :) Result = " << result << '\n';
} else {
std::cout << "Failed :(\n";
}
}
I'd like to template the parser on the attribute type which is currently hardcoded as unsigned. My first guess was to replace
namespace parser {
// ...
}
with
template < typename int_t >
struct parser {
// ...
};
which is obviously too naïve. How to do this correctly?
In X3 there's not so much pain in combining parsers dynamically. So I'd write your sample as:
template <typename Attribute>
auto make_roman() {
using namespace boost::spirit::x3;
struct numbers_ : symbols<Attribute> {
numbers_() { this-> add
("I", Attribute{1}) ("II", Attribute{2}) ("III", Attribute{3}) ("IV", Attribute{4})
("V", Attribute{5}) ("VI", Attribute{6}) ("VII", Attribute{7}) ("VIII", Attribute{8})
("IX", Attribute{9}) ;
}
} numbers;
return rule<class roman, Attribute> {"roman"} =
eps [([](auto &x) { _val(x) = 0; })]
>> numbers [([](auto &x) { _val(x) += _attr(x); })];
}
See it Live On Coliru
the BNF I implement has a funny rule where, depending on operator, the terms can be chained or event not at this production rule. Hence I use the same AST data structure since only the enumeration changes:
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <string>
#include <list>
namespace ast
{
struct identifer {
int name;
};
struct expression {
struct chunk {
char operator_;
ast::identifer identifer;
};
ast::identifer identifer;
std::list<chunk> chunk_list;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::identifer,
name
)
BOOST_FUSION_ADAPT_STRUCT(ast::expression::chunk,
operator_, identifer
)
BOOST_FUSION_ADAPT_STRUCT(ast::expression,
identifer, chunk_list
)
namespace boost { namespace spirit { namespace x3 { namespace traits {
void move_to(ast::expression::chunk&& chunk, std::list<ast::expression::chunk>& chunk_list,
mpl::identity<container_attribute>)
{
chunk_list.emplace(chunk_list.end(), std::move(chunk));
}
} } } }
namespace parser
{
namespace x3 = boost::spirit::x3;
auto const identifier = x3::rule<struct _, int> { "identifier" } =
x3::int_;
auto const operator_1 = x3::rule<struct _, char> { "operator" } =
x3::char_("ABC");
auto const operator_2 = x3::rule<struct _, char> { "operator" } =
x3::char_("XYZ");
auto const expression_chunk_1 = x3::rule<struct _, ast::expression::chunk> { "expression" } =
operator_1 > identifier
;
auto const expression_chunk_2 = x3::rule<struct _, ast::expression::chunk> { "expression" } =
operator_2 > identifier
;
auto const expression = x3::rule<struct _, ast::expression> { "expression" } =
identifier >> *expression_chunk_1 // foo { and foo }
// rule below fails to compile
| identifier >> expression_chunk_2 // foo [ nand foo ]
;
}
struct visitor {
visitor(std::ostream& os) : os{ os } { }
void operator()(ast::expression const& node) {
os << "(";
(*this)(node.identifer);
for(auto const& chunk : node.chunk_list) {
os << "(" << chunk.operator_ << " ";
(*this)(chunk.identifer);
os << ")";
}
os << ")\n";
}
void operator()(ast::identifer const& node) {
os << "(" << node.name << ")";
}
std::ostream& os;
};
int main()
{
namespace x3 = boost::spirit::x3;
for(std::string const str: {
"1 X 2",
"3 A 4 A 5"
}) {
auto iter = str.begin(), end = str.end();
ast::expression attr;
bool r = x3::phrase_parse(iter, end, parser::expression, x3::space, attr);
std::cout << "parse '" << str << "': ";
if (r && iter == end) {
std::cout << "succeeded:\n";
visitor(std::cout)(attr);
} else {
std::cout << "*** failed ***\n";
}
}
return 0;
}
This was the idea - the operator X,Y,Z adds only one chunk to list. Following the compiler errors, I have to specialize x3::traits::move_to, but I don't found any solution to get this to compile. What is the wayy to do? is the list::emplace() and std::move() safe here?
I'd do without the trait. Instead, make the grammar result in a vector<T> artificially using repeat:
auto const expression = x3::rule<struct _, ast::expression> { "expression" } =
identifier >> *expression_chunk_1 // foo { and foo }
| identifier >> x3::repeat(1) [ expression_chunk_2 ] // foo [ nand foo ]
;
I want use Boost.Spirit.Lex to lex a binary file; for this purpose I wrote the following program (here is an extract):
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <fstream>
#include <iterator>
#include <string>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
#define X 1
#define Y 2
#define Z 3
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
word_count_tokens () {
this->self.add
("[^ \t\n]+", X)
("\n", Y)
(".", Z);
}
};
class counter
{
public:
typedef bool result_type;
template<typename T>
bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
switch (t.id ()) {
case X:
++w; c += t.value ().size ();
break;
case Y:
++l; ++c;
break;
case Z:
++c;
break;
}
return true;
}
};
int main (int argc, char **argv)
{
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
auto first = spirit::make_default_multi_pass (std::istream_iterator<char> (ifs));
auto last = spirit::make_default_multi_pass (std::istream_iterator<char> ());
size_t w, c, l;
word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
w = c = l = 0;
bool r = lex::tokenize (first, last, word_count_functor, boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
return 0;
}
The build returns the following error:
lexer.hpp:390:46: error: non-const lvalue reference to type 'const char *' cannot bind to a value of unrelated type
Now, the error is due to definition of concrete lexer, lex::lexer<>; in fact its first parameter is defaulted to const char *. I obtain the same error also if I use spirit::istream_iterator or spirit::make_default_multi_pass (.....).
But if I specify the correct template parameters of lex::lexer<> I obtain a plethora of errors!
Solutions?
Update
I have putted all source file; it's the word_counter site's example.
Okay, since the question was changed, here's a new answer, addressing some points with the complete code sample.
Firstly, you need to use a custom token type. I.e.
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
// instead of:
// word_count_tokens<lex::lexertl::lexer<>> word_count_functor;
Obviously, it's customary to typedef lex::lexertl::token<boost::spirit::istream_iterator>
You need to use min_token_id instead of token IDs 1,2,3. Also, make it an enum for ease of maintenance:
enum token_ids {
X = lex::min_token_id + 1,
Y,
Z,
};
You can no longer just use .size() on the default token value() since the iterator range is not RandomAccessRange anymore. Instead, employ boost::distance() which is specialized for iterator_range:
++w; c += boost::distance(t.value()); // t.value ().size ();
Combining these fixes: Live On Coliru
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/bind.hpp>
#include <fstream>
namespace spirit = boost::spirit;
namespace lex = spirit::lex;
enum token_ids {
X = lex::min_token_id + 1,
Y,
Z,
};
template<typename L>
class word_count_tokens : public lex::lexer<L>
{
public:
word_count_tokens () {
this->self.add
("[^ \t\n]+", X)
("\n" , Y)
("." , Z);
}
};
struct counter
{
typedef bool result_type;
template<typename T>
bool operator () (const T &t, size_t &c, size_t &w, size_t &l) const {
switch (t.id ()) {
case X:
++w; c += boost::distance(t.value()); // t.value ().size ();
break;
case Y:
++l; ++c;
break;
case Z:
++c;
break;
}
return true;
}
};
int main (int argc, char **argv)
{
std::ifstream ifs (argv[1], std::ios::in | std::ios::binary);
ifs >> std::noskipws;
boost::spirit::istream_iterator first(ifs), last;
word_count_tokens<lex::lexertl::lexer<lex::lexertl::token<boost::spirit::istream_iterator>>> word_count_functor;
size_t w = 0, c = 0, l = 0;
bool r = lex::tokenize (first, last, word_count_functor,
boost::bind (counter (), _1, boost::ref (c), boost::ref (w), boost::ref (l)));
ifs.close ();
if (r) {
std::cout << l << ", " << w << ", " << c << std::endl;
}
}
When run on itself, prints
65, 183, 1665
I think the real problem is not shown. You don't show first or last and I have a feeling you might have temporaries there.
Here's a sample I came up with to verify, perhaps you can see what it is you're doing ---wrong--- differently :)
Live on Coliru (memory mapped an byte-vector, via const char*)
And this alternative (using spirit::istream_iterator)
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <fstream>
#ifdef MEMORY_MAPPED
# include <boost/iostreams/device/mapped_file.hpp>
#endif
namespace /*anon*/
{
namespace qi =boost::spirit::qi;
namespace lex=boost::spirit::lex;
template <typename Lexer>
struct mylexer_t : lex::lexer<Lexer>
{
mylexer_t()
{
fileheader = "hello";
this->self = fileheader
| space [ lex::_pass = lex::pass_flags::pass_ignore ];
}
lex::token_def<lex::omit>
fileheader, space;
};
template <typename Iterator> struct my_grammar_t
: public qi::grammar<Iterator>
{
template <typename TokenDef>
my_grammar_t(TokenDef const& tok)
: my_grammar_t::base_type(header)
{
header = tok.fileheader;
BOOST_SPIRIT_DEBUG_NODE(header);
}
private:
qi::rule<Iterator> header;
};
}
namespace /* */ {
std::string safechar(char ch) {
switch (ch) {
case '\t': return "\\t"; break;
case '\0': return "\\0"; break;
case '\r': return "\\r"; break;
case '\n': return "\\n"; break;
}
return std::string(1, ch);
}
template <typename It>
std::string showtoken(const boost::iterator_range<It>& range)
{
std::ostringstream oss;
oss << '[';
std::transform(range.begin(), range.end(), std::ostream_iterator<std::string>(oss), safechar);
oss << ']';
return oss.str();
}
}
bool parsefile(const std::string& spec)
{
#ifdef MEMORY_MAPPED
typedef char const* It;
boost::iostreams::mapped_file mmap(spec.c_str(), boost::iostreams::mapped_file::readonly);
char const *first = mmap.const_data();
char const *last = first + mmap.size();
#else
typedef char const* It;
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
std::string v(std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>());
It first = &v[0];
It last = first+v.size();
#endif
typedef lex::lexertl::token<It /*, boost::mpl::vector<char, unsigned int, std::string> */> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef mylexer_t<lexer_type>::iterator_type iterator_type;
try
{
static mylexer_t<lexer_type> mylexer;
static my_grammar_t<iterator_type> parser(mylexer);
auto iter = mylexer.begin(first, last);
auto end = mylexer.end();
bool r = qi::parse(iter, end, parser);
r = r && (iter == end);
if (!r)
std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n";
return r;
}
catch (const qi::expectation_failure<iterator_type>& e)
{
std::cerr << "FIXME: expected " << e.what_ << ", got '";
for (auto it=e.first; it!=e.last; it++)
std::cerr << showtoken(it->value());
std::cerr << "'" << std::endl;
return false;
}
}
int main()
{
if (parsefile("input.bin"))
return 0;
return 1;
}
For the variant:
typedef boost::spirit::istream_iterator It;
std::ifstream in(spec.c_str());
in.unsetf(std::ios::skipws);
It first(in), last;