Parsing characters into an std::map<char,int> using boost::qi

Parsing characters into an std::map<char,int> using boost::qi - c++

I am trying to parse a sequence of characters separated by a "," into an std::map<char,int> of pairs where the key is the character and the value just the a count of parsed characters.
For example, if the input is
a,b,c
The map should contain the pairs:
(a,1) , (b,2) , (c,3)
Here's the code I am using :
namespace myparser
{
std::map<int, std::string> mapping;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
int i = 0;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, std::map<char,int>& v)
{
using qi::double_;
using qi::char_;
using qi::phrase_parse;
using qi::_1;
using ascii::space;
using phoenix::push_back;
bool r = phrase_parse(first, last,
// Begin grammar
(
char_[v.insert(std::make_pair(_1,0)]
>> *(',' >> char_[v.insert(std::make_pair(_1,0)])
)
,
// End grammar
space);
if (first != last) // fail if we did not get a full match
return false;
return r;
}
//]
}
Then I try to print the pair in main like this:
int main() {
std::string str;
while (getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
std::map<char,int> v;
std::map<std::string, int>::iterator it = v.begin();
if (myparser::parse_numbers(str.begin(), str.end(), v))
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << str << " Parses OK: " << std::endl;
while (it != v.end())
{
// Accessing KEY from element pointed by it.
std::string word = it->first;
// Accessing VALUE from element pointed by it.
int count = it->second;
std::cout << word << " :: " << count << std::endl;
// Increment the Iterator to point to next entry
it++;
}
std::cout << "\n-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
}
return 0;
}
I am a beginner and I don't know how to fix this code . I also want to use strings instead of characters so I enter a sequence of strings separated by a "," and store them in a map similar to the one mentioned above. I would appreciate any help !

You cannot use Phoenix place holders outside Phoenix deferred actors. E.g. the type of std::make_pair(qi::_1, 0) is std::pair<boost::phoenix::actor<boost::phoenix::argument<0>>, int>.
Nothing interoperates with such a thing. Certainly not std::map<>::insert.
What you'd need to do is wrap all the operations in semantic actions as Phoenix actors.
#include <boost/phoenix.hpp>
namespace px = boost::phoenix;
Then you can:
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace myparser {
using Map = std::map<char, int>;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = px::insert(px::ref(m), px::end(px::ref(m)),
px::construct<std::pair<char, int>>(qi::_1, 0));
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action] >> *(',' >> qi::char_[action]),
// End grammar
qi::space);
return r && first == last;
}
} // namespace myparser
See it Live
Easy peasy. Right.
I spent half an hour on that thing debugging why it wouldn't work. Why is this so hard?
It's because someone invented a whole meta-DSL to write "normal C++" but with defferred execution. Back when that happened it was pretty neat, but it is the mother of all leaky abstractions, with razor sharp edges.
So, what's new? Using C++11 you could:
Live
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
struct action_f {
Map& m_;
void operator()(char ch) const { m_.emplace(ch, 0); }
};
px::function<action_f> action{{m}};
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action(qi::_1)] >> *(',' >> qi::char_[action(qi::_1)]),
// End grammar
qi::space);
return r && first == last;
}
Or using c++17:
Live
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
px::function action{[&m](char ch) { m.emplace(ch, 0); }};
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action(qi::_1)] >> *(',' >> qi::char_[action(qi::_1)]),
// End grammar
qi::space);
return r && first == last;
}
On a tangent, you probably wanted to count things, so, maybe use
Live
px::function action{[&m](char ch) { m[ch] += 1; }};
By this time, you could switch to Spirit X3 (which requires C++14):
Live
#include <boost/spirit/home/x3.hpp>
#include <map>
namespace x3 = boost::spirit::x3;
namespace myparser {
using Map = std::map<char, int>;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
return x3::phrase_parse( //
first, last,
// Begin grammar
x3::char_[action] >> *(',' >> x3::char_[action]) >> x3::eoi,
// End grammar
x3::space);
}
} // namespace myparser
Now finally, let's simplify. p >> *(',' >> p) is just a clumsy way of saying p % ',':
Live
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
return x3::phrase_parse( //
first, last, //
x3::char_[action] % ',', //
x3::space);
}
And you wanted words, not characters:
Live
#include <boost/spirit/home/x3.hpp>
#include <map>
namespace x3 = boost::spirit::x3;
namespace myparser {
using Map = std::map<std::string, int>;
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
auto word_ = (*~x3::char_(','))[action];
return phrase_parse(first, last, word_ % ',', x3::space);
}
} // namespace myparser
#include <iomanip>
#include <iostream>
int main() {
for (std::string const str : {"foo,c++ is strange,bar,qux,foo,c++ is strange ,cuz"}) {
std::map<std::string, int> m;
std::cout << "Parsing " << std::quoted(str) << std::endl;
if (myparser::parse_numbers(str.begin(), str.end(), m)) {
std::cout << m.size() << " words:\n";
for (auto& [word,count]: m)
std::cout << " - " << std::quoted(word) << " :: " << count << std::endl;
} else {
std::cerr << "Parsing failed\n";
}
}
}
Prints
Parsing "foo,c++ is strange,bar,qux,foo,c++ is strange ,cuz"
5 words:
- "bar" :: 1
- "c++isstrange" :: 2
- "cuz" :: 1
- "foo" :: 2
- "qux" :: 1
Note the behaviour of the x3::space (like qi::space and qi::ascii::space above).

Related

boost spirit x3 variant and std::pair

I tried to run some simple parser that will parse [ 1, 11, 3, 6-4]. Basically, integer list with range notation.
I want to put everything into AST without semantic action. So I use x3::variant. My code 'seems' very similar to the expression example. However, it can't compile under g++ 6.2. It indeed compile ok with clang++ 6.0 but yield wrong result.
The boost version is 1.63.
It seems that I have some 'move' or initialization issue.
#include <iostream>
#include <list>
#include <vector>
#include <utility>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
namespace ns
{
namespace ast
{
namespace x3 = boost::spirit::x3;
// forward definition
class uintObj;
struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
class uintObj : public x3::variant <
unsigned int,
uintPair_t
>
{
public:
using base_type::base_type;
using base_type::operator=;
};
struct varVec
{
uintVec_t valVector;
};
}
}
BOOST_FUSION_ADAPT_STRUCT(
ns::ast::varVec,
valVector
)
namespace ns
{
namespace parser
{
// namespace x3 = boost::spirit::x3;
// using namespace x3;
using namespace boost::spirit::x3;
// definition of the range pair:
rule<class uintPair, ast::uintPair_t> const uintPair = "uintPair";
auto const uintPair_def =
uint_
>> '-'
>> uint_
;
rule<class uintObj, ast::uintObj> const uintObj = "uintObj";
auto const uintObj_def =
uint_
| uintPair
;
// define rule definition : rule<ID, attrib>
// more terse definition :
// struct varVec_class;
// using varVec_rule_t = x3::rule<varVec_class, ast::varVec>;
// varVec_rule_t const varVec = "varVec";
// varVec is the rule, "varVec" is the string name of the rule.
rule<class varVec, ast::varVec> const varVec = "varVec";
auto const varVec_def =
'['
>> uintObj % ','
>> ']'
;
BOOST_SPIRIT_DEFINE(
varVec,
uintObj,
uintPair
);
}
}
int main()
{
std::string input ("[1, 11, 3, 6-4]\n");
std::string::const_iterator begin = input.begin();
std::string::const_iterator end = input.end();
ns::ast::varVec result; // ast tree
using ns::parser::varVec; // grammar
using boost::spirit::x3::ascii::space;
bool success = phrase_parse(begin, end, varVec, space, result);
if (success && begin == end)
std::cout << "good" << std::endl;
else
std::cout << "bad" << std::endl;
return 0;
}

Swap the alternative order for the uintObj_def
auto const uintObj_def =
uintPair
| uint_
;
The formulation you have now will always match on a uint_ because the uintPair begins with a valid uint_.

mjcaisse's answer calls out the main problem I think you had. There were a few missing pieces, so I decided to make a simplified version that shows parsing results:
Live On Wandbox
#include <iostream>
#include <iomanip>
//#include <boost/fusion/adapted.hpp>
//#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
namespace ns { namespace ast {
// forward definition
struct uintObj;
//struct varVec;
// define type
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value:
struct uintObj : x3::variant<unsigned int, uintPair_t> {
using base_type::base_type;
using base_type::operator=;
friend std::ostream& operator<<(std::ostream& os, uintObj const& This) {
struct {
std::ostream& os;
void operator()(unsigned int v) const { os << v; }
void operator()(uintPair_t v) const { os << v.first << "-" << v.second; }
} vis { os };
boost::apply_visitor(vis, This);
return os;
}
};
using varVec = uintVec_t;
} }
namespace ns { namespace parser {
using namespace boost::spirit::x3;
template <typename T> auto as = [](auto p) { return rule<struct _, T> {} = p; };
auto const uintPair = as<ast::uintPair_t> ( uint_ >> '-' >> uint_ );
auto const uintObj = as<ast::uintObj> ( uintPair | uint_ );
auto const varVec = as<ast::varVec> ( '[' >> uintObj % ',' >> ']' );
} }
int main() {
using namespace ns;
std::string const input("[1, 11, 3, 6-4]\n");
auto begin = input.begin(), end = input.end();
ast::varVec result; // ast tree
bool success = phrase_parse(begin, end, parser::varVec, x3::ascii::space, result);
if (success) {
std::cout << "good\n";
for (auto& r : result)
std::cout << r << "\n";
}
else
std::cout << "bad\n";
if (begin != end)
std::cout << "Remaining unparsed: " << std::quoted(std::string(begin, end)) << std::endl;
}
Prints
good
1
11
3
6-4

Templating Spirit X3 parser

In Boost Spirit QI it was easy to template the parser so that it could be instantiated for various attribute types. It is unclear to me how to do this with X3. Consider this stripped down version of the roman numerals parser example:
#include <iostream>
#include <iterator>
#include <string>
#include <boost/spirit/home/x3.hpp>
namespace parser {
namespace x3 = boost::spirit::x3;
struct numbers_ : x3::symbols<unsigned> {
numbers_() {
add
("I" , 1)
("II" , 2)
("III" , 3)
("IV" , 4)
("V" , 5)
("VI" , 6)
("VII" , 7)
("VIII" , 8)
("IX" , 9)
;
}
} numbers;
x3::rule<class roman, unsigned> const roman = "roman";
auto init = [](auto &x) { x3::_val(x) = 0; };
auto add = [](auto &x) { x3::_val(x) += x3::_attr(x); };
auto const roman_def = x3::eps [init] >> numbers [add];
BOOST_SPIRIT_DEFINE(roman);
}
int main()
{
std::string input = "V";
auto iter = input.begin();
auto end = input.end();
unsigned result;
bool r = parse(iter, end, parser::roman, result);
if (r && iter == end) {
std::cout << "Success :) Result = " << result << '\n';
} else {
std::cout << "Failed :(\n";
}
}
I'd like to template the parser on the attribute type which is currently hardcoded as unsigned. My first guess was to replace
namespace parser {
// ...
}
with
template < typename int_t >
struct parser {
// ...
};
which is obviously too naïve. How to do this correctly?

In X3 there's not so much pain in combining parsers dynamically. So I'd write your sample as:
template <typename Attribute>
auto make_roman() {
using namespace boost::spirit::x3;
struct numbers_ : symbols<Attribute> {
numbers_() { this-> add
("I", Attribute{1}) ("II", Attribute{2}) ("III", Attribute{3}) ("IV", Attribute{4})
("V", Attribute{5}) ("VI", Attribute{6}) ("VII", Attribute{7}) ("VIII", Attribute{8})
("IX", Attribute{9}) ;
}
} numbers;
return rule<class roman, Attribute> {"roman"} =
eps [([](auto &x) { _val(x) = 0; })]
>> numbers [([](auto &x) { _val(x) += _attr(x); })];
}
See it Live On Coliru

Segmentation fault for nested boost::variant

The following program has been reduced from the original. I get a segmentation fault when it runs. If I remove line 24 with ArithmeticUnaryExpression then the program no longer crashes. How do I get rid of the segmentation fault?
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/include/qi_expect.hpp>
#include <boost/spirit/home/x3/directive/expect.hpp>
#include <iostream>
#include <string>
namespace wctl_parser {
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
namespace qi = boost::spirit::qi;
using x3::ulong_;
using x3::lexeme;
//--- Ast structures
struct ArithmeticUnaryExpression;
using AtomicProp = std::string;
using ArithmeticExpression = x3::variant<
x3::forward_ast<ArithmeticUnaryExpression>,
unsigned long
>;
struct ArithmeticUnaryExpression {
std::string op;
ArithmeticExpression operand;
};
using Expression = x3::variant<
ArithmeticExpression
>;
template <typename T> auto rule = [](const char* name = typeid(T).name()) {
struct _{};
return x3::rule<_, T> {name};
};
template <typename T> auto as = [](auto p) { return rule<T>() = p; };
//--- Rules
x3::rule<struct aTrivRule, ArithmeticExpression> aTriv("aTriv");
x3::rule<struct exprRule, Expression> expr("expression");
auto const aTriv_def = rule<ArithmeticExpression>("aTriv")
= ulong_
// | '(' > expr > ')'
;
auto const primitive = rule<Expression>("primitive")
= aTriv
;
auto const expr_def
= primitive
;
BOOST_SPIRIT_DEFINE(aTriv)
BOOST_SPIRIT_DEFINE(expr)
auto const entry = x3::skip(ascii::space) [expr];
} //End namespace
int main() {
std::string str("prop");
namespace x3 = boost::spirit::x3;
wctl_parser::Expression root;
auto iter = str.begin();
auto end = str.end();
bool r = false;
r = parse(iter, end, wctl_parser::entry, root);
if (r) {
std::cout << "Parses OK:" << std::endl << str << std::endl;
if (iter != end) std::cout << "Partial match" << std::endl;
std::cout << std::endl << "----------------------------\n";
}
else {
std::cout << "!! Parsing failed:" << std::endl << str << std::endl << std::endl << "----------------------------\n";
}
return 0;
}

Your variant
using ArithmeticExpression = x3::variant<
x3::forward_ast<ArithmeticUnaryExpression>,
unsigned long
>;
will default-construct to the first element type. The first element type contains ArithmeticExpression which is also default constructed. Can you see the problem already?
Just make sure the default constructed state doesn't lead to infinite recursion:
using ArithmeticExpression = x3::variant<
unsigned long,
x3::forward_ast<ArithmeticUnaryExpression>
>;

boost::spirit::qi strange attribute print out

#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace test {
template< typename Rng,typename Expr >
bool parse(Rng const& rng,Expr const& expr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr);
} catch(qi::expectation_failure<decltype(itBegin)> const& exfail) {
exfail;
return false;
}
}
template< typename Rng,typename Expr,typename Attr >
bool parse(Rng const& rng,Expr const& expr,Attr& attr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr,attr);
} catch(qi::expectation_failure<decltype(itBegin)> const&) {
return false;
}
}
}
void print1(std::string const& s) {
std::cout<<"n1 = "<<s<<std::endl;
}
void print2(std::string const& s) {
std::cout<<"n2 = "<<s<<std::endl;
}
int main() {
qi::rule<std::string::const_iterator, std::string()> number = +qi::digit;
std::string input = "1+2";
std::string result;
if( test::parse(input, number[print1] >> *( qi::char_("+-") >> number[print2]) >> qi::eoi, result) ) {
std::cout<<"Match! result = "<<result<<std::endl;
} else {
std::cout<<"Not match!"<<std::endl;
}
return 0;
}
I'm expecting the output of this program is,
n1 = 1
n2 = 2
Match! result = 1+2
But the output is actually very strange for n2,
n1 = 1
n2 = 1+2
Match! result = 1+2
Why is the second number's attribute "1+2" instead of just "2"?
I know there are some other ways to parse this expression such as using qi::int_. I'm just wondering why do I get this strange attribute from it. Thanks!

Backtracking doesn't undo changes to container attributes. Adjacent compatible parser expressions bind to the same container attribute.
Both attributes get bound to the same container attribute (result) which means you are printing the same variable twice.
If you didn't want that, be explicit, e.g.
Live On Coliru
std::string result;
std::vector<std::string> v;
if( test::parse(input, number[px::bind(print1, qi::_1)] >> *qi::as_string[qi::char_("+-") >> number[print2]] >> qi::eoi, result, v) ) {
std::cout<<"Match! result = "<<result<<std::endl;
for (auto s : v)
std::cout << s << "\n";
} else {
Prints
n1 = 1
n2 = +2
Match! result = 1
+2
Now I don't know what you want to achieve, but this could be close:
if (test::parse(input, qi::raw [number[print_("n1",_1)] > *(qi::char_("+-") >> number[print_("n2",_1)]) ] >> qi::eoi, result)) {
Live On Coliru
#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace test {
template< typename Rng,typename Expr,typename... Attr >
bool parse(Rng const& rng,Expr const& expr,Attr&... attr) noexcept {
auto itBegin = boost::begin(rng);
auto itEnd = boost::end(rng);
try {
return qi::parse(itBegin,itEnd,expr,attr...);
} catch(qi::expectation_failure<decltype(itBegin)> const&) {
return false;
}
}
}
void printn(std::string const& label, std::string const& s) {
std::cout << label << " = " << s << std::endl;
}
BOOST_PHOENIX_ADAPT_FUNCTION(void, print_, printn, 2)
int main() {
qi::rule<std::string::const_iterator, std::string()> number = +qi::digit;
std::string input = "1+2";
std::string result;
using qi::_1;
if (test::parse(input, qi::raw [number[print_("n1",_1)] > *(qi::char_("+-") >> number[print_("n2",_1)]) ] >> qi::eoi, result)) {
std::cout<<"Match! result = "<<result<<std::endl;
} else {
std::cout<<"Not match!"<<std::endl;
}
return 0;
}
Prints
n1 = 1
n2 = 2
Match! result = 1+2
BONUS
A little more separation of concern:
Live On Coliru
void printn(int n, std::string const& s) {
std::cout << "n" << n << " = " << s << std::endl;
}
BOOST_PHOENIX_ADAPT_FUNCTION(void, print_, printn, 2)
int main() {
qi::rule<std::string::const_iterator, std::string()> number;
int n = 1;
number %= qi::as_string[+qi::digit] [print_(px::ref(n)++, qi::_1)];
std::string input = "1+2";
std::string result;
if (test::parse(input, qi::raw [number > *(qi::char_("+-") >> number) ] >> qi::eoi, result)) {
std::cout << "Match! result = " << result << std::endl;
} else {
std::cout << "Not match!" << std::endl;
}
return 0;
}

How to stop string concatenation in Spirit Qi 'repeat' parser?

I would like to split a string into parts:
input = "part1/part2/part3/also3"
and fill the structure that consist of three std::string with these parts.
struct strings
{
std::string a; // <- part1
std::string b; // <- part2
std::string c; // <- part3/also3
};
However my parser seems to merge the parts together and store it into the first std::string.
Here is the code on coliru
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
struct strings
{
std::string a;
std::string b;
std::string c;
};
BOOST_FUSION_ADAPT_STRUCT(strings,
(std::string, a) (std::string, b) (std::string, c))
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/'] > last_part;
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
}
The output is:
Parsed: 1
a:onetwo
b:three/four
c:
while I expected:
Parsed: 1
a:one
b:two
c:three/four
I'd like not to modify the grammar heavily and leave "repeat" statement in it, because the "real" grammar is much more complex of course and I will need to have it there. Just need to find the way to disable the concatenations. I tried
repeat (parts-1) [as_string[part] > '/']
but that does not compile.

The trouble here is specifically that qi::repeat is documented to expose a container of element-types.
Now, because the exposed attribute type of the rule (strings) is not a container-type, Spirit "knows" how to flatten the values.
Of course it's not what you wanted in this case, but usually this heuristic makes for really convenient accumulation of string values.
Fix 1: use a container attribute
You could witness the reverse fix by getting rid of the non-container (sequence) target attribute:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
namespace qi = ::boost::spirit::qi;
using strings = std::vector<std::string>;
template <typename It>
struct split_string_grammar: qi::grammar<It, strings ()>
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
split_string = repeat (parts-1) [part > '/']
> last_part
;
part = +(~char_ ("/"))
;
last_part = +char_
;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings ()> split_string;
qi::rule<It, std::string ()> part, last_part;
};
int main ()
{
std::string const input { "one/two/three/four" };
auto const last = input.end ();
auto first = input.begin ();
// split into 3 parts.
split_string_grammar<decltype (first)> split_string (3);
strings ss;
bool ok = qi::parse (first, last, split_string, ss);
std::cout << "Parsed: " << ok << "\n";
if (ok) {
for(auto i = 0ul; i<ss.size(); ++i)
std::cout << static_cast<char>('a'+i) << ":" << ss[i] << "\n";
}
}
What you really wanted:
Of course you want to keep the struct/sequence adaptation (?); In this case that's really tricky because as soon as you use any kind of Kleene operator (*,%) or qi::repeat you'll have the attribute transformation rules as outlined above, ruining your mood.
Luckily, I just remembered I have a hacky solution based on the auto_ parser. Note the caveat in this older answer though:
Read empty values with boost::spirit
CAVEAT Specializing for std::string directly like this might not be the best idea (it might not always be appropriate and might interact badly with other parsers).
By default create_parser<std::string> is not defined, so you might decide this usage is good enough for your case:
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
struct strings {
std::string a;
std::string b;
std::string c;
};
namespace boost { namespace spirit { namespace traits {
template <> struct create_parser<std::string> {
typedef proto::result_of::deep_copy<
BOOST_TYPEOF(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
)
>::type type;
static type call() {
return proto::deep_copy(
qi::lexeme [+(qi::char_ - '/')] | qi::attr("(unspecified)")
);
}
};
}}}
BOOST_FUSION_ADAPT_STRUCT(strings, (std::string, a)(std::string, b)(std::string, c))
template <typename Iterator>
struct google_parser : qi::grammar<Iterator, strings()> {
google_parser() : google_parser::base_type(entry, "contacts") {
using namespace qi;
entry =
skip('/') [auto_]
;
}
private:
qi::rule<Iterator, strings()> entry;
};
int main() {
using It = std::string::const_iterator;
google_parser<It> p;
std::string const input = "part1/part2/part3/also3";
It f = input.begin(), l = input.end();
strings ss;
bool ok = qi::parse(f, l, p >> *qi::char_, ss, ss.c);
if (ok)
{
std::cout << "a:" << ss.a << "\n";
std::cout << "b:" << ss.b << "\n";
std::cout << "c:" << ss.c << "\n";
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
Prints
a:part1
b:part2
c:part3/also3
Update/Bonus
In reponse to the OP's own answer I wanted to challenge myself to write it more generically indeed.
The main thing is to to write set_field_ in such a way that it doesn't know/assume more than required about the destination sequence type.
With a bit of Boost Fusion magic that became:
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
It has the added flexibility of applying Spirit's attribute compatibility rules¹. So, you can use the same grammar with both the following types:
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
To drive the point home, I made the adaptation of the second struct reverse the field order:
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
Without further ado, the demo program:
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_RESULT_OF_USE_DECLTYPE
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/algorithm/iteration.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace fus = boost::fusion;
struct strings {
std::string a, b, c;
};
struct alternative {
std::vector<char> first;
std::string second;
std::string third;
};
BOOST_FUSION_ADAPT_STRUCT(strings, a, b, c)
BOOST_FUSION_ADAPT_STRUCT(alternative, third, second, first) // REVERSE ORDER :)
// output helpers for demo:
namespace {
inline std::ostream& operator<<(std::ostream& os, strings const& data) {
return os
<< "a:\"" << data.a << "\" "
<< "b:\"" << data.b << "\" "
<< "c:\"" << data.c << "\" ";
}
inline std::ostream& operator<<(std::ostream& os, alternative const& data) {
os << "first: vector<char> { \""; os.write(&data.first[0], data.first.size()); os << "\" } ";
os << "second: \"" << data.second << "\" ";
os << "third: \"" << data.third << "\" ";
return os;
}
}
struct set_field_
{
template <typename Seq, typename Value>
void operator() (Seq& seq, Value const& src, unsigned idx) const {
fus::fold(seq, 0u, Visit<Value> { idx, src });
}
private:
template <typename Value>
struct Visit {
unsigned target_idx;
Value const& value;
template <typename B>
unsigned operator()(unsigned i, B& dest) const {
if (target_idx == i) {
boost::spirit::traits::assign_to(value, dest);
}
return i + 1;
}
};
};
boost::phoenix::function<set_field_> const set_field = {};
template <typename It, typename Target>
struct split_string_grammar: qi::grammar<It, Target(), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
_a_type _current; // custom placeholder
split_string =
eps [ _current = 0u ]
> repeat (parts-1)
[part [ set_field(_val, _1, _current++) ] > '/']
> last_part [ set_field(_val, _1, _current++) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, Target(), qi::locals<unsigned> > split_string;
qi::rule<It, std::string()> part, last_part;
};
template <size_t N = 3, typename Target>
void run_test(Target target) {
using It = std::string::const_iterator;
std::string const input { "one/two/three/four" };
It first = input.begin(), last = input.end();
split_string_grammar<It, Target> split_string(N);
bool ok = qi::parse (first, last, split_string, target);
if (ok) {
std::cout << target << '\n';
} else {
std::cout << "Parse failed\n";
}
if (first != last)
std::cout << "Remaining input left unparsed: '" << std::string(first, last) << "'\n";
}
int main ()
{
run_test(strings {});
run_test(alternative {});
}
Output:
a:"one" b:"two" c:"three/four"
first: vector<char> { "three/four" } second: "two" third: "one"
¹ as with BOOST_SPIRIT_ACTIONS_ALLOW_ATTR_COMPAT

Besides sehe's suggestions one more possible way is to use semantic actions (coliru):
struct set_field_
{
void operator() (strings& dst, std::string const& src, unsigned& idx) const
{
assert (idx < 3);
switch (idx++) {
case 0: dst.a = src; break;
case 1: dst.b = src; break;
case 2: dst.c = src; break;
}
}
};
boost::phoenix::function<set_field_> const set_field { set_field_ {} };
template <typename It>
struct split_string_grammar: qi::grammar<It, strings (), qi::locals<unsigned> >
{
split_string_grammar (int parts)
: split_string_grammar::base_type (split_string)
{
assert (parts > 0);
using namespace qi;
using boost::phoenix::val;
split_string = eps [ _a = val (0) ]
> repeat (parts-1) [part [ set_field (_val, _1, _a) ] > '/']
> last_part [ set_field (_val, _1, _a) ];
part = +(~char_ ("/"));
last_part = +char_;
BOOST_SPIRIT_DEBUG_NODES ((split_string) (part) (last_part))
}
private:
qi::rule<It, strings (), qi::locals<unsigned> > split_string;
qi::rule<It, std::string ()> part, last_part;
};

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Parsing characters into an std::map<char,int> using boost::qi - c++

Related

boost spirit x3 variant and std::pair

Templating Spirit X3 parser

Segmentation fault for nested boost::variant

boost::spirit::qi strange attribute print out

How to stop string concatenation in Spirit Qi 'repeat' parser?

Categories

Resources