boost spirit istream_iterator consumes too much from stream - c++

Consider the following example extracted from a more complex code:
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <map>
#include <string>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
/// A parsed XML begin tag: an optional indentation level, the tag name and
/// a variable number of attributes.
struct Tag
{
    /// Type used for an XML name.
    typedef std::string name_type;
    /// Type used for an XML attribute value.
    typedef std::string value_type;
    /// A single attribute, stored as a (name, value) pair.
    typedef std::pair< name_type, value_type > attribute_type;
    /// All attributes of a tag. A std::map is used to simplify the attribute search.
    typedef std::map< name_type, value_type > list_type;

    /// Reset the instance to its default-constructed state (indent 0, no name, no attributes).
    void clear( )
    {
        m_indent = 0;
        m_name.clear( );
        m_attribute.clear( );
    }

    // The indent is optional in the tag grammar, so it may never be assigned by a
    // parse; give it a defined value instead of leaving it indeterminate.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type m_name;         // Name of the tag.
    list_type m_attribute;    // List of tag attributes.
};
// Adapt Tag as a Boost.Fusion sequence so its members can be used as parser attributes.
// The members are listed in the order m_indent, m_name, m_attribute:
BOOST_FUSION_ADAPT_STRUCT( Tag,
( std::size_t , m_indent )
( Tag::name_type, m_name )
( Tag::list_type, m_attribute ) )
/// Skipper grammar of the decoder: skips every whitespace and every control character.
///
/// @tparam Iterator  Iterator type of the parsed input. (Named without a leading
///                   underscore: identifiers of the form _Uppercase are reserved.)
template < typename Iterator >
struct skipper
    : qi::grammar< Iterator >
{
    skipper( ) : skipper::base_type( m_skipper )
    {
        // Anything matching qi::space or qi::cntrl is skipped.
        m_skipper = qi::space | qi::cntrl;
    }

    // The single rule this grammar consists of:
    qi::rule< Iterator > m_skipper;
};
/// Grammar used to parse an XML "begin tag" into a Tag.
/// The expected format is as follows: <name a="xyz" b="xyz" ... N="xyz">
///
/// @tparam Iterator  Iterator type of the parsed input.
/// @tparam Skipper   Skipper parser type applied between tokens.
/// (Both are named without a leading underscore: identifiers of the form
/// _Uppercase are reserved.)
template < typename Iterator, typename Skipper >
struct tag_begin : qi::grammar< Iterator, Tag( ), Skipper >
{
    tag_begin( ) : tag_begin::base_type( m_tag )
    {
        // A name/value is a (possibly empty) run of [a-zA-Z0-9_.:]; it ends at the
        // first character outside that set. lexeme[] disables skipping inside it.
        m_string = qi::lexeme[ *( qi::char_( "a-zA-Z0-9_.:" ) ) ];

        // An attribute has the format: name="value"
        m_attribute = m_string
                   >> "=\""
                   >> m_string
                   >> '"';

        // The attribute list is a sequence of attributes, ending before '>':
        m_list = *( m_attribute - '>' );

        // The resulting tag: '<', an optional integer, the name, the attributes, '>'.
        m_tag = '<'
             >> -qi::int_
             >> m_string
             >> m_list
             >> '>';

        // Debug support for the rules; define BOOST_SPIRIT_DEBUG to activate the output:
        BOOST_SPIRIT_DEBUG_NODES( ( m_string )( m_attribute )( m_list ) )
    }

    // The rules used within this grammar:
    qi::rule< Iterator, Tag::name_type( )     , Skipper > m_string;
    qi::rule< Iterator, Tag::attribute_type( ), Skipper > m_attribute;
    qi::rule< Iterator, Tag::list_type( )     , Skipper > m_list;
    qi::rule< Iterator, Tag( )                , Skipper > m_tag;
};
bool beginTag( std::istream& stream, Tag& tag )
{
// Ensure that no whitespace characters are skipped:
stream.unsetf( std::ios::skipws );
// Create begin and end iterator for given stream:
boost::spirit::istream_iterator begin( stream );
boost::spirit::istream_iterator end;
// Define the grammar skipper type:
typedef skipper<
boost::spirit::istream_iterator
> skipper_type;
// Create an instance of the used skipper:
skipper_type sk;
// Create an instance of the used grammar:
tag_begin<
boost::spirit::istream_iterator,
skipper_type
> gr;
// Try to parse the data stored within the stream according the grammar and store the result in the tag variable:
bool r = boost::spirit::qi::phrase_parse( begin,
end,
gr,
sk,
tag );
char nextSym = 0;
stream >> nextSym;
for( auto i = tag.m_attribute.begin( ); i != tag.m_attribute.end( ); ++i )
{
std::cout << i->first << " : " << i->second << std::endl;
}
std::cout << "Next symbol: " << nextSym << std::endl;
return r;
}
int main( )
{
std::stringstream s;
s << "<object cName=\"bool\" cVersion=\"1\" vName=\"bool\"> <value>0</value></object>";
Tag t;
beginTag( s, t );
return 0;
}
I use the grammar to extract the xml tag content. In principle this works as expected and the results are as follows:
cName : bool
cVersion : 1
vName : bool
Next symbol: v
The problem is that the parser consumes too much data. My expectation is that the parser stops when the first tag is closed by '>'. But it seems that the parser also consumes the following spaces and the '<' symbol, so the next symbol read from the stream is 'v'. I would like to avoid this because subsequent parser calls expect the '<' symbol. Any ideas?

There's no reliable way to achieve this.
The problem is you're not re-using the istream_iterators across parse calls. The whole purpose of boost::spirit::istream_iterator is to provide a multi_pass capable iterator on top of an InputIterator¹.
Because Spirit allows arbitrary grammars with arbitrary backtracking, you cannot prevent consuming more than the input that was actually successfully parsed.
The obvious solution here is to integrate all the subsequent steps into the same grammar and/or to reuse the iterators (so the iterator's stored backtrack buffer still contains the characters you need).
Demonstration / Proof of Concept
Here is a version that parses open tags in a loop
// Excerpt: parse begin tags repeatedly with the SAME iterator pair until the grammar fails.
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {
std::cout << "============\nParsed open tag '" << tag.m_name << "'\n";
for (auto const& p: tag.m_attribute)
std::cout << p.first << ": " << p.second << "\n";
count += 1;
// Reset the tag before the next parse writes into it.
tag.clear();
};
// Print whatever input is left between the iterators after the last successful parse.
std::cout << "Next symbol: ";
std::copy(begin, end, std::ostream_iterator<char>(std::cout));
And it prints:
============
Parsed open tag 'object'
cName: bool
cVersion: 1
vName: bool
============
Parsed open tag 'value'
Next symbol: 0</value>
</object>
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <map>
namespace qi = boost::spirit::qi;
/// A parsed XML begin tag: an optional indentation level, the tag name and
/// a variable number of attributes.
struct Tag {
    typedef std::string name_type;
    typedef std::string value_type;
    typedef std::pair<name_type, value_type> attribute_type;
    typedef std::map<name_type, value_type> list_type;

    /// Reset the instance to its default-constructed state (indent 0, no name, no attributes).
    void clear() {
        m_indent = 0;
        m_name.clear();
        m_attribute.clear();
    }

    // The indent is optional in the tag grammar, so it may never be assigned by a
    // parse; give it a defined value instead of leaving it indeterminate.
    std::size_t m_indent = 0; // The tag shall be / is indented by m_indent number of tabs.
    name_type m_name;         // Name of the tag.
    list_type m_attribute;    // List of tag attributes.
};
// Adapt Tag as a Boost.Fusion sequence (members in the order listed) for use as a parser attribute.
BOOST_FUSION_ADAPT_STRUCT(Tag, m_indent, m_name, m_attribute)
// Grammar used to parse an "XML" begin tag into a Tag.
// The expected format is as follows: <name a="xyz" b="xyz" ... N="xyz">
// Iterator is the input iterator type; Skipper is the skipper type of the skipping rules.
template <typename Iterator, typename Skipper> struct tag_begin : qi::grammar<Iterator, Tag(), Skipper> {
tag_begin() : tag_begin::base_type(m_tag) {
// A name/value: zero or more characters from the set [a-zA-Z0-9_.:].
m_string = *qi::char_("a-zA-Z0-9_.:");
// An attribute: name '=' "value"; the quoted value is wrapped in lexeme[].
m_attribute = m_string >> '=' >> qi::lexeme['"' >> m_string >> '"'];
// Zero or more attributes.
m_attributes = *m_attribute;
// The whole tag: '<', an optional integer, the name, the attributes, '>'.
m_tag = '<' >> -qi::int_ >> m_string >> m_attributes >> '>';
BOOST_SPIRIT_DEBUG_NODES((m_string)(m_attribute)(m_attributes))
}
private:
// The following variables define the rules used within this grammar:
qi::rule<Iterator, Tag::attribute_type(), Skipper> m_attribute;
qi::rule<Iterator, Tag::list_type(), Skipper> m_attributes;
qi::rule<Iterator, Tag(), Skipper> m_tag;
// lexemes: declared without a Skipper, unlike the rules above
qi::rule<Iterator, Tag::name_type()> m_string;
};
bool beginTag(std::istream &stream, Tag &tag) {
// Ensure that no whitespace characters are skipped:
stream.unsetf(std::ios::skipws);
typedef boost::spirit::istream_iterator It;
typedef qi::rule<It> skipper_type;
skipper_type sk = qi::space | qi::cntrl;
tag_begin<boost::spirit::istream_iterator, skipper_type> gr;
It begin(stream), end;
int count = 0;
while (boost::spirit::qi::phrase_parse(begin, end, gr, sk, tag)) {
std::cout << "============\nParsed open tag '" << tag.m_name << "'\n";
for (auto const& p: tag.m_attribute)
std::cout << p.first << ": " << p.second << "\n";
count += 1;
tag.clear();
};
std::cout << "Next symbol: ";
std::copy(begin, end, std::ostream_iterator<char>(std::cout));
return count > 0;
}
int main() {
std::stringstream s;
s << R"(
<object cName="bool" cVersion="1" vName="bool">
<value>0</value>
</object>
)";
Tag t;
beginTag(s, t);
}
¹ (which is strictly forward-only and cannot be repeatedly dereferenced)

Related

Boost::Spirit doubles character when followed by a default value

I use boost::spirit to parse (a part) of a monomial like x, y, xy, x^2, x^3yz. I want to save the variables of the monomial into a map, which also stores the corresponding exponent. Therefore the grammar should also save the implicit exponent of 1 (so x stores as if it was written as x^1).
start = +(potVar);
potVar=(varName>>'^'>>exponent)|(varName>> qi::attr(1));// First try: This doubles the variable name
//potVar = varName >> (('^' >> exponent) | qi::attr(1));// Second try: This works as intended
exponent = qi::int_;
varName = qi::char_("a-z");
When using the default attribute as in the line "First try", Spirit doubles the variable name.
Everything works as intended when using the default attribute as in the line "Second try".
'First try' reads a variable x and stores the pair [xx, 1].
'Second try' reads a variable x and stores the pair [x, 1].
I think I solved the original problem myself. The second try works. However, I don't see how I doubled the variable name. Because I am about to get familiar with boost::spirit, which is a collection of challenges for me, and there are probably more to come, I would like to understand this behavior.
This is the whole code to recreate the problem. The frame of the grammar is copied from a presentation of the KIT https://panthema.net/2018/0912-Boost-Spirit-Tutorial/ , and Stackoverflow was already very helpful, when I needed the header, which enables me to use the std::pair.
#include <iostream>
#include <iomanip>
#include <stdexcept>
#include <cmath>
#include <map>
#include <utility>//for std::pair
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp> //https://stackoverflow.com/questions/53953642/parsing-map-of-variants-with-boost-spirit-x3
namespace qi = boost::spirit::qi;
/// Parse the whole of `input` with parser `p` and skipper `s`, or throw.
///
/// @param input  Text to parse.
/// @param p      Parser (grammar or expression) to apply.
/// @param s      Skipper to apply between tokens.
/// @param args   Attribute(s) forwarded to qi::phrase_parse.
/// @throws std::runtime_error  if the parser fails or does not consume all input;
///                             the unparsed remainder is printed to std::cout first.
template <typename Parser, typename Skipper, typename ... Args>
void PhraseParseOrDie(
    const std::string& input, const Parser& p, const Skipper& s,
    Args&& ... args)
{
    std::string::const_iterator begin = input.begin();
    const std::string::const_iterator end = input.end();

    const bool matched = boost::spirit::qi::phrase_parse(
        begin, end, p, s, std::forward<Args>(args) ...);

    // Checking only `begin != end` misses a failed parse on empty input,
    // so the parser's own result has to be checked as well.
    if (!matched || begin != end) {
        std::cout << "Unparseable: "
                  << std::quoted(std::string(begin, end)) << std::endl;
        throw std::runtime_error("Parse error");
    }
}
/// Grammar for a monomial such as "x", "xy" or "x^3y^1".
/// The attribute is a map from variable name to exponent; a variable without an
/// explicit exponent is stored with exponent 1.
class ArithmeticGrammarMonomial : public qi::grammar<
    std::string::const_iterator,
    std::map<std::string, int>(), qi::space_type>
{
public:
    using Iterator = std::string::const_iterator;

    ArithmeticGrammarMonomial() : ArithmeticGrammarMonomial::base_type(start)
    {
        // One or more variables, each with its exponent.
        start = +(potVar);

        // Parse the variable name exactly once, then choose the exponent.
        // Writing this as (varName >> '^' >> exponent) | (varName >> attr(1))
        // would parse varName in both branches; the string filled by the failed
        // first branch is not rolled back, which doubles the name ("xx").
        potVar = varName >> (('^' >> exponent) | qi::attr(1));

        exponent = qi::int_;
        varName = qi::char_("a-z");
    }

    qi::rule<Iterator, std::map<std::string, int>(), qi::space_type> start;
    qi::rule<Iterator, std::pair<std::string, int>(), qi::space_type> potVar;
    qi::rule<Iterator, int()> exponent;
    qi::rule<Iterator, std::string()> varName;
};
// Parse `input` as a monomial and print every (variable, exponent) entry.
// Propagates the exception thrown by PhraseParseOrDie on a parse error.
void test2(std::string input)
{
    typedef std::map<std::string, int> exponent_map;

    exponent_map exponents;
    PhraseParseOrDie(input, ArithmeticGrammarMonomial(), qi::space, exponents);

    std::cout << "test2() parse result: "<<std::endl;
    for (exponent_map::iterator entry = exponents.begin(); entry != exponents.end(); ++entry) {
        std::cout << entry->first << entry->second << std::endl;
    }
}
/******************************************************************************/
// Runs test2() twice: on argv[1] when an argument is given, otherwise on the
// two built-in samples.
int main(int argc, char* argv[])
{
    const char* first_input = "x^3y^1";
    const char* second_input = "xy";
    if (argc >= 2) {
        first_input = argv[1];
        second_input = argv[1];
    }

    std::cout << "Parse Monomial 1" << std::endl;
    test2(first_input);
    test2(second_input);
    return 0;
}
Live demo
I think I solved the original problem myself. The second try works.
Indeed. It's how I'd do this (always match the AST with your parser expressions).
However, I don't see how I doubled the variable name.
It's due to backtracking with container attributes: they don't get rolled back. The first branch parses varName into the string of the pair and then fails at '^'; the parser backtracks into the second branch, which parses varName again and appends it to the same string.
boost::spirit::qi duplicate parsing on the output
Understanding Boost.spirit's string parser
Parsing with Boost::Spirit (V2.4) into container
Boost Spirit optional parser and backtracking
boost::spirit alternative parsers return duplicates
It can also crop up with semantic actions:
Boost Semantic Actions causing parsing issues
Boost Spirit optional parser and backtracking
In short:
match your AST structure in your rule expression, or use qi::hold to force the issue (at performance cost)
avoid semantic actions (Boost Spirit: "Semantic actions are evil"?)
For inspiration, here's a simplified take using Spirit X3
Live On Compiler Explorer
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <fmt/ranges.h>
#include <map>
// Spirit X3 parsers for a monomial, plus a helper that parses or throws.
namespace Parsing {
namespace x3 = boost::spirit::x3;
// Exponent: '^' followed by an integer, otherwise the attribute 1.
auto exponent = '^' >> x3::int_ | x3::attr(1);
// Variable name: exactly one character from a-z.
auto varName = x3::repeat(1)[x3::char_("a-z")];
// A variable with its exponent, exposed as a (name, exponent) pair.
auto potVar
= x3::rule<struct P, std::pair<std::string, int>>{}
= varName >> exponent;
// One or more variables followed by end of input, skipping whitespace.
auto start = x3::skip(x3::space)[+potVar >> x3::eoi];
// Parse `input` with `start` into `into`.
// Prints the input remaining after the iterator to stderr and throws
// std::runtime_error when x3::parse reports failure.
template <typename T = x3::unused_type>
void StrictParse(std::string_view input, T&& into = {})
{
auto f = input.begin(), l = input.end();
if (!x3::parse(f, l, start, into)) {
fmt::print(stderr, "Error at: '{}'\n", std::string(f, l));
throw std::runtime_error("Parse error");
}
}
} // namespace Parsing
// Parse `input` as a monomial and print the input next to the resulting map.
void test2(std::string input) {
    std::map<std::string, int> exponents;
    Parsing::StrictParse(input, exponents);
    fmt::print("{} -> {}\n", input, exponents);
}
// Runs test2() on the two built-in samples.
int main() {
    const char* const samples[] = {"x^3y^1", "xy"};
    for (const char* sample : samples) {
        test2(sample);
    }
}
Prints
x^3y^1 -> [("x", 3), ("y", 1)]
xy -> [("x", 1), ("y", 1)]
Bonus Notes
It looks to me like you should be more careful. Even if you assume that all variables are 1 letter and no terms can occur (only factors), then still you need to correctly handle x^5y^2x to be x^6y^2 right?
Here's Qi version that uses semantic actions to correctly accumulate like factors:
Live On Coliru
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <map>
namespace qi = boost::spirit::qi;
using Iterator = std::string::const_iterator;
using Monomial = std::map<char, int>;
// Grammar for a monomial whose attribute is a Monomial (map from variable character to exponent).
// The semantic action adds each parsed exponent to the entry of its variable.
struct ArithmeticGrammarMonomial : qi::grammar<Iterator, Monomial()> {
ArithmeticGrammarMonomial() : ArithmeticGrammarMonomial::base_type(start) {
using namespace qi;
// Exponent: '^' followed by an integer, otherwise the attribute 1.
exp_ = '^' >> int_ | attr(1);
// One or more (variable, exponent) factors, skipping whitespace;
// for each factor: _val[variable] += exponent.
start = skip(space)[ //
+(char_("a-z") >> exp_)[_val[_1] += _2] //
];
}
private:
qi::rule<Iterator, Monomial()> start;
qi::rule<Iterator, int(), qi::space_type> exp_;
};
void do_test(std::string_view input) {
Monomial output;
static const ArithmeticGrammarMonomial p;
Iterator f(begin(input)), l(end(input));
qi::parse(f, l, qi::eps > p, output);
std::cout << std::quoted(input) << " -> " << std::endl;
for (auto& [var,exp] : output)
std::cout << " - " << var << '^' << exp << std::endl;
}
// Runs do_test() on the three built-in samples.
int main() {
    const char* const samples[] = {"x^3y^1", "xy", "x^5y^2x"};
    for (const char* sample : samples) {
        do_test(sample);
    }
}
Prints
"x^3y^1" ->
- x^3
- y^1
"xy" ->
- x^1
- y^1
"x^5y^2x" ->
- x^6
- y^2

Got stuck porting legacy boost::spirit code

I am porting some legacy code from VS2010 & boost 1.53 to VS2017 & boost 1.71.
I have been stuck for the last two hours trying to compile it.
The code is:
#include <string>
#include <vector>
#include <fstream>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
using qi::_1; using qi::_2; using qi::_3; using qi::_4;
/// Kind of a parsed command.
enum TYPE { SEND, CHECK, COMMENT };

/// One parsed command.
/// The scalar members carry default initializers so that a default-initialized
/// Command (`Command c;`) has defined values, the same ones as `Command()`.
struct Command
{
    TYPE type = SEND;       // Kind of the command.
    std::string id;         // First argument of the command.
    std::string arg1;       // Second argument of the command.
    std::string arg2;       // Additional argument; not filled by the SEND_CONFIRM rule.
    bool checking = false;  // Flag parsed from TRUE / FALSE.
};
class Parser
{
typedef boost::spirit::istream_iterator It;
typedef std::vector<Command> Commands;
struct deferred_fill
{
template <typename R, typename S, typename T, typename U> struct result { typedef void type; };//Not really sure still necessary
typedef void result_type;//Not really sure still necessary
void operator() (boost::iterator_range<It> const& id, boost::iterator_range<It> const& arg1, bool checking, Command& command) const
{
command.type = TYPE::SEND;
command.id.assign(id.begin(), id.end());
command.arg1.assign(arg1.begin(), arg1.end());
command.checking = checking;
}
};
private:
qi::symbols<char, bool> value;
qi::rule<It> ignore;
qi::rule<It, Command()> send;
qi::rule<It, Commands()> start;
boost::phoenix::function<deferred_fill> fill;
public:
std::vector<Command> commands;
Parser()
{
using namespace qi;
using boost::phoenix::push_back;
value.add("TRUE", true)
("FALSE", false);
send = ("SEND_CONFIRM" >> *blank >> '(' >> *blank >> raw[*~char_(',')] >> ','
>> *blank >> raw[*~char_(',')] >> ','
>> *blank >> value >> *blank >> ')' >> *blank >> ';')[fill(_1, _2, _3, _val)];
ignore = *~char_("\r\n");
start = (send[push_back(_val, _1)] | ignore) % eol;
}
void parse(const std::string& path)
{
std::ifstream in(path, std::ios_base::in);
if (!in) return;
in >> std::noskipws;//No white space skipping
boost::spirit::istream_iterator first(in);
boost::spirit::istream_iterator last;
qi::parse(first, last, start, commands);
}
};
int main(int argc, char* argv[])
{
Parser parser;
parser.parse("file.txt");
return 0;
}
The compiler complains in the next way (only copy first lines):
1>z:\externos\boost_1_71_0\boost\phoenix\core\detail\function_eval.hpp(116): error C2039: 'type': no es un miembro de 'boost::result_of<const Parser::deferred_fill (std::vector<Value,std::allocator<char>> &,std::vector<Value,std::allocator<char>> &,boost::iterator_range<Parser::It> &,Command &)>'
1> with
1> [
1> Value=char
1> ]
1>z:\externos\boost_1_71_0\boost\phoenix\core\detail\function_eval.hpp(114): note: vea la declaración de 'boost::result_of<const Parser::deferred_fill (std::vector<Value,std::allocator<char>> &,std::vector<Value,std::allocator<char>> &,boost::iterator_range<Parser::It> &,Command &)>'
1> with
1> [
1> Value=char
1> ]
1>z:\externos\boost_1_71_0\boost\phoenix\core\detail\function_eval.hpp(89): note: vea la referencia a la creación de instancias de plantilla clase de 'boost::phoenix::detail::function_eval::result_impl<F,void (Head,const boost::phoenix::actor<boost::spirit::argument<1>>&,const boost::phoenix::actor<boost::spirit::argument<2>>&,const boost::phoenix::actor<boost::spirit::attribute<0>>&),const boost::phoenix::vector2<Env,Actions> &>' que se está compilando
1> with
1> [
1> F=const boost::proto::exprns_::basic_expr<boost::proto::tagns_::tag::terminal,boost::proto::argsns_::term<Parser::deferred_fill>,0> &,
1> Head=const boost::phoenix::actor<boost::spirit::argument<0>> &,
1> Env=boost::phoenix::vector4<const boost::phoenix::actor<boost::proto::exprns_::basic_expr<boost::phoenix::detail::tag::function_eval,boost::proto::argsns_::list5<boost::proto::exprns_::basic_expr<boost::proto::tagns_::tag::terminal,boost::proto::argsns_::term<Parser::deferred_fill>,0>,boost::phoenix::actor<boost::spirit::argument<0>>,boost::phoenix::actor<boost::spirit::argument<1>>,boost::phoenix::actor<boost::spirit::argument<2>>,boost::phoenix::actor<boost::spirit::attribute<0>>>,5>> *,boost::fusion::vector<std::vector<char,std::allocator<char>>,std::vector<char,std::allocator<char>>,boost::iterator_range<Parser::It>,std::vector<char,std::allocator<char>>,boost::iterator_range<Parser::It>,std::vector<char,std::allocator<char>>,bool,std::vector<char,std::allocator<char>>,std::vector<char,std::allocator<char>>> &,boost::spirit::context<boost::fusion::cons<Command &,boost::fusion::nil_>,boost::fusion::vector<>> &,bool &> &,
1> Actions=const boost::phoenix::default_actions &
1> ]
I guess that the error is related to the use of boost::spirit::istream_iterator instead of char*, but I can't figure out how to fix it so that it works again.
I have run out of ideas; can anyone see where my mistake is?
Aw. You're doing awesome things. Sadly/fortunately it's overcomplicated.
So let's first fix, and then simplify.
The Error
It's like you said,
void operator() (boost::iterator_range<It> const& id, boost::iterator_range<It> const& arg1, bool checking, Command& command) const
Doesn't match what actually gets invoked:
void Parser::deferred_fill::operator()(T&& ...) const [with T =
{std::vector<char>&, std::vector<char>&,
boost::iterator_range<boost::spirit::basic_istream_iterator<...> >&,
Command&}]
The reason is NOT the iterator (as you can see, it's boost::spirit::basic_istream_iterator alright).
However it's because you're getting other things as attributes. Turns out *blank exposes the attribute as a vector<char>. So you can "fix" that by omit[]-ing those. Let's instead wrap it in an attribute-less rule like ignore so we reduce the clutter.
Now the invocation is with
void Parser::deferred_fill::operator()(T&& ...) const [with T = {boost::iterator_range<It>&, boost::iterator_range<It>&, bool&, Command&}]
So it is compatible and compiles. Parsing:
SEND_CONFIRM("this is the id part", "this is arg1", TRUE);
With
Parser parser;
parser.parse("file.txt");
std::cout << std::boolalpha;
for (auto& cmd : parser.commands) {
std::cout << '{' << cmd.id << ", "
<< cmd.arg1 << ", "
<< cmd.arg2 << ", "
<< cmd.checking << "}\n";
}
Prints
{"this is the id part", "this is arg1", , TRUE}
Let's improve this
This calls for a skipper
This calls for automatic attribute propagation
Some other elements of style
Skippers
Instead of "calling" a skipper explicitly, let's use the built in capability:
rule<It, Attr(), Skipper> x;
defines a rule that skips over input sequences matched by a parser of the Skipper type. You need to actually pass in a skipper of that type, in one of two ways:
using qi::phrase_parse instead of qi::parse
by using the qi::skip() directive
I always advocate the second approach, because it makes for a friendlier, less error-prone interface.
So declaring the skipper type:
qi::rule<It, Command(), qi::blank_type> send;
We can reduce the rule to:
send = (lit("SEND_CONFIRM") >> '('
>> raw[*~char_(',')] >> ','
>> raw[*~char_(',')] >> ','
>> value >> ')' >> ';')
[fill(_1, _2, _3, _val)];
And then pass a skipper from the start rule:
start = skip(blank) [
(send[push_back(_val, _1)] | ignore) % eol
];
That's all. Still compiles and matches the same.
Live On Coliru
Skipping with Lexemes
Still the same topic, lexemes actually inhibit the skipper¹, so you don't have to raw[]. This changes the exposed attributes to vector<char> as well:
void operator() (std::vector<char> const& id, std::vector<char> const& arg1, bool checking, Command& command) const
Live On Coliru
Automatic Attribute Propagation
Qi has semantic actions, but its real strength is in them being optional: Boost Spirit: "Semantic actions are evil"?
push_back(_val, _1) is actually the automatic attribute propagation semantics for *p, +p and p % delim² anyway, so just drop it:
start = skip(blank) [
(send | ignore) % eol
];
(note that send|ignore actually synthesizes optional<Command> which is fine for automatic propagation)
std::vector is attribute-compatible with std::string, e.g.. So if we can add a placeholder for arg2 we can match the Command structure layout:
send = lit("SEND_CONFIRM") >> '('
>> attr(SEND) // fill type
>> lexeme[*~char_(',')] >> ','
>> lexeme[*~char_(',')] >> ','
>> attr(std::string()) // fill for arg2
>> value >> ')' >> ';'
;
Now to be able to drop fill and its implementation, we have to adapt Command as a fusion sequence:
BOOST_FUSION_ADAPT_STRUCT(Command, type, id, arg1, arg2, checking)
Elements of Style 1
Using a namespace for your Command types makes it easier to use the operator<< overloads for Commands via ADL, so we can just std::cout << cmd;
At this point, it all works in a fraction of the code: Live On Coliru
Elements of Style 2
If you can, make your parser stateless. That means it can be const, so you can:
reuse it without costly construction
the optimizer has more to work with
it's more testable (stateful things are harder to prove idempotent)
So, instead of having commands a member, just return them. While we're at it, we can make parse a static function
Instead of hardcoding the iterator type, it's flexible to have it as a template argument. That way you're not stuck with the overhead of multi_pass_adaptor and istream_iterator if you have a command in a char[] buffer, string or string_view at some point.
Also, deriving your Parser from qi::grammar with a suitable entry-point means you can use it as a parser expression (actually a non-terminal, just like rule<>) as any other parser.
Consider enabling rule debugging (see example)
Full Code
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <fstream>
namespace qi = boost::spirit::qi;
// Command data types together with their stream output operators.
namespace Commands {
    // Kind of a command.
    enum TYPE { SEND, CHECK, COMMENT };
    // Boolean printed as TRUE / FALSE.
    enum BOOL { FALSE, TRUE };

    // One parsed command.
    struct Command {
        TYPE type;
        std::string id;
        std::string arg1;
        std::string arg2;
        BOOL checking;
    };

    using Commands = std::vector<Command>;

    // for (debug) output: writes the enumerator name, or "(unknown)" for any other value
    static inline std::ostream& operator<<(std::ostream& os, TYPE t) {
        if (t == SEND) {
            return os << "SEND";
        }
        if (t == CHECK) {
            return os << "CHECK";
        }
        if (t == COMMENT) {
            return os << "COMMENT";
        }
        return os << "(unknown)";
    }

    // for (debug) output: writes "TRUE" for a non-zero value, "FALSE" otherwise
    static inline std::ostream& operator<<(std::ostream& os, BOOL b) {
        if (b) {
            return os << "TRUE";
        }
        return os << "FALSE";
    }

    // Make the Fusion sequence output operator visible in this namespace.
    using boost::fusion::operator<<;
}
// Adapt Commands::Command as a Boost.Fusion sequence (members in the order listed) for use as a parser attribute.
BOOST_FUSION_ADAPT_STRUCT(Commands::Command, type, id, arg1, arg2, checking)
// Grammar for a list of command lines, plus a helper that parses a whole stream.
namespace Commands {
// Grammar whose attribute is the list of parsed commands; It is the input iterator type.
template <typename It>
class Parser : public qi::grammar<It, Commands()> {
public:
// NOTE(review): this member is not referenced anywhere in the visible code.
Commands commands;
Parser() : Parser::base_type(start) {
using namespace qi;
value.add("TRUE", TRUE)
("FALSE", FALSE);
// The sequence below supplies one attribute per Command member, in member order.
send = lit("SEND_CONFIRM") >> '('
>> attr(SEND) // fill type
>> lexeme[*~char_(',')] >> ','
>> lexeme[*~char_(',')] >> ','
>> attr(std::string()) // fill for arg2
>> value >> ')' >> ';'
;
// One or more characters other than '\r' and '\n'.
ignore = +~char_("\r\n");
// Lines separated by eol, parsed with blank as the skipper.
start = skip(blank) [
(send | ignore) % eol
];
BOOST_SPIRIT_DEBUG_NODES((start)(send)(ignore))
}
private:
qi::symbols<char, BOOL> value;
qi::rule<It> ignore;
qi::rule<It, Command(), qi::blank_type> send;
qi::rule<It, Commands()> start;
};
// Parse all commands from `in`.
// Throws std::runtime_error("command parse error") when qi::parse reports failure.
static Commands parse(std::istream& in) {
using It = boost::spirit::istream_iterator;
// A single parser instance is constructed once and reused across calls.
static const Parser<It> parser;
It first(in >> std::noskipws), //No white space skipping
last;
Commands commands;
if (!qi::parse(first, last, parser, commands)) {
throw std::runtime_error("command parse error");
}
return commands; // c++11 move semantics
}
}
// Parses commands from standard input and prints one per line;
// a parse error is reported by printing the exception message.
int main() {
    try {
        auto parsed = Commands::parse(std::cin);
        for (auto& command : parsed) {
            std::cout << command << "\n";
        }
    } catch(std::exception const& error) {
        std::cout << error.what() << "\n";
    }
}
Prints
(SEND "this is the id part" "this is arg1" TRUE)
Or indeed with BOOST_SPIRIT_DEBUG defined:
<start>
<try>SEND_CONFIRM("this i</try>
<send>
<try>SEND_CONFIRM("this i</try>
<success>\n</success>
<attributes>[[SEND, [", t, h, i, s, , i, s, , t, h, e, , i, d, , p, a, r, t, "], [", t, h, i, s, , i, s, , a, r, g, 1, "], [], TRUE]]</attributes>
</send>
<send>
<try></try>
<fail/>
</send>
<ignore>
<try></try>
<fail/>
</ignore>
<success>\n</success>
<attributes>[[[SEND, [", t, h, i, s, , i, s, , t, h, e, , i, d, , p, a, r, t, "], [", t, h, i, s, , i, s, , a, r, g, 1, "], [], TRUE]]]</attributes>
</start>
¹ while pre-skipping as you require; see Boost spirit skipper issues
² (and then some, but let's not digress)

How to keep space character on breaking down input into a sequence of different parts using Alternative Parser?

I want to write a simple C++ parser which extracts the block hierarchy. I am using this rule:
std::string rest_content;
std::vector<boost::tuple<std::string, std::string>> scopes;
qi::rule<It, qi::ascii::space_type> block = *(
r.comment
| r.scope[push_back(boost::phoenix::ref(scopes), _1)]
| qi::char_[boost::phoenix::ref(rest_content) += _1] // rest
);
qi::phrase_parse(first, last,
block,
ascii::space);
which is supposed to break down code into three parts: comment, scope (code surrounded by "{}") and "rest".
The problem is that all the space characters are removed from the "rest". I need those spaces for later parsing (such as extracting identifiers).
I have tried using qi::skip, qi::lexeme and qi::raw to keep spaces:
// one of many failed attempts
qi::rule<It, qi::ascii::space_type> block = qi::lexeme[*(
qi::skip[r.comment]
| qi::skip[r.scope[push_back(boost::phoenix::ref(scopes), _1)]]
| qi::char_[push_back(boost::phoenix::ref(rest_content), _1)]
)];
but it never works.
So how to keep space characters? Any help is welcome. Thanks.
If you're parsing C++ code this way you may be biting off more than you can chew.
I'll answer, but the answer should show you how limited this approach is going to be. Just imagine parsing through
namespace q::x {
namespace y {
struct A {
template <typename = ns1::c<int>, typename...> struct C;
};
template <typename T, typename... Ts>
struct A::C final : ns2::ns3::base<A::C<T, Ts...>, Ts...> {
int simple = [](...) {
enum class X : unsigned { answer = 42, };
struct {
auto operator()(...) -> decltype(auto) {
return static_cast<int>(X::answer);
}
} iife;
return iife();
}("/* }}} */"); // {{{
};
}
}
and getting it right. And yes. that's valid code.
In fact it's so tricky, that it's easy to make "grown compilers" (GCC) trip: https://wandbox.org/permlink/FzcaSl6tbn18jq4f (Clang has no issue: https://wandbox.org/permlink/wu0mFwQiTOogKB5L).
That said, let me refer to my old explanation of how rule declarations and skippers work together: Boost spirit skipper issues
And show an approximation of what I'd do.
The comments
Actually, the comments should be part of your skipper, so let's make it so:
using SkipRule = qi::rule<It>;
SkipRule comment_only
= "//" >> *~qi::char_("\r\n") >> qi::eol
| "/*" >> *(qi::char_ - "*/") >> "*/"
;
Now for general skipping, we want to include whitespace:
SkipRule comment_or_ws
= qi::space | comment_only;
Now we want to parse types and identifiers:
qi::rule<It, std::string()> type
= ( qi::string("struct")
| qi::string("class")
| qi::string("union")
| qi::string("enum") >> -(*comment_or_ws >> qi::string("class"))
| qi::string("namespace")
)
>> !qi::graph // must be followed by whitespace
;
qi::rule<It, std::string()> identifier =
qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9")
;
I've /guessed/ that struct X { }; would be an example of a "scope" for you, and the tuple would contain ("struct", "X").
As a bonus I used attribute adaption of std::pair and show how to insert into a multimap for good measure later on
qi::rule<It, std::pair<std::string, std::string>()> scope
= qi::skip(comment_or_ws.alias()) [
type >> identifier
>> *~qi::char_(";{") // ignore some stuff like base classes
>> qi::omit["{" >> *~qi::char_("}") >> "}" | ';']
];
Note a big short-coming here is that the first non-commented '}' will "end" the scope. That's not how the language works (see the leading example)
Now we can conclude with an improved "block" rule:
qi::rule<It, SkipRule> block
= *(
scope [px::insert(px::ref(scopes), _1)]
| qi::skip(comment_only.alias()) [
qi::as_string[qi::raw[+(qi::char_ - scope)]] [px::ref(rest_content) += _1]
] // rest
);
Note that
- we override the comment_or_ws skipper with comment_only so we don't drop all whitespace from "rest content"
- inversely, we override the skipper to include whitespace inside the scope rule because otherwise the negative scope invocation (char_ - scope) would do the wrong thing because it wouldn't skip whitespace
Full Demo
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
// Demo driver: parses a hard-coded C++-like sample, records every recognised
// "scope" as a (keyword, name) pair in `scopes`, collects the remaining text in
// `rest_content`, and prints both.
int main() {
using It = std::string::const_iterator;
using namespace qi::labels;
// Text that was not consumed by a `scope` match.
std::string rest_content;
// (type keyword, identifier) per parsed scope; a multimap, so a keyword may repeat.
std::multimap<std::string, std::string> scopes;
// Attribute-less rule type shared by both skippers.
using SkipRule = qi::rule<It>;
// One comment: a `//` line comment including its end-of-line, or a block comment.
SkipRule comment_only
= "//" >> *~qi::char_("\r\n") >> qi::eol
| "/*" >> *(qi::char_ - "*/") >> "*/"
;
// General skipper: one whitespace character or one comment.
SkipRule comment_or_ws
= qi::space | comment_only;
// Scope-introducing keyword, exposed as a string. `enum` may be followed by
// `class` (whitespace/comments in between are skipped explicitly).
qi::rule<It, std::string()> type
= ( qi::string("struct")
| qi::string("class")
| qi::string("union")
| qi::string("enum") >> -(*comment_or_ws >> qi::string("class"))
| qi::string("namespace")
)
>> !qi::graph // must be followed by whitespace
;
// Identifier: letter or underscore, then letters, digits or underscores.
qi::rule<It, std::string()> identifier =
qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9")
;
// One scope declaration -> (keyword, identifier). Skips whitespace and comments
// internally; the braced body ends at the first '}' that is not skipped.
qi::rule<It, std::pair<std::string, std::string>()> scope
= qi::skip(comment_or_ws.alias()) [
type >> identifier
>> *~qi::char_(";{") // ignore some stuff like base classes
>> qi::omit["{" >> *~qi::char_("}") >> "}" | ';']
];
// Zero or more scopes or stretches of "rest" text. Scopes go into `scopes`;
// rest text is gathered with a comments-only skipper so its whitespace is kept.
qi::rule<It, SkipRule> block
= *(
scope [px::insert(px::ref(scopes), _1)]
| qi::skip(comment_only.alias()) [
qi::as_string[qi::raw[+(qi::char_ - scope)]] [px::ref(rest_content) += _1]
] // rest
);
//BOOST_SPIRIT_DEBUG_NODES((block)(scope)(identifier)(type))
// Sample input; kept verbatim because the printed results depend on it.
std::string const code = R"(
// some random sample "code"
struct base {
std::vector<int> ints;
};
/* class skipped_comment : base { };
*/
namespace q { namespace nested { } } // nested is not supported
class forward_declared;
template <typename T> // actually basically ignored
class
Derived
: base {
std::string more_data_members;
};
enum class MyEnum : int32_t {
foo = 0,
bar, /* whoop } */
qux = foo + bar
};
int main() {
return 0;
}
)";
// NOTE(review): the return value of phrase_parse is not checked here.
qi::phrase_parse(begin(code), end(code), block, comment_or_ws);
// Print the collected scopes (the multimap iterates in key order).
for (auto& [k,v] : scopes) {
std::cout << k << ": " << v << "\n";
}
std::cout << "------------------ BEGIN REST_CONTENT -----------------\n";
std::cout << rest_content << "\n";
std::cout << "------------------ END REST_CONENT --------------------\n";
}
Which parses the following sample input:
// some random sample "code"
struct base {
std::vector<int> ints;
};
/* class skipped_comment : base { };
*/
namespace q { namespace nested { } } // nested is not supported
class forward_declared;
template <typename T> // actually basically ignored
class
Derived
: base {
std::string more_data_members;
};
enum class MyEnum : int32_t {
foo = 0,
bar, /* whoop } */
qux = foo + bar
};
int main() {
return 0;
}
Printing
class: forward_declared
class: Derived
enumclass: MyEnum
namespace: q
struct: base
------------------ BEGIN REST_CONTENT -----------------
;}template <typename T>;;
int main() {
return 0;
}
------------------ END REST_CONENT --------------------
Conclusion
This result seems a decent pointer to:
- explain how to tackle the specific hurdle
- demonstrate how this approach to parsing breaks down at the slightest obstacle (namespace a { namespace b { } } for example)
Caveat Emptor

boost spirit - improving error output

This question leads on from its predecessor here: decoding an http header value
The Question:
In my test assertion failure, I am printing out the following contents of error_message:
Error! Expecting <alternative><media_type_no_parameters><media_type_with_parameters> in header value: "text/html garbage ; charset = \"ISO-8859-5\"" at position: 0
Which is unhelpful...
What is the correct way to get a nice syntax error that says:
Error! token_pair has invalid syntax here:
text/html garbage ; charset = "ISO-8859-5"
^ must be eoi or separator of type ;
Background:
HTTP Content-Type in a request has the following form:
type/subtype *( ; param[=param_value]) <eoi>
Where type and subtype may not be quoted or be separated by spaces, param is not quoted, and param_value is both optional and optionally quoted.
Other than between type/subtype spaces or horizontal tabs may be used as white space. There may also be space before type/subtype.
For now I am ignoring the possibility of HTTP line breaks or comments as I understand that they are deprecated.
Summary:
There shall be one type, one subtype and zero or more parameters. type and subtype are HTTP tokens, which is to say that they may not contain delimiters ("/\[]<>,; and so on) or spaces.
Thus, the following header is legal:
text/html ; charset = "ISO-8859-5"
And the following header is illegal:
text/html garbage ; charset = "ISO-8859-5"
^^^^^^^ illegal - must be either ; or <eoi>
The code I am using to parse this (seemingly simple, but actually quite devious) protocol component is below.
Code
My code, adapted from sehe's fantastic answer here
(warning, prerequisites are google test and boost)
//#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <gtest/gtest.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <utility>
#include <vector>
#include <string>
#include <iostream>
using token_pair = std::pair<std::string, std::string>;
// One media-type parameter: either a bare `name` or a `name=value` pair.
struct parameter {
    std::string name;        // parameter name
    std::string value;       // parameter value; empty when only a name was given
    bool has_value = false;  // true only when an explicit `=value` was parsed;
                             // initialised so a default-constructed parameter is well-defined
};
// Parsed Content-Type value: the type/subtype pair plus its parameters.
struct media_type {
// (type, subtype), e.g. ("text", "html").
token_pair type_subtype;
// Parameters in the order they appeared; empty when none were present.
std::vector<parameter> params;
};
BOOST_FUSION_ADAPT_STRUCT(parameter, name, value, has_value)
BOOST_FUSION_ADAPT_STRUCT(media_type, type_subtype, params)
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
using namespace std::literals;
// Building-block rules for parsing an HTTP media type (tokens, quoted strings,
// parameters), plus the error-reporting hook shared with derived grammars.
template<class Iterator>
struct components
{
components()
{
using qi::ascii::char_;
// NOTE(review): `spaces` is declared as qi::space_type (a parser terminal), not a
// rule, so this assignment may not redefine what gets skipped — confirm that the
// skipper really is limited to space and tab.
spaces = char_(" \t");
// A token: one or more characters that are neither delimiters nor space/tab.
token = +~char_( "()<>#,;:\\\"/[]?={} \t");
// type "/" subtype, with no skipping in between (the rule has no skipper).
token_pair_rule = token >> '/' >> token;
// Double-quoted text; a backslash escapes the character that follows it.
quoted_string = '"' >> *('\\' >> char_ | ~char_('"')) >> '"';
value = quoted_string | token;
// Bare parameter name: value is set to "" and has_value to false.
name_only = token >> qi::attr("") >> qi::attr(false);
// name=value parameter: has_value is set to true.
nvp = token >> '=' >> value >> qi::attr(true);
any_parameter = ';' >> (nvp | name_only);
// One-or-more and zero-or-more parameter lists.
some_parameters = +any_parameter;
parameters = *any_parameter;
// NOTE(review): `token` contains no expectation operator (`>`), so this handler
// presumably never fires — confirm whether it is needed.
qi::on_error<qi::fail>(
token,
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
);
BOOST_SPIRIT_DEBUG_NODES((token)
(quoted_string)
(value)
(name_only)
(nvp)
(any_parameter)
(parameters)
)
}
protected:
using Skipper = qi::space_type;
Skipper spaces;
// Lexeme-level rules (no skipper).
qi::rule<Iterator, std::string()> quoted_string, token, value;
// Parameter-level rules (use Skipper).
qi::rule<Iterator, parameter(), Skipper> nvp, name_only, any_parameter;
qi::rule<Iterator, std::vector<parameter>(), Skipper> parameters, some_parameters;
qi::rule<Iterator, token_pair()> token_pair_rule;
public:
// Receives the text produced by the most recent error report.
std::string error_message;
protected:
// Phoenix-compatible functor that formats a parse error into `error_message`.
struct ReportError {
// the result type must be explicit for Phoenix
template<typename, typename, typename, typename>
struct result { typedef void type; };
// Binds the functor to the string that will receive the message.
ReportError(std::string& error_message)
: error_message(error_message) {}
// Builds "Error! Expecting <what> in header value: <quoted input> at position: <offset>"
// from the full input range [first, last), the error iterator and the parser
// info, and overwrites the bound error_message with it.
template<typename Iter>
void operator()(Iter first, Iter last,
Iter error, const qi::info& what) const
{
// NOTE(review): no string literal suffix is used below; this using-directive looks unnecessary.
using namespace std::string_literals;
std::ostringstream ss;
ss << "Error! Expecting "
<< what
<< " in header value: " << std::quoted(std::string(first, last))
<< " at position: " << error - first;
error_message = ss.str();
}
std::string& error_message;
};
// Lazy error reporter bound to this object's error_message (declared above it).
const phoenix::function<ReportError> report_error = ReportError(error_message);
};
// Grammar for a complete media type: type/subtype followed either by
// end-of-input or by one or more `;`-introduced parameters.
template<class Iterator>
struct token_grammar
: components<Iterator>
, qi::grammar<Iterator, media_type()>
{
token_grammar() : token_grammar::base_type(media_type_rule)
{
// type/subtype followed by at least one parameter; `spaces` is skipped inside the parameter list.
media_type_with_parameters = token_pair_rule >> qi::skip(spaces)[some_parameters];
// type/subtype, an empty parameter vector, then end-of-input (after skipping).
media_type_no_parameters = token_pair_rule >> qi::attr(std::vector<parameter>()) >> qi::skip(spaces)[qi::eoi];
// `eps >` turns the alternative into an expectation: if neither branch matches,
// the on_error handler below is invoked. qi::hold keeps a failed branch from
// leaving partial results in the attribute.
media_type_rule = qi::eps > (qi::hold[media_type_no_parameters]
| qi::hold[media_type_with_parameters]);
BOOST_SPIRIT_DEBUG_NODES((media_type_with_parameters)
(media_type_no_parameters)
(media_type_rule))
// Report expectation failures of media_type_rule through the inherited report_error.
qi::on_error<qi::fail>(
media_type_rule,
this->report_error(qi::_1, qi::_2, qi::_3, qi::_4)
);
}
private:
// Bring the base-class members into scope by name.
using Skipper = typename token_grammar::components::Skipper;
using token_grammar::components::spaces;
using token_grammar::components::token;
using token_grammar::components::token_pair_rule;
using token_grammar::components::value;
using token_grammar::components::any_parameter;
using token_grammar::components::parameters;
using token_grammar::components::some_parameters;
public:
// Public so tests can parse with media_type_rule directly.
qi::rule<Iterator, media_type()> media_type_no_parameters, media_type_with_parameters, media_type_rule;
};
// Exercises token_grammar on two valid headers and one invalid header.
TEST(spirit_test, test1)
{
token_grammar<std::string::const_iterator> grammar{};
// Case 1: type/subtype only, with trailing whitespace.
auto test = R"__test(application/json )__test"s;
auto ct = media_type {};
// NOTE(review): the parse result `r` is not asserted for this case.
bool r = parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("application", ct.type_subtype.first);
EXPECT_EQ("json", ct.type_subtype.second);
EXPECT_EQ(0, ct.params.size());
// Case 2: one quoted parameter, with whitespace around `;` and `=`.
ct = {};
test = R"__test(text/html ; charset = "ISO-8859-5")__test"s;
parse(test.cbegin(), test.cend(), grammar, ct);
EXPECT_EQ("text", ct.type_subtype.first);
EXPECT_EQ("html", ct.type_subtype.second);
ASSERT_EQ(1, ct.params.size());
EXPECT_TRUE(ct.params[0].has_value);
EXPECT_EQ("charset", ct.params[0].name);
EXPECT_EQ("ISO-8859-5", ct.params[0].value);
// Same input again, this time through the public media_type_rule instead of the grammar.
auto mt = media_type {};
parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_EQ("text", mt.type_subtype.first);
EXPECT_EQ("html", mt.type_subtype.second);
EXPECT_EQ(1, mt.params.size());
//
// Introduce a failure case
//
mt = media_type {};
test = R"__test(text/html garbage ; charset = "ISO-8859-5")__test"s;
r = parse(test.cbegin(), test.cend(), grammar.media_type_rule, mt);
EXPECT_FALSE(r);
// Compares against "" so that the test failure output shows the actual
// error_message (presumably intentional for the question — confirm).
EXPECT_EQ("", grammar.error_message);
}

boost spirit parsing CSV with columns in variable order

I'm trying to parse a CSV file (with header line) using boost spirit.
The CSV is not in a constant format: sometimes there are extra columns, or the order of the columns is mixed. I'm interested in a few columns whose header names are well known.
For instance my CSV may look like:
Name,Surname,Age
John,Doe,32
Or:
Age,Name
32,John
I want to parse only the content of Name and Age (N.B. Age is an integer). At the moment I have come up with a very ugly solution where Spirit parses the first line and creates a vector that contains an enum in the positions I'm interested in. And then I have to do the parsing of the terminal symbols by hand...
// Meaning of one header column: a column of interest (NAME, AGE) or one to ignore (UNUSED).
enum LineItems {
NAME, AGE, UNUSED
};
// One parsed CSV record, reduced to the columns of interest.
struct CsvLine {
    std::string name;  // content of the "Name" column
    int age = 0;       // content of the "Age" column; 0 when that column is absent,
                       // so the field is never read uninitialised
};
using Column = std::string;
using CsvFile = std::vector<CsvLine>;
// Grammar for a CSV file with a header line. The header is parsed into a vector
// of LineItems (kept in the local _a) which is then handed to every `line` so
// the columns can be mapped regardless of their order.
template<typename It>
struct CsvGrammar: qi::grammar<It, CsvFile(), qi::locals<std::vector<LineItems>>, qi::blank_type> {
CsvGrammar() :
CsvGrammar::base_type(start) {
using namespace qi;
static const char colsep = ',';
// Header, end-of-line, then records separated by end-of-line.
start = qi::omit[header[qi::_a = qi::_1]] >> eol >> line(_a) % eol;
// One LineItems entry per header cell: NAME, AGE, or UNUSED for anything else.
// NOTE(review): wrapping the placeholder qi::_val in phx::ref looks unusual — confirm it is intended.
header = (lit("Name")[phx::push_back(phx::ref(qi::_val), LineItems::NAME)]
| lit("Age")[phx::push_back(phx::ref(qi::_val), LineItems::AGE)]
| column[phx::push_back(phx::ref(qi::_val), LineItems::UNUSED)]) % colsep;
// A record: its columns, the header layout (_r1) and the result (_val) are passed to convertFunc.
line = (column % colsep)[phx::bind(&CsvGrammar<It>::convertFunc, this, qi::_1, qi::_r1,
qi::_val)];
// A cell: quoted text, or anything up to the next comma or newline.
column = quoted | *~char_(",\n");
// Quoted cell; a doubled quote is accepted inside the quotes.
quoted = '"' >> *("\"\"" | ~char_("\"\n")) >> '"';
}
// Converts the raw columns of one record into csvLine, guided by `positions`
// (body elided in the original post).
void convertFunc(std::vector<string>& columns, std::vector<LineItems>& positions, CsvLine &csvLine) {
//terminal symbol parsing here, and assign to csvLine struct.
...
}
private:
qi::rule<It, CsvFile(), qi::locals<std::vector<LineItems>>, qi::blank_type> start;
qi::rule<It, std::vector<LineItems>(), qi::blank_type> header;
// Takes the header layout as an inherited attribute (by value).
qi::rule<It, CsvLine(std::vector<LineItems>), qi::blank_type> line;
qi::rule<It, Column(), qi::blank_type> column;
qi::rule<It, std::string()> quoted;
// Not referenced by any rule definition shown here.
qi::rule<It, qi::blank_type> empty;
};
Here is the full source.
What if the header parser could prepare a vector<rule<...>*> and the "line parser" just use this vector to parse itself? a sort of advanced nabialek trick (i've been trying but i couldn't make it).
Or is there any better way to parse this kind of CSV with Spirit?
(any help is appreciated, thank you in advance)
I'd go with the concept that you have,
I think it's plenty elegant (the qi locals even allow reentrant use of this).
To reduce the cruft in the rules (Boost Spirit: "Semantic actions are evil"?) you could move the "conversion function" off into attribute transformation customization points.
Oops. As commented that was too simple. However, you can still reduce the cruftiness quite a bit. With two simple tweaks, the grammar reads:
// Symbol table mapping the header names of interest to their LineItems value.
item.add("Name", NAME)("Age", AGE);
// Header (stored in the local _a), end-of-line, then records separated by end-of-line.
start = omit[ header[_a=_1] ] >> eol >> line(_a) % eol;
// Each header cell yields a known item, or UNUSED for any other column.
header = (item | omit[column] >> attr(UNUSED)) % colsep;
// A record: its columns are handed to the `convert` semantic action.
line = (column % colsep) [convert];
// A cell: quoted text, or anything up to the next comma or newline.
column = quoted | *~char_(",\n");
// Quoted cell; a doubled quote is accepted inside the quotes.
quoted = '"' >> *("\"\"" | ~char_("\"\n")) >> '"';
The tweaks:
using qi::symbols to map from header to LineItem
using a raw semantic action ([convert]) which directly accesses the context (see boost spirit semantic action parameters):
// Semantic action for `line`: copies the columns of interest from one parsed
// record into the CsvLine being built, guided by the header layout that the
// rule receives as its inherited attribute.
struct final {
    using Ctx = typename decltype(line)::context_type;

    // columns: the raw cells of the current record.
    // ctx:     rule context; attribute 0 is the CsvLine result, attribute 1 the header layout.
    // pass:    set to false to fail the `line` rule, true otherwise.
    void operator()(Columns const& columns, Ctx &ctx, bool &pass) const {
        auto& csvLine   = boost::fusion::at_c<0>(ctx.attributes);
        auto& positions = boost::fusion::at_c<1>(ctx.attributes);

        std::size_t i = 0;
        for (LineItems position : positions) {
            // A record may be shorter than the header. That is harmless for
            // UNUSED columns, but a missing NAME/AGE cell must not be indexed:
            // fail the line instead of reading past the end of `columns`.
            if (position != UNUSED && i >= columns.size()) {
                pass = false;
                return;
            }
            switch (position) {
                case NAME: csvLine.name = columns[i]; break;
                case AGE:  csvLine.age  = atoi(columns[i].c_str()); break;
                default:   break;
            }
            ++i;
        }
        pass = true; // returning false fails the `line` rule
    }
} convert;
Arguably the upshot is akin to doing auto convert = phx::bind(&CsvGrammar<It>::convertFunc, this, qi::_1, qi::_r1, qi::_val) but using auto with Proto/Phoenix/Spirit expressions is notoriously error prone (UB due to dangling refs to temporaries from the expression template), so I'd certainly prefer the way shown above.
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <iostream>
#include <boost/fusion/include/at_c.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using std::string;
// Meaning of one header column: a column of interest (NAME, AGE) or one to ignore (UNUSED).
enum LineItems { NAME, AGE, UNUSED };
// One parsed CSV record, reduced to the columns of interest.
struct CsvLine {
    std::string name;  // content of the "Name" column
    int age = 0;       // content of the "Age" column; 0 when the header has no such
                       // column, so printing a record never reads an uninitialised int
};
using Column = std::string;
using Columns = std::vector<Column>;
using CsvFile = std::vector<CsvLine>;
// Grammar for a CSV file with a header line whose columns may come in any order.
// The header is parsed into a vector of LineItems (kept in the local _a) and
// passed to every `line` as an inherited attribute; the `convert` semantic
// action then picks the Name and Age cells out of each record.
template<typename It>
struct CsvGrammar: qi::grammar<It, CsvFile(), qi::locals<std::vector<LineItems>>, qi::blank_type> {
    CsvGrammar() : CsvGrammar::base_type(start) {
        using namespace qi;
        static const char colsep = ',';

        // Header names of interest; any other header cell becomes UNUSED.
        item.add("Name", NAME)("Age", AGE);

        // Header, end-of-line, then records separated by end-of-line.
        start  = qi::omit[ header[_a=_1] ] >> eol >> line(_a) % eol;
        header = (item | omit[column] >> attr(UNUSED)) % colsep;
        line   = (column % colsep) [convert];
        // A cell: quoted text, or anything up to the next comma or newline.
        column = quoted | *~char_(",\n");
        // Quoted cell; a doubled quote is accepted inside the quotes.
        quoted = '"' >> *("\"\"" | ~char_("\"\n")) >> '"';

        BOOST_SPIRIT_DEBUG_NODES((header)(column)(quoted));
    }

private:
    qi::rule<It, std::vector<LineItems>(), qi::blank_type> header;
    qi::rule<It, CsvFile(), qi::locals<std::vector<LineItems>>, qi::blank_type> start;
    // Receives the header layout by const reference as its inherited attribute.
    qi::rule<It, CsvLine(std::vector<LineItems> const&), qi::blank_type> line;
    qi::rule<It, Column(), qi::blank_type> column;
    qi::rule<It, std::string()> quoted;
    // Not referenced by any rule definition in this grammar.
    qi::rule<It, qi::blank_type> empty;
    qi::symbols<char, LineItems> item;

    // Semantic action for `line`: maps the cells of one record onto a CsvLine.
    struct final {
        using Ctx = typename decltype(line)::context_type;

        // columns: the raw cells of the current record.
        // ctx:     rule context; attribute 0 is the CsvLine result, attribute 1 the header layout.
        // pass:    set to false to fail the `line` rule, true otherwise.
        void operator()(Columns const& columns, Ctx &ctx, bool &pass) const {
            auto& csvLine   = boost::fusion::at_c<0>(ctx.attributes);
            auto& positions = boost::fusion::at_c<1>(ctx.attributes);

            std::size_t i = 0;
            for (LineItems position : positions) {
                // A record may be shorter than the header. That is harmless for
                // UNUSED columns, but a missing NAME/AGE cell must not be indexed:
                // fail the line instead of reading past the end of `columns`.
                if (position != UNUSED && i >= columns.size()) {
                    pass = false;
                    return;
                }
                switch (position) {
                    case NAME: csvLine.name = columns[i]; break;
                    case AGE:  csvLine.age  = atoi(columns[i].c_str()); break;
                    default:   break;
                }
                ++i;
            }
            pass = true; // returning false fails the `line` rule
        }
    } convert;
};
int main() {
const std::string s = "Surname,Name,Age,\nJohn,Doe,32\nMark,Smith,43";
auto f(begin(s)), l(end(s));
CsvGrammar<std::string::const_iterator> p;
CsvFile parsed;
bool ok = qi::phrase_parse(f, l, p, qi::blank, parsed);
if (ok) {
for (CsvLine line : parsed) {
std::cout << '[' << line.name << ']' << '[' << line.age << ']';
std::cout << std::endl;
}
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f, l) << "'\n";
}
Prints
[Doe][32]
[Smith][43]