I am trying to parse a line from a text file which is of the form:
[int_:] [int_/int_] [(int_, string)] string [string:int_]...
Where [] denotes an optional parameter which, when present, contains tags such as (":", "(", ")", "/").
Also, the last format is a repeated "key:value" combination, e.g.:
10: 0x1/2 (8, INC) rd API:2 SI:100
I am able to parse the whole line when all the parameter are available.
But if any of the leading optional parameters is missing, then the parser fails.
How can I ignore the optional parameters in the Boost Spirit library? (i.e. skip the assignment of optional variables to default values.)
These are the qi grammar rules:
// Lexeme: one or more characters, stopping before a space or a ')'.
quoted_string = lexeme[+(char_ -(lit(' ') | lit(')')))];
// A number: "0x"/"0X" followed by hex digits, otherwise an unsigned decimal.
hex_num = ((lit("0x") | lit("0X")) >> hex) | uint_;
// Every component of this sequence is mandatory, so a line that omits the
// "n:", "a/b" or "(n, name)" part cannot match.
start = (hex_num >> lit(":"))
>> (hex_num >> lit("/") >> hex_num )
>> lit("(") >> hex_num >> lit(",") >> quoted_string >> lit(")")
>> quoted_string
>> quoted_string;
// Rule declarations; all three use ascii::space_type as their skipper.
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, uint32_t(), ascii::space_type> hex_num;
qi::rule<Iterator, employee(), ascii::space_type> start;
Model your AST node to reflect the Parser tree:
// The two numbers of an "a/b" element.
struct ratio_t { uint32_t a,b; };
// The number and name of an "(id, name)" element.
struct opcode_t { uint32_t id; std::string name; };
// One parsed line. `ratio` and `opcode` are optional because the
// corresponding elements may be absent from the input.
struct Node {
uint32_t label; // prefix:
boost::optional<ratio_t> ratio; // a/b
boost::optional<opcode_t> opcode; // (id, name)
std::string extra;
// Trailing "key:value" items; a multimap, so a key may occur more than once.
std::multimap<std::string, uint32_t> params;
};
(Just making stuff up as I go, because I can only guess what the data means. I'm assuming employee, hex_num and quoted_string are somehow remnants from sample code you started with).
Now when you adapt these structures:
BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
You can simply parse into it with an analogous parse tree:
// lexemes
// One or more graph characters other than ')'.
unquoted_string = +(graph - ')');
// Case-insensitive "0x" followed by hex digits, otherwise an unsigned decimal.
num = (no_case[ "0x" ] >> hex) | uint_;
// key:value item: the key is one or more graph characters other than ':'.
param = +(graph - ':') >> ':' >> num;
// skipping productions
opcode = '(' >> num >> ',' >> unquoted_string >> ')';
ratio = num >> '/' >> num;
prefix = (num >> ':') | attr(0); // defaults to 0
// prefix always yields a value (see above); ratio and opcode are optional (-),
// and any number of params (*) may follow the mandatory unquoted_string.
start = prefix
>> -ratio
>> -opcode
>> unquoted_string
>> *param;
Now when you parse these test cases:
// Feed each sample line to the parser `p` and report the outcome, plus any
// input the parser left unconsumed.
for (std::string const input : {
        "10: 0x1/2 (8, INC) rd API:2 SI:100",
        "10: 0x1/2 (8, INC) rd API:2",
        "10: 0x1/2 (8, INC) rd",
        "10: 0x1/2 rd API:2 SI:100",
        "10: rd API:2 SI:100",
        "0x1/2 rd API:2 SI:100",
        "rd API:2 SI:100",
    })
{
    It first = input.begin(), last = input.end();
    AST::Node data;

    if (qi::phrase_parse(first, last, p, qi::ascii::space, data))
        std::cout << "Parse success: " << data << "\n";
    else
        std::cout << "Parse failure ('" << input << "')\n";

    if (first != last)
        std::cout << "Remaining unparsed input: '" << std::string(first, last) << "'\n";
}
you get:
Parse success: 10: 1/2 (8, 'INC') rd API:2 SI:100
Parse success: 10: 1/2 (8, 'INC') rd API:2
Parse success: 10: 1/2 (8, 'INC') rd
Parse success: 10: 1/2 -- rd API:2 SI:100
Parse success: 10: -- -- rd API:2 SI:100
Parse success: 0: 1/2 -- rd API:2 SI:100
Parse success: 0: -- -- rd API:2 SI:100
FULL DEMO
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/optional/optional_io.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace AST {
    // The two numbers of an "a/b" element.
    struct ratio_t {
        uint32_t a;
        uint32_t b;
    };

    // The number and name of an "(id, name)" element.
    struct opcode_t {
        uint32_t id;
        std::string name;
    };

    // One parsed line; ratio and opcode may be absent.
    struct Node {
        uint32_t label;                    // prefix:
        boost::optional<ratio_t> ratio;    // a/b
        boost::optional<opcode_t> opcode;  // (id, name)
        std::string extra;
        std::multimap<std::string, uint32_t> params;
    };

    // Streams a ratio as "a/b".
    std::ostream& operator<<(std::ostream& os, ratio_t const& v) {
        os << v.a << "/" << v.b;
        return os;
    }

    // Streams an opcode as "(id, 'name')".
    std::ostream& operator<<(std::ostream& os, opcode_t const& v) {
        os << "(" << v.id << ", '" << v.name << "')";
        return os;
    }

    // Streams the whole node: label, optional ratio/opcode, extra, then every
    // key:value parameter in map order.
    std::ostream& operator<<(std::ostream& os, Node const& v) {
        os << v.label << ": " << v.ratio << " " << v.opcode << " " << v.extra;
        for (auto it = v.params.begin(); it != v.params.end(); ++it)
            os << " " << it->first << ":" << it->second;
        return os;
    }
}
BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
// Grammar for lines of the form
//   [num:] [num/num] [(num, name)] name [key:num]...
// producing an AST::Node. The skipper defaults to qi::ascii::space_type.
template <typename It, typename Skipper = qi::ascii::space_type>
struct P : qi::grammar<It, AST::Node(), Skipper> {
P() : P::base_type(start)
{
using namespace qi;
// lexemes
// One or more graph characters other than ')'.
unquoted_string = +(graph - ')');
// Case-insensitive "0x" followed by hex digits, otherwise an unsigned decimal.
num = (no_case[ "0x" ] >> hex) | uint_;
// key:value item: the key is one or more graph characters other than ':'.
param = +(graph - ':') >> ':' >> num;
// skipping productions
opcode = '(' >> num >> ',' >> unquoted_string >> ')';
ratio = num >> '/' >> num;
prefix = (num >> ':') | attr(0); // defaults to 0
// ratio and opcode are optional (-); any number of params (*) may follow.
start = prefix
>> -ratio
>> -opcode
>> unquoted_string
>> *param;
BOOST_SPIRIT_DEBUG_NODES((start)(unquoted_string)(num)(prefix)(ratio)(opcode)(param))
}
private:
// Rules declared with the Skipper skip between their elements.
qi::rule<It, AST::ratio_t(), Skipper> ratio;
qi::rule<It, AST::opcode_t(), Skipper> opcode;
qi::rule<It, AST::Node(), Skipper> start;
qi::rule<It, uint32_t(), Skipper> prefix;
//lexemes
// Declared without a skipper, so these match contiguous input only.
qi::rule<It, std::string()> unquoted_string;
qi::rule<It, uint32_t()> num;
qi::rule<It, std::pair<std::string, uint32_t>> param;
};
// Runs the grammar over a fixed list of sample lines and reports, for each,
// whether parsing succeeded and whether any input was left unconsumed.
int main() {
    using It = std::string::const_iterator;
    P<It> const p;

    for (std::string const input : {
            "10: 0x1/2 (8, INC) rd API:2 SI:100",
            "10: 0x1/2 (8, INC) rd API:2",
            "10: 0x1/2 (8, INC) rd",
            "10: 0x1/2 rd API:2 SI:100",
            "10: rd API:2 SI:100",
            "0x1/2 rd API:2 SI:100",
            "rd API:2 SI:100",
        })
    {
        It first = input.begin(), last = input.end();
        AST::Node data;

        if (qi::phrase_parse(first, last, p, qi::ascii::space, data))
            std::cout << "Parse success: " << data << "\n";
        else
            std::cout << "Parse failure ('" << input << "')\n";

        if (first != last)
            std::cout << "Remaining unparsed input: '" << std::string(first, last) << "'\n";
    }
}
Related
I am trying to parse the following messages with Spirit Qi:
"A/B AND C/D", "A/B", "A/B AND C/D AND E/F"
I am able to parse "A/B" but cannot get the correct results for the other strings.
I tried the following code:
qi::rule<It, AstNodeVector()> entries;
qi::rule<It, AstNodeVector()> lists;
qi::rule<It, std::string()> element;
this->entries= *(this->lists % " AND ");
this->lists= this->element >> '/' >> this->element;
this->element = qi::char_("A-Z");
What is wrong with my grammar?
It seems you're not skipping whitespace. Maybe that's a conceptual problem (see Boost spirit skipper issues).
Regardless, it does parse:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
using AstNodeVector = std::vector<std::string>;
// Grammar for "X/Y AND X/Y AND ..." that collects every element into one
// flat AstNodeVector. None of the rules declares a skipper.
template <typename It>
struct P : qi::grammar<It, AstNodeVector()> {
P() : P::base_type(entries) {
// Zero or more lists of `lists` items separated by the literal " AND ".
entries = *(lists % " AND ");
// Two elements separated by '/'.
lists = element >> '/' >> element;
// A single character matching the set specification "A-Z".
element = qi::char_("A-Z");
}
private:
qi::rule<It, AstNodeVector()> entries;
qi::rule<It, AstNodeVector()> lists;
qi::rule<It, std::string()> element;
};
// Parses three sample inputs and prints the collected elements, or FAIL,
// followed by any input that was left unconsumed.
int main() {
    using It = std::string::const_iterator;
    P<It> const p {};

    for (std::string const input: {
            "A/B AND C/D",
            "A/B",
            "A/B AND C/D AND E/F",
        })
    {
        It first = begin(input), last = end(input);
        AstNodeVector results;

        bool const matched = phrase_parse(first, last, p, qi::space, results);
        if (!matched) {
            std::cout << "FAIL: " << std::quoted(input) << "\n";
        } else {
            std::cout << "Success: " << std::quoted(input) << "\n";
            for (auto it = results.begin(); it != results.end(); ++it)
                std::cout << " -- " << std::quoted(*it) << "\n";
        }

        if (first != last)
            std::cout << "Remaining input: " << std::quoted(std::string(first, last)) << "\n";
    }
}
Prints
Success: "A/B AND C/D"
-- "A"
-- "B"
-- "C"
-- "D"
Success: "A/B"
-- "A"
-- "B"
Success: "A/B AND C/D AND E/F"
-- "A"
-- "B"
-- "C"
-- "D"
-- "E"
-- "F"
Perhaps you should have included self-contained code, or elaborated on what exactly the problem is.
Starting from the Employee - Parsing into structs example:
// Grammar for `employee { int, "string", "string", double }`, producing an
// `employee` and skipping with ascii::space_type.
template <typename Iterator>
struct employee_parser : qi::grammar<Iterator, employee(), ascii::space_type>
{
employee_parser() : employee_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
// Text between double quotes; the quotes themselves are not part of the
// attribute. lexeme[] keeps the quoted text contiguous.
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
start %=
lit("employee")
>> '{'
>> int_ >> ','
>> quoted_string >> ','
>> quoted_string >> ','
>> double_
>> '}'
;
}
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, employee(), ascii::space_type> start;
};
suppose I wanted to replace quoted_string with a rule that matches any string stored in a given container.
For example, if I have a container such as:
std::array<std::string, 4> match_list =
{ "string0", "string1", "string2", "string3" };
and I want the parser to only match the input against one of the values in the array (the container doesn't have to be an array).
I'm sure this is simple, but the Spirit help pages don't seem to address this.
It is simple: https://www.boost.org/doc/libs/1_67_0/libs/spirit/doc/html/spirit/qi/reference/string/symbols.html
Live On Coliru
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Target record for employee_parser; adapted as a Fusion sequence in the
// order id, sym, name, value.
struct employee {
int id;
std::string sym;
std::string name;
double value;
};
BOOST_FUSION_ADAPT_STRUCT(employee, id, sym, name, value)
// Grammar for `employee { int, <symbol>, "string", double }` where <symbol>
// must be one of the keys registered in symbol_.
template <typename Iterator, typename Skipper = qi::space_type>
struct employee_parser : qi::grammar<Iterator, employee(), Skipper>
{
employee_parser() : employee_parser::base_type(start)
{
using namespace qi;
// Text between double quotes; the quotes are not part of the attribute.
quoted_string = lexeme['"' >> +(char_ - '"') >> '"'];
// Only keys are registered here; no associated value is supplied.
symbol_.add
("string0")
("string1")
("string2")
("string3")
;
start =
lit("employee")
>> '{'
>> int_ >> ','
>> symbol_ >> ','
>> quoted_string >> ','
>> double_
>> '}'
;
}
qi::rule<Iterator, std::string(), Skipper> quoted_string;
qi::rule<Iterator, employee(), Skipper> start;
// Symbol table with char keys and std::string values.
qi::symbols<char, std::string> symbol_;
};
// Parses one hard-coded employee record and prints it as a ';'-delimited
// tuple, or reports failure; any unconsumed input is printed afterwards.
int main() {
    using It = std::string::const_iterator;

    std::string const input = "employee { 42, string3, \"more names or stuff\", 6.7 }";
    It cur = input.begin(), stop = input.end();

    employee_parser<It> p;
    employee e;
    bool const matched = phrase_parse(cur, stop, p, qi::space, e);

    if (!matched) {
        std::cout << "Parse failed\n";
    } else {
        using boost::fusion::operator<<;
        std::cout << boost::fusion::tuple_delimiter(';');
        std::cout << "Parsed: " << e << "\n";
    }

    if (cur != stop) {
        std::cout << "Remaining input: '" << std::string(cur, stop) << "'\n";
    }
}
Prints
Parsed: (42;;more names or stuff;6.7)
To actually include values:
symbol_.add
("string0", "STRING0")
("string1", "STRING1")
("string2", "STRING2")
("string3", "STRING3")
;
Prints Live On Coliru
Parsed: (42;STRING3;more names or stuff;6.7)
Or you can use another type altogether:
symbol_.add
("string0", 0)
("string1", 1)
("string2", 2)
("string3", 3)
;
With
symbol_.add
("string0", 0)
("string1", 1)
("string2", 2)
("string3", 3)
;
Which prints Live On Coliru
Parsed: (42;3;more names or stuff;6.7)
Finally, you might use raw[] instead to "transduce" the input sequence, for example combined with qi::no_case[]: Live On Coliru
>> raw[no_case[symbol_]] >> ','
Prints
Parsed: (42;sTrInG3;more names or stuff;6.7)
After Seth encouraged me in the comments to post my answer as well, here it is. As expected, it is very similar with the difference that I dynamically construct the symbols from a std::array passed to the grammar.
#include <iostream>
#include <string>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
// Target record for the employee grammar; adapted as a Fusion sequence in the
// order age, surname, forename, salary.
//
// The scalar members carry default initializers so that a record that was only
// partially filled by a failed parse can still be read (and printed) safely.
struct employee {
    int age = 0;
    std::string surname;
    std::string forename;
    double salary = 0.0;
};
BOOST_FUSION_ADAPT_STRUCT(employee, age, surname, forename, salary)
namespace ascii = boost::spirit::ascii;
namespace qi = boost::spirit::qi;
// Grammar for `employee { int, "string", <name>, double }` where <name> must
// be one of the strings in the match_list handed to the constructor.
template <typename Iterator, std::size_t N>
struct employee_parser : qi::grammar<Iterator, employee(), ascii::space_type> {
    // Registers every entry of match_list in the symbol table; each entry is
    // both the key to match and the value produced.
    employee_parser(std::array<std::string, N> const &match_list)
        : employee_parser::base_type(start) {
        using namespace qi;

        // Iterate by const reference: the strings are only read here.
        for (auto const &match : match_list) {
            employees.add(match, match);
        }

        // Text between double quotes; the quotes are not part of the attribute.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];

        start %=
            lit("employee")
            >> '{'
            >> int_ >> ','
            >> quoted_string >> ','
            >> employees >> ','
            >> double_
            >> '}'
            ;
    }

    qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
    qi::rule<Iterator, employee(), ascii::space_type> start;
    // Symbol table keyed on the iterator's character type.
    qi::symbols<typename std::iterator_traits<Iterator>::value_type,
                std::string> employees;
};
// Parses [first, last) as an employee record whose forename must be one of
// match_list.
//
// Returns the parsed record. When parsing fails or input is left over, a
// message with the unconsumed input is written to std::cerr and the record
// filled so far is returned anyway.
template <typename Iterator, std::size_t N>
employee parse(Iterator first, Iterator last,
               std::array<std::string, N> const &match_list) {
    employee_parser<Iterator, N> const grammar(match_list);

    // Value-initialize: on a failed parse some members are never assigned, and
    // callers read all of them.
    employee e{};

    bool r = qi::phrase_parse(first, last, grammar, ascii::space, e);
    if (!r || first != last) {
        std::cerr << "Parsing failed at " + std::string(first, last) + "\n";
    }
    return e;
}
int main() {
employee e;
std::array<std::string, 4> match_list = {"Homer", "Marge", "Lisa", "Bart"};
std::string homer = "employee { 38, \"Simpson\", Homer, 3.0 }";
e = parse(homer.begin(), homer.end(), match_list);
std::cout << "employee { " << e.age << ", " << e.surname << ", "
<< e.forename << ", " << e.salary << " }\n";
// Fails parsing because Hans Mole is not in the list
std::string mole = "employee { 100, \"Mole\", Hans, 0.0 }";
e = parse(mole.begin(), mole.end(), match_list);
std::cout << "employee { " << e.age << ", " << e.surname << ", "
<< e.forename << ", " << e.salary << " }\n";
}
$ clang++ -Wall -Wextra -Wpedantic -std=c++11 test.cpp
$ ./a.out
employee { 38, Simpson, Homer, 3 }
Parsing failed at employee { 100, "Mole", Hans, 0.0 }
employee { 100, Mole, , 0 }
Here is also a reference for Homer's salary being 3.0: https://www.youtube.com/watch?v=HIEWgwRrY9s
I'm trying to parse a time string using boost spirit and not sure why this doesn't work.
auto fill_ts_nanos = [&t] (int h, int m, int s, int ms) -> int
{ t.tv_nsec = ( ( h * 3600 + m * 60 + s ) * 1000 + ms ) * 1000000; return t.tv_sec; };
auto fill_suffix = [&suffix] (string &s) { suffix=s; };
auto parse_ok = qi::parse(input.begin(), input.end(),
( qi::int_ >> qi::char_(":") >> qi::int_ >> qi::char_(":") >>
qi::int_ >> qi::char_(".") >> qi::int_ )
[boost::bind(fill_ts_nanos, qi::_1, qi::_3, qi::_5, qi::_7
>> qi::char_(",") >> qi::as_string[*qi::char_][fill_suffix] ;
A sample input is "04:00:00.512,2251812698588658"
After guessing a lot of details (e.g. what the type of t is supposed to be), here's the fixed code with some debug output:
Live On Coliru
Note: I fixed the signed-ness of the numbers as well as I changed the types to prevent overflow.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <ctime>
#include <chrono>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
using namespace std::chrono_literals;
using namespace qi::labels;
// Parses "H:M:S.ms,<suffix>" from a hard-coded input. The time part is
// converted to nanoseconds and stored in t.tv_nsec by a semantic action; the
// text after the comma is stored in `suffix`. The results are printed
// regardless of parse_ok; the exit code is 0 on success and 255 otherwise.
int main() {
timespec t;
std::string suffix;
// Resets t, stores the whole time as nanoseconds in tv_nsec and returns
// tv_sec (zero after the reset). The trailing `l` makes the last
// multiplication happen in long.
auto fill_ts_nanos = [&t](int h, unsigned m, unsigned s, unsigned ms) -> long {
t = {};
t.tv_nsec = ((h * 3600 + m * 60 + s) * 1000 + ms) * 1000000l;
return t.tv_sec;
};
auto fill_suffix = [&suffix](std::string &s) { suffix = s; };
std::string const input = "04:00:00.512,2251812698588658";
// The separators are plain literals, so the four numbers are the only
// attributes and map to the placeholders _1.._4.
auto parse_ok = qi::parse(input.begin(), input.end(),
(qi::int_ >> ':' >> qi::uint_ >> ':' >> qi::uint_ >> '.' >> qi::uint_)
[px::bind(fill_ts_nanos, _1, _2, _3, _4) ]
>> ',' >> qi::as_string[*qi::char_]
[fill_suffix] );
std::printf("%lld.%.9ld\n", (long long)t.tv_sec, t.tv_nsec);
// Re-express the nanosecond count as fractional hours/minutes/seconds.
auto ns = t.tv_nsec * 1ns;
std::cout << std::fixed << std::setprecision(6);
std::cout << "hours: " << (ns / 1.0h) << "\n";
std::cout << "minutes: " << (ns / 1.0min) << "\n";
std::cout << "seconds: " << (ns / 1.0s) << "\n";
std::cout << "suffix: " << suffix << "\n";
return parse_ok? 0:255;
}
Prints
0.14400512000000
hours: 4.000142
minutes: 240.008533
seconds: 14400.512000
suffix: 2251812698588658
Suggestions
I'd try to simplify this by a lot, e.g., by creating a rule:
qi::rule<It, long()> timespec_ =
(qi::int_ >> ':' >> qi::uint_ >> ':' >> qi::uint_ >> '.' >> qi::uint_)
[ _val = ((_1 * 3600 + _2 * 60 + _3) * 1000 + _4) * 1000000l ];
Which means you can then parse with nothing else but:
timespec t {};
std::string suffix;
It f = input.begin(), l = input.end();
parse(f, l, timespec_ >> ',' >> *qi::char_, t.tv_nsec, suffix);
This has the same output:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <ctime>
#include <chrono>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
using namespace std::chrono_literals;
using namespace qi::labels;
using It = std::string::const_iterator;
// Parses "H:M:S.ms" and exposes a single long: the whole time converted to
// nanoseconds as ((H*3600 + M*60 + S)*1000 + ms) * 1000000.
qi::rule<It, long()> timespec_ =
(qi::int_ >> ':' >> qi::uint_ >> ':' >> qi::uint_ >> '.' >> qi::uint_)
[ _val = ((_1 * 3600 + _2 * 60 + _3) * 1000 + _4) * 1000000l ];
int main() {
std::string const input = "04:00:00.512,2251812698588658";
timespec t {};
std::string suffix;
It f = input.begin(), l = input.end();
if (parse(f, l, timespec_ >> ',' >> *qi::char_, t.tv_nsec, suffix)) {
std::printf("%lld.%.9ld\n", (long long)t.tv_sec, t.tv_nsec);
auto ns = t.tv_nsec * 1ns;
std::cout << std::fixed << std::setprecision(6);
std::cout << "hours: " << (ns / 1.0h) << "\n";
std::cout << "minutes: " << (ns / 1.0min) << "\n";
std::cout << "seconds: " << (ns / 1.0s) << "\n";
std::cout << "suffix: " << suffix << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f!=l) {
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
In my desperation I've also figured out how the container version works. See below,
auto test_fn = [&t](auto c) {
t.tv_nsec = ( ( at_c<0>(c) * 3600 +
at_c<2>(c) * 60 +
at_c<4>(c) ) * 1000 +
at_c<6>(c) ) * 1000000;
auto parse_ok = qi::parse(input.begin(), input.end(),
( qi::int_ >> qi::char_(":") >> qi::int_ >> qi::char_(":") >>
qi::int_ >> qi::char_(".") >> qi::int_ )[ test_fn ]
Admittedly, it's quite ugly.
I have the following C++ Struct:
// Width and height of a node. Each dimension carries a Type tag plus an int
// value; both tags default to PIXEL and both values default to 0.
struct Dimension {
enum Type { PARENT, CHILD, PIXEL };
Type mWidth_type = Type::PIXEL;
int mWidth = 0;
Type mHeight_type = Type::PIXEL;
int mHeight = 0;
};
My grammar looks like this:
+(char_ - "{") >> "{" >>
-(lit("width") >> ":" >> (int_ | lit("_parent") | lit("_child")) >> ";") >>
-(lit("height") >> ":" >> (int_ | lit("_parent") | lit("_child")) >> ";") >>
"}"
I have a hierarchical structure, where some nodes might take the width and/or height of the parent or child node. So in my logic I check each node's Dimension type first. If it is PIXEL I get the value; otherwise I ask for the value from the parent or child node. Because of this, in my file I can have the following possibilities (same for height):
width: 10;
In this case, I want to leave Type with the default enum PIXEL and set the value for mWidth.
or
width: _parent;
In this case, I want to set Type to PARENT and leave mWidth on default 0.
or
width: _child;
In this case, I want to set Type to CHILD and leave mWidth on default 0.
How can I parse this into a Struct? If my dimensions could take only numbers, then I would be able to proceed, but I am stuck because this is a different case. Any hints, ideas, help is much appreciated!
EDIT1:
Here is an example of the text file which needs to be parsed into the above Struct:
.struct1 {
width: 12;
height: 50;
}
.struct2 {
width: _parent;
height: 50;
}
.struct3 {
width: 40;
height: _child;
}
.struct4 {
width: _parent;
height: _child;
}
I'd suggest to factor the AST type so as to not repeat yourself:
// Width and height of a node, each described by a Value.
struct Dimension {
    // One dimension: a Type tag plus an int value.
    //
    // Defaults to PIXEL/0 so that a dimension the parser never assigns (for
    // example when only one of width/height is present) is still well-defined.
    struct Value {
        enum Type { PARENT, CHILD, PIXEL } type = PIXEL;
        int value = 0;

        // Streams the value as "[TYPE:value]", or "?" for an unknown tag.
        friend std::ostream& operator<<(std::ostream& os, Value const& v) {
            switch(v.type) {
                case PARENT: return os << "[PARENT:" << v.value << "]";
                case CHILD:  return os << "[CHILD:" << v.value << "]";
                case PIXEL:  return os << "[PIXEL:" << v.value << "]";
            }
            return os << "?";
        }
    };
    Value mWidth, mHeight;
};
Adapt it for fusion:
BOOST_FUSION_ADAPT_STRUCT(Dimension::Value, (Dimension::Value::Type, type)(int, value))
BOOST_FUSION_ADAPT_STRUCT(Dimension, (Dimension::Value, mWidth)(Dimension::Value, mHeight))
Now, I'd write the grammar to match:
start = width_ ^ height_;
width_ = lit("width") >> ':' >> value_ >> ';';
height_ = lit("height") >> ':' >> value_ >> ';';
value_ =
( "_child" >> attr(Dimension::Value::CHILD) >> attr(0)
| "_parent" >> attr(Dimension::Value::PARENT) >> attr(0)
| eps >> attr(Dimension::Value::PIXEL) >> int_
);
Note:
you could use the permutation parser to be more versatile
you can see the use of attr to inject attributes so the branches all synthesize a vector2<Type, int>
Adding debugging and a few test cases:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Width and height of a node, each described by a Value.
struct Dimension {
    // One dimension: a Type tag plus an int value.
    //
    // Defaults to PIXEL/0 so that a dimension the parser never assigns (for
    // example when only one of width/height is present) is still well-defined.
    struct Value {
        enum Type { PARENT, CHILD, PIXEL } type = PIXEL;
        int value = 0;

        // Streams the value as "[TYPE:value]", or "?" for an unknown tag.
        friend std::ostream& operator<<(std::ostream& os, Value const& v) {
            switch(v.type) {
                case PARENT: return os << "[PARENT:" << v.value << "]";
                case CHILD:  return os << "[CHILD:" << v.value << "]";
                case PIXEL:  return os << "[PIXEL:" << v.value << "]";
            }
            return os << "?";
        }
    };
    Value mWidth, mHeight;
};
BOOST_FUSION_ADAPT_STRUCT(Dimension::Value, (Dimension::Value::Type, type)(int, value))
BOOST_FUSION_ADAPT_STRUCT(Dimension, (Dimension::Value, mWidth)(Dimension::Value, mHeight))
// Grammar for "width: <value>;" and "height: <value>;" producing a Dimension.
template <typename It, typename Skipper>
struct grammar : qi::grammar<It, Dimension(), Skipper>
{
grammar() : grammar::base_type(start) {
using namespace qi;
// Permutation (^) of the two entries.
start = width_ ^ height_;
width_ = lit("width") >> ':' >> value_ >> ';';
height_ = lit("height") >> ':' >> value_ >> ';';
// Every branch yields a (Type, int) pair: attr() injects the tag, and a
// 0 value for the two keywords; the last branch takes its value from int_.
value_ =
( "_child" >> attr(Dimension::Value::CHILD) >> attr(0)
| "_parent" >> attr(Dimension::Value::PARENT) >> attr(0)
| eps >> attr(Dimension::Value::PIXEL) >> int_
);
BOOST_SPIRIT_DEBUG_NODES((start)(value_)(width_)(height_))
}
private:
qi::rule<It, Dimension(), Skipper> start;
qi::rule<It, Dimension::Value(), Skipper> value_, width_, height_;
};
// Parses three sample inputs and prints the resulting width x height pair, or
// a failure message, followed by any input left unconsumed.
int main() {
    using It = std::string::const_iterator;
    grammar<It, qi::space_type> p;

    for (std::string const input : {
            "width: 10; height: _child;",
            "width: _parent; height: 10;",
            "width: _child; height: 10;"
        })
    {
        It first = input.begin(), last = input.end();

        std::cout << "\n-----------------------------------\n"
                  << "Parsing '" << input << "'\n";

        Dimension parsed;
        bool const matched = qi::phrase_parse(first, last, p, qi::space, parsed);

        if (!matched) {
            std::cout << "Parse failed\n";
        } else {
            std::cout << "Parsed: (" << parsed.mWidth << "x" << parsed.mHeight << ")\n";
        }

        if (first != last) {
            std::cout << "Remaining input: '" << std::string(first, last) << "'\n";
        }
    }
}
Output (without debug information):
-----------------------------------
Parsing 'width: 10; height: _child;'
Parsed: ([PIXEL:10]x[CHILD:0])
-----------------------------------
Parsing 'width: _parent; height: 10;'
Parsed: ([PARENT:0]x[PIXEL:10])
-----------------------------------
Parsing 'width: _child; height: 10;'
Parsed: ([CHILD:0]x[PIXEL:10])
Why does the parser using the rules below return an empty container? There are 3 rules. One parses a string of characters other than the double quote, the second parses a pair (e.g. "col1" : 2) and the third parses a vector of such pairs. The output of the program below in MSVS2012 is
parse success
result: '' : 0
result: '' : 0
result: '' : 0
.
// Namespace-scope rules for parsing `{ "name" : n, "name" : n, ... }`.
namespace parsers
{
// One or more characters between double quotes (no skipper on this rule).
spirit::qi::rule< iterator, column_name_t() > quoted_string =
spirit::qi::lexeme["\"" >> +~spirit::qi::char_("\"") >> "\""];
// A `"name" : n` pair. The semantic action on the integer sets _pass, so a
// value outside [AVG, SUM] makes the match fail.
// NOTE(review): the rule is initialized from an expression that contains a
// semantic action, without %=; the accompanying answer points to this as the
// reason the attributes come back empty.
spirit::qi::rule< iterator, column_and_aggregate(), spirit::qi::space_type > agg_pair =
quoted_string//::boost::bind( &apply_col_and_aggr_visitor, spirit::qi::_val, spirit::qi::_1 )]
> ':'
// A rule validation technique is used below.
> spirit::int_[spirit::qi::_pass = (spirit::qi::_1 >=AVG && spirit::qi::_1<=SUM)];//::boost::bind( &apply_col_and_aggr_visitor, spirit::qi::_val, spirit::qi::_1 )];
// A brace-enclosed, comma-separated list of pairs.
spirit::qi::rule< iterator, column_and_aggregate_container(), spirit::qi::space_type > aggregates_parser =
'{'
> agg_pair/*[phoenix::push_back(spirit::qi::_val, spirit::qi::_1)]*/ % ',' // N.B.!!! list-redux technique
> '}';
}
using namespace parsers;
using namespace boost::spirit;
// Parses `input` with the namespace-scope `aggregates_parser` rule, skipping
// whitespace, and prints every parsed pair.
//
// Returns the result of phrase_parse; unconsumed input is reported on
// std::cerr but does not change the return value. Returns false when an
// expectation point fails (the exception is caught and reported).
bool doParse(const std::string& input)
{
    typedef std::string::const_iterator It;
    auto f(begin(input)), l(end(input));
    column_and_aggregate_container data;
    try
    {
        bool ok = qi::phrase_parse(f,l,aggregates_parser,qi::space,data);
        if (ok)
        {
            std::cout << "parse success\n";
            for (auto& pair : data)
                std::cout << "result: '" << pair.first << "' : " << (int) pair.second << "\n";
        }
        else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
        if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
        return ok;
    }
    catch(const qi::expectation_failure<It>& e)
    {
        // e.first..e.last is the input at which the expectation failed.
        std::string frag(e.first, e.last);
        std::cerr << e.what() << "'" << frag << "'\n";
    }
    return false;
}
// Entry point: runs doParse on one sample input; its result is not used.
int main()
{
//bool ok = doParse("{ 'column 1' : 1, 'column 2' : 0, 'column 3' : 1 }");
doParse("{ \"column 1\" : 1, \"column 2\" : 0, \"column 3\" : 1 }");
//return ok? 0 : 255;
}
// Holder for a single rule that matches text between double quotes.
// Note: this struct does not derive from qi::grammar; it only owns the rule.
template <typename it, typename skipper = qi::space_type>
struct quoted_string_parser
{
quoted_string_parser()
{
using namespace qi;
// Zero or more non-quote characters between double quotes.
quoted_string %= lexeme['"' >> *~char_('"') >> '"'];
BOOST_SPIRIT_DEBUG_NODE(quoted_string);
}
qi::rule<it, std::string(), skipper> quoted_string;
};
// Grammar for `{ "name" : n, ... }` producing a column_and_aggregate_container.
template <typename it, typename skipper = qi::space_type>
struct aggregates_parser : qi::grammar<it, column_and_aggregate_container(), skipper>
{
aggregates_parser() : aggregates_parser::base_type(aggregates_parser_)
{
using namespace qi;
// NOTE(review): `quoted_string_parser<it,skipper>` names a type here, not a
// rule object -- presumably the `quoted_string` member of an instance was
// meant; confirm.
// The semantic action on int_ sets _pass to reject values outside [AVG, SUM].
agg_pair %= quoted_string_parser<it,skipper> > ':' > int_[_pass = (qi::_1 >= AVG && qi::_1 <= SUM)];
// A brace-enclosed, comma-separated list of pairs.
aggregates_parser_ = '{' > agg_pair % ',' > '}';
BOOST_SPIRIT_DEBUG_NODE(aggregates_parser_);
}
private:
// Declared but not used by the constructor above.
qi::rule<it, sql_faggregate(), skipper> faggr;
qi::rule<it, column_and_aggregate(), skipper> agg_pair;
qi::rule<it, column_and_aggregate_container(), skipper> aggregates_parser_;
};
Like I said in the answer right where I suggested this semantic action for validation:
When a semantic action is present, automatic attribute propagation does not normally occur. Using %= forces automatic attribute propagation (we want this because the semantic action doesn't assign the attribute value(s), it just validates them).
Again, a fully working demonstration, incorporating your rules:
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef std::string column_name_t;
enum sql_faggregate
{
AVG,
// ....
SUM,
};
typedef std::pair<column_name_t, sql_faggregate> column_and_aggregate;
typedef std::vector<column_and_aggregate> column_and_aggregate_container;
// Grammar for `{ "name" : n, "name" : n, ... }` producing a
// column_and_aggregate_container.
//
// All rules use the Skipper template parameter, so the grammar can be
// instantiated with a skipper other than the default qi::space_type.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, column_and_aggregate_container(), Skipper>
{
    parser() : parser::base_type(aggregates_parser)
    {
        using namespace qi;

        // One or more characters between double quotes; the quotes are not
        // part of the attribute.
        quoted_string = lexeme['"' >> +~char_('"') >> '"'];

        // %= keeps automatic attribute propagation although a semantic action
        // is present; the action only validates the integer via _pass.
        agg_pair %= quoted_string > ':' // A rule validation technique is used below.
                  > int_[_pass = (_1 >=AVG && _1<=SUM)];

        // A brace-enclosed, comma-separated list of pairs.
        aggregates_parser = '{' > agg_pair % ',' > '}';

        BOOST_SPIRIT_DEBUG_NODE(aggregates_parser);
    }
private:
    qi::rule<It, std::string(), Skipper> quoted_string;
    // Declared but not used by the constructor above.
    qi::rule<It, sql_faggregate(), Skipper> faggr;
    qi::rule<It, column_and_aggregate(), Skipper> agg_pair;
    qi::rule<It, column_and_aggregate_container(), Skipper> aggregates_parser;
};
// Parses `input` with the `parser` grammar, skipping whitespace, and prints
// every parsed pair.
//
// Returns the result of phrase_parse; unconsumed input is reported on
// std::cerr but does not change the return value. Returns false when an
// expectation point fails (the exception is caught and reported).
bool doParse(const std::string& input)
{
    typedef std::string::const_iterator It;

    It first = begin(input);
    It last = end(input);

    parser<It, qi::space_type> p;
    column_and_aggregate_container data;

    try
    {
        bool const ok = qi::phrase_parse(first, last, p, qi::space, data);

        if (!ok)
        {
            std::cerr << "parse failed: '" << std::string(first, last) << "'\n";
        }
        else
        {
            std::cout << "parse success\n";
            for (auto it = data.begin(); it != data.end(); ++it)
                std::cout << "result: '" << it->first << "' : " << (int) it->second << "\n";
        }

        if (first != last)
            std::cerr << "trailing unparsed: '" << std::string(first, last) << "'\n";

        return ok;
    }
    catch (const qi::expectation_failure<It>& e)
    {
        // e.first..e.last is the input at which the expectation failed.
        std::cerr << e.what() << "'" << std::string(e.first, e.last) << "'\n";
    }
    return false;
}
// Entry point: exit code 0 when the sample input parses, 255 otherwise.
int main()
{
    return doParse("{ \"column 1\" : 1, \"column 2\" : 0, \"column 3\" : 1 }") ? 0 : 255;
}
Prints
parse success
result: 'column 1' : 1
result: 'column 2' : 0
result: 'column 3' : 1
as expected