Problems with optional expression and parsing error position - c++

I'm trying to write my first boost spirit parser for a specific messaging format and I encountered some problems. The boost library version used is 1.49.0!
#include <iostream>
#include <sstream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_multi_pass.hpp>
#include <boost/spirit/include/classic_position_iterator.hpp>
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
struct message
{
std::string title;
std::string sender;
std::string receiver;
unsigned int seqNo;
std::string senderRef;
std::string receiverRef;
unsigned int seqNoRef;
std::string id;
};
BOOST_FUSION_ADAPT_STRUCT(
message,
(std::string, title)
(std::string, sender)
(std::string, receiver)
(unsigned int, seqNo)
(std::string, senderRef)
(std::string, receiverRef)
(unsigned int, seqNoRef)
(std::string, id)
)
template<typename Iterator>
struct MyQiGrammar : qi::grammar<Iterator, message(), qi::space_type>
{
MyQiGrammar() : MyQiGrammar::base_type(start) {
qi::uint_parser<unsigned int, 10, 3, 3> uint_3p;
delim = qi::char_("-/"); // some values are delimited by '-' or '/'
title %= qi::repeat(3)[qi::upper]; // exactly 3 upper case letters
sender %= +qi::upper; // at least one upper case letter
receiver %= +qi::upper; // at least one upper case letter
seqNo %= uint_3p; // exactly 3 digits (e.g. 001)
id %= qi::repeat(1,7)[qi::alnum]; // at least 1 upper case letter and maximum 7
start %=
'('
>> title
>> sender >> delim >> receiver >> seqNo
>> -(sender >> delim >> receiver >> seqNo)
>> delim >> id
>>
')';
}
qi::rule<Iterator> delim;
qi::rule<Iterator, std::string(), qi::space_type> title;
qi::rule<Iterator, std::string(), qi::space_type> sender, receiver, id;
qi::rule<Iterator, unsigned int(), qi::space_type> seqNo;
qi::rule<Iterator, message(), qi::space_type> start;
};
int
main(int args, char** argv)
{
typedef std::istreambuf_iterator<char> base_iterator_type;
typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
typedef boost::spirit::classic::position_iterator2<forward_iterator_type> pos_iterator_type;
typedef MyQiGrammar<pos_iterator_type> qi_parser;
std::string rawMsg = "(ABCZ/Y002-GWI4576)";
qi_parser myGrammarParser;
message msg;
std::istringstream iss(rawMsg);
base_iterator_type in_begin(iss);
forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
forward_iterator_type fwd_end;
pos_iterator_type pos_begin(fwd_begin, fwd_end);
pos_iterator_type pos_end;
std::cout << rawMsg << std::endl;
try {
bool msgRes = qi::phrase_parse(pos_begin, pos_end,
myGrammarParser,
qi::space,
msg);
if(msgRes) {
std::cout << "Parsing succeeded!" << std::endl;
if(pos_begin == pos_end) {
std::cout << "Full match!" << std::endl;
std::cout << "Title : " << msg.title << std::endl;
std::cout << "Sender : " << msg.sender << std::endl;
std::cout << "Receiver : " << msg.receiver << std::endl;
std::cout << "Sequence number : " << msg.seqNo << std::endl;
std::cout << "Sender (ref.) : " << msg.senderRef << std::endl;
std::cout << "Receiver (ref.) : " << msg.receiverRef << std::endl;
std::cout << "Sequence number (ref.) : " << msg.seqNoRef << std::endl;
std::cout << "Message Identifier : " << msg.id << std::endl;
}
} else {
std::cout << "Parsing failed!" << std::endl;
std::cout << "Stopped at: " << pos_begin.get_position().line
<< ":" << pos_begin.get_position().column << std::endl;
}
} catch(qi::expectation_failure<pos_iterator_type>& e) {
const boost::spirit::classic::file_position_base<std::string>& pos = e.first.get_position();
std::stringstream ss;
ss << "Parse error at line " << pos.line << " column " << pos.column
<< "\n\t" << e.first.get_currentline()
<< "\n\t" << std::string(pos.column, ' ') << "^--here";
std::cerr << ss.str() << std::endl;
}
return 0;
}
In general, the message format looks like this:
'('<TITLE><SENDER>'/'<RECEIVER><SEQNO>[<SENDERREF>'/'<RECEIVERREF><SEQNOREF>]'-'<MID>')'
The output obviously shows that I'm doing something wrong with the optional message reference part:
Parsing succeeded!
Full match!
Title : ABC
Sender : Z
Receiver : Y
Sequence number : 2
Sender (ref.) :
Receiver (ref.) : GWI4576 <--- Message identifier
Sequence number (ref.) : 3072563792 <--- uninitialized, can be neglected
Message Identifier :
There are more wrong member assignments if rawMsg includes a message reference, for example "(ABCZ/Y002Y/Z001-GWI4576)":
Parsing succeeded!
Full match!
Title : ABC
Sender : Z
Receiver : Y
Sequence number : 2
Sender (ref.) : YZ <--- Sender and receiver!?
Receiver (ref.) : GWI4576 <--- Message identifier
Sequence number (ref.) : 3214704440 <--- uninitialized, but should be 1
Message Identifier :
What did I wrong in my rule(s)?
In addition, I observed that if an incorrect message format is parsed, then the line and column of the iterator is always set to 1 independent of the error position:
Parsing failed!
Stopped at: 1:1
Why is that?

Related

Boost Spirit nested components

I am trying to parse the following messages with Spirit Qi:
"A/B AND C/D", "A/B", "A/B AND C/D AND E/F"
I am able to parse "A/B" but cannot get the correct results for the other strings.
I tried to following code:
qi::rule<It, AstNodeVector()> entries;
qi::rule<It, AstNodeVector()> lists;
qi::rule<It, std::string()> element;
this->entries= *(this->lists % " AND ");
this->lists= this->element >> '/' >> this->element;
this->element = qi::char_("A-Z");
What is wrong with my grammar?
It seems you're not skipping whitespace. Maybe that's a conceptual problem (see Boost spirit skipper issues).
Regardless, it does parse:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
using AstNodeVector = std::vector<std::string>;
template <typename It>
struct P : qi::grammar<It, AstNodeVector()> {
P() : P::base_type(entries) {
entries = *(lists % " AND ");
lists = element >> '/' >> element;
element = qi::char_("A-Z");
}
private:
qi::rule<It, AstNodeVector()> entries;
qi::rule<It, AstNodeVector()> lists;
qi::rule<It, std::string()> element;
};
int main() {
using It = std::string::const_iterator;
P<It> const p {};
for (std::string const input: {
"A/B AND C/D",
"A/B",
"A/B AND C/D AND E/F",
})
{
It f = begin(input), l = end(input);
AstNodeVector results;
if (phrase_parse(f, l, p, qi::space, results)) {
std::cout << "Success: " << std::quoted(input) << "\n";
for (auto& el : results) {
std::cout << " -- " << std::quoted(el) << "\n";
}
} else {
std::cout << "FAIL: " << std::quoted(input) << "\n";
}
if (f != l) {
std::cout << "Remaining input: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Prints
Success: "A/B AND C/D"
-- "A"
-- "B"
-- "C"
-- "D"
Success: "A/B"
-- "A"
-- "B"
Success: "A/B AND C/D AND E/F"
-- "A"
-- "B"
-- "C"
-- "D"
-- "E"
-- "F"
Perhaps you should have included self-contained code, or elaborate on what exactly is the problem.

Support BOOST_FUSION_ADAPT_STRUCT for objects with fixed arrays?

Assume I already have a struct that looks like this:
struct LETTER
{
double one;
char[12] two;
double three;
char[12] four;
};
And my inputs are comma separated, for example:
"32,CATSANDDOGS,42,WHAT"
"43,BATANDZEBRAS,23,PARROT"
I've been trying to adapt this example (Spirit Qi : rule for char [5]) to to roll through BOOST_FUSION_ADAPT_STRUCT but haven't had any luck. I tried using std::array as shown here (http://www.boost.org/doc/libs/1_64_0/libs/spirit/example/qi/boost_array.cpp) but I haven't been able to make it work in a struct. Is what I'm trying to do even possible? What am I doing wrong here? I would think this would be the most obvious use case.
Is what I'm trying to do even possible?
I'm going to assume you want to write idiomatic C++ code (which is obviously the target domain for Spirit Qi), so you can use std::string:
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
namespace qi = boost::spirit::qi;
struct Letter {
double one;
std::string two;
double three;
std::string four;
};
BOOST_FUSION_ADAPT_STRUCT(Letter, one, two, three, four)
template <typename Iterator> struct LETTERParser : qi::grammar<Iterator, Letter()> {
LETTERParser() : LETTERParser::base_type(start) {
using namespace qi;
_11c = repeat(11) [char_];
start = skip(space) [ "LETTER" >> double_ >> _11c >> double_ >> _11c ];
}
private:
qi::rule<Iterator, Letter()> start;
qi::rule<Iterator, std::string()> _11c;
};
int main() {
const std::string input("LETTER 42 12345678901 +Inf abcdefghijk ");
using It = std::string::const_iterator;
LETTERParser<It> parser;
Letter example;
It f = input.begin(), l = input.end();
if (phrase_parse(f, l, parser, qi::ascii::space, example)) {
std::cout << "parsed: " << boost::fusion::as_vector(example) << "\n";
std::cout << " example.one: " << example.one << "\n";
std::cout << " example.two: '" << example.two << "'\n";
std::cout << " example.three: " << example.three << "\n";
std::cout << " example.four: '" << example.four << "'\n";
} else {
std::cout << "couldn't parse '" << input << "'\n";
}
if (f != l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints
parsed: (42 12345678901 inf abcdefghijk)
example.one: 42
example.two: '12345678901'
example.three: inf
example.four: 'abcdefghijk'

Parsing CSV into Struct Array

I am trying to parse CSV into Array of structures. Delimiter is ';'. If you look into the code it seems that I filled only first element of the array out of nine that is 9 lines are in my CSV right now. Remaining elements of the array are just 0. Anyone got some advice? Thanks
fileTO.open("imput.csv", ios_base :: app);
fileFROM.open("imput.csv", ios_base :: app);
//IntoCsvThruConsole();
// array of structures from csv
string line;
string sID, stype, scategory, samount, sdate;
int lines = CountExistingLines();
Properties * structure = new Properties[lines];
int counter = 0;
int position = 0;
while (getline(fileFROM, line))
{
sID = "";
samount = "";
for (int i = 0; i < line.size(); i++)
{
if (line[i] == ';')
{
position++;
continue;
}
switch (position)
{
case 0 : sID = sID + line[i];
break;
case 1 : structure[counter].type = structure[counter].type + line[i];
break;
case 2 : structure[counter].category = structure[counter].category + line[i];
break;
case 3 : samount = samount + line[i];
break;
case 4 : structure[counter].date = structure[counter].date + line[i];
break;
}
}
structure[counter].ID = atoi(sID.c_str());
structure[counter].amount = atoi(samount.c_str());
cout << "ID zaznamu: " << structure[counter].ID << endl;
cout << "Typ: " << structure[counter].type << endl;
cout << "Kategorie: " << structure[counter].category << endl;
cout << "Castka: " << structure[counter].amount << endl;
cout << "Datum: " << structure[counter].date << endl;
counter++;
}
delete[] structure;
I have globally initialized struct correctly and also fstreams. Hope it is enough. Thanks
enter image description here
I recommend using Boost.Spirit for such parsing tasks.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
struct Date
{
int day;
int month;
int year;
};
std::ostream& operator<<(std::ostream& os, Date const &d)
{
os << d.day << '/' << d.month << '/' << d.year;
return os;
}
BOOST_FUSION_ADAPT_STRUCT(
Date,
day, month, year)
struct Properties
{
int ID;
std::string type;
std::string category;
int amount;
Date date;
};
BOOST_FUSION_ADAPT_STRUCT(
Properties,
ID, type, category, amount, date)
std::vector<Properties> parse(std::string const &input)
{
auto iter = input.begin();
using namespace boost::spirit::x3;
auto name = rule<class name, std::string>{}
= lexeme[alpha >> *alpha];
auto date = rule<class date, Date>{}
= int_ > '/' > int_ > '/' > int_;
std::vector<Properties> result;
bool r = phrase_parse(iter, input.end(),
(int_ > ';' > name > ';' > name > ';' > int_ > ';' > date) % eol,
space - eol, result);
if (!r)
{
std::string rest(iter, input.end());
throw std::invalid_argument("Parsing failed at " + rest);
}
return result;
}
int main()
{
// This could be a file instead with std::ifstream
std::istringstream input;
input.str(
"1;TypeA;CategoryA;10;05/12/2017\n"
"2;TypeB;CategoryA;21;04/12/2017\n"
"3;TypeB;CategoryB;19;01/12/2017\n"
"4;TypeA;CategoryA; 6;20/11/2017\n");
std::vector<Properties> entry = parse(input.str());
for (auto e : entry)
{
std::cout << "Found the following entries:\n"
<< " ID: " << e.ID << "\n"
<< " Type: " << e.type << "\n"
<< " Category: " << e.category << "\n"
<< " Amount: " << e.amount << "\n"
<< " Date: " << e.date << "\n";
}
}
Live example

parsing 3 floats for glm::vec3 using boost::spirit::qi (error_invalid_expression)

I can parse one float and print it. (test1, test2)
Somehow I am unable to build a rule that parses three floats.
My final goal is to parse three floats and save them to a glm::vec3.
My rule seems to be incorrect:
qi::lexeme[qi::float_ << ' ' << qi::float_ << ' ' << qi::float_]
I am not sure if i am using BOOST_FUSION_ADAPT_STRUCT correctly
Here is a source to show what i came up with so far:
#include <iostream>
#include <string>
#include <glm\glm.hpp>
#include <boost\spirit\include\qi.hpp>
#include <boost\fusion\include\adapt_struct.hpp>
namespace qi = boost::spirit::qi;
void test1()
{
std::string test = "1.2";
auto it = test.begin();
if (qi::phrase_parse(it, test.end(), qi::float_, qi::space) && (it == test.end()))
std::cout << "test 1 ok" << std::endl;
else
std::cout << "test 1 not ok" << std::endl;
}
void test2()
{
std::string test = "1.2";
auto it = test.begin();
float f;
if (qi::phrase_parse(it, test.end(), qi::float_, qi::space, f) && (it == test.end()))
std::cout << "test 2 ok " << f << std::endl;
else
std::cout << "test 2 not ok" << std::endl;
}
void test3()
{
std::string test = "1.2 2.2 3.3";
qi::rule<std::string::iterator, qi::space_type> VEC3;
//error_invalid_expression
VEC3 = qi::lexeme[qi::float_ << ' ' << qi::float_ << ' ' << qi::float_];
auto it = test.begin();
if (qi::phrase_parse(it, test.end(), VEC3, qi::space) && (it == test.end()))
std::cout << "test 3 ok " << std::endl;
else
std::cout << "test 3 not ok" << std::endl;
}
BOOST_FUSION_ADAPT_STRUCT(
glm::vec3,
(float, x)
(float, y)
(float, z)
)
void test4()
{
std::string test = "1.2 2.2 3.3";
qi::rule<std::string::iterator, qi::space_type> VEC3;
//error_invalid_expression
VEC3 = qi::lexeme[qi::float_ << ' ' << qi::float_ << ' ' << qi::float_];
glm::vec3 result;
auto it = test.begin();
if (qi::phrase_parse(it, test.end(), VEC3, qi::space, result) && (it == test.end()))
{
std::cout << "test 4 ok (" << result.x << ", " << result.y << ", " << result.z << ")"<< std::endl;
}
else
std::cout << "test 4 not ok" << std::endl;
}
int main(int argc, char ** argv)
{
test1();
test2();
test3();
test4();
}
The test4 function contains everything I try to get done.
EDIT:
As suggested two things need to be changed:
void test4()
{
std::string test = "1.2 2.2 3.3";
qi::rule<std::string::iterator, glm::vec3(), qi::space_type> VEC3;
//error_invalid_expression
VEC3 = qi::lexeme[qi::float_ >> ' ' >> qi::float_ >> ' ' >> qi::float_];
glm::vec3 result;
auto it = test.begin();
if (qi::phrase_parse(it, test.end(), VEC3, qi::space, result) && (it == test.end()))
{
std::cout << "test 4 ok (" << result.x << ", " << result.y << ", " << result.z << ")"<< std::endl;
}
else
std::cout << "test 4 not ok" << std::endl;
}
I agree with Pete Becker. Simpler is usually better.
Now, since you are going to end up using spirit, let's look at what can be simpler:
use c++11 adapt:
BOOST_FUSION_ADAPT_STRUCT(glm::vec3, x, y, z)
you can do without the adapt (simpler in the demo)
you can (should) drop the skipper from the rule declaration if you're doing a toplevel lexeme[] directive anyways¹. See stackoverflow.com/questions/17072987/boost-spirit-skipper-issues/17073965#17073965
better yet, do without the rule for readability here. In reality, you would have a larger grammar with multiple rules. Be sure not to instantiate the grammar each parse (for efficiency).
Demo testbed Live On Coliru
//#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <glm/glm.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
glm::vec3 simpler(std::string const& test) {
auto it = test.begin();
glm::vec3 result;
using namespace qi;
if (parse(it, test.end(),
float_ >> ' ' >> float_ >> ' ' >> float_ >> eoi,
result.x, result.y, result.z))
{
return result;
}
throw std::runtime_error("invalid input");
}
glm::vec3 use_skipper(std::string const& test) {
auto it = test.begin();
glm::vec3 result;
using namespace qi;
if (phrase_parse(it, test.end(),
float_ >> float_ >> float_ >> eoi,
space, result.x, result.y, result.z))
{
return result;
}
throw std::runtime_error("invalid input");
}
BOOST_FUSION_ADAPT_STRUCT(glm::vec3, x, y, z)
glm::vec3 with_adapt(std::string const& test) {
auto it = test.begin();
glm::vec3 result;
using namespace qi;
if (phrase_parse(it, test.end(), float_ >> float_ >> float_ >>eoi, space, result))
{
return result;
}
throw std::runtime_error("invalid input");
}
int main() {
struct { glm::vec3 (&f)(std::string const&); std::string name; } tests[] = {
{simpler, "simpler"},
{use_skipper, "use_skipper"},
{with_adapt, "with_adapt"}
};
for (auto t : tests) try {
glm::vec3 v = t.f("1.2 2.2 3.3");
std::cout << t.name << " ok (" << v.x << ", " << v.y << ", " << v.z << ")\n";
} catch(std::exception const& e) {
std::cout << t.name << " fail (" << e.what() << ")\n";
}
}
You can be slightly more lightweight Using x3. Not really less work, but certainly less heavy on the compiler.
¹ subtle difference w.r.t. pre/post skipping, but that difference would be gone if there's a containing grammar with skipper
The sequence parser operator is >>, use that instead of << in your rules
VEC3 = qi::lexeme[qi::float_ >> ' ' >> qi::float_ >> ' ' >> qi::float_];
You've specified a whitespace skipper in your rule, so it can be further simplified by removing the lexeme directive and letting the skipper skip spaces automatically (unless you want to ensure there is a single space character in between the inputs).
VEC3 = qi::float_ >> qi::float_ >> qi::float_;
Also, if you want your rule to return a value, you need to add that signature to the rule type
qi::rule<std::string::iterator, glm::vec3(), qi::space_type> VEC3;
This is unrelated to the compilation error you're seeing, but use forward slashes in your #include directives, that works on all platforms.
#include <boost/spirit/include/qi.hpp>
Seems like an awful lot of work for something that's straightforward.
#include <iostream>
#include <string>
#include <sstream>
int main() {
std::string test = "1.2 2.2 3.3";
float f1, f2, f3;
std::istringstream in(test);
in >> f1 >> f2 >> f3;
return 0;
}

Result of parsing a boost::spirit grammar with sub-expressions

I'm trying to get into boost spirit 2, but the following code does not work as expected:
template<typename Iterator>
struct my_grammar : qi::grammar<Iterator, std::string()>
{
my_grammar() : my_grammar::base_type(time_literal)
{
using ascii::char_;
using ascii::digit;
time_literal = digit >> -digit >> char_(':') >> digit >> digit >> -(char_(':') >> digit >> digit);
}
qi::rule<Iterator, std::string()> time_literal;
};
void main()
{
my_grammar<std::string::iterator> g;
std::string input("01:02:03");
std::string::iterator begin = input.begin();
std::string::iterator iter = begin;
std::string::iterator end = input.end();
std::string result;
bool matched = phrase_parse(iter, end, g, ascii::space, result);
std::cout << (matched ? "matched "+std::string(begin, iter) : "no match") << std::endl;
if (iter != end)
std::cout << "remaining: " << std::string(iter, end) << std::endl;
else
std::cout << "result: " << result << std::endl;
std::cout << std::endl;
}
This prints:
matched: 01:02:03
result: 01:02:
But I expected to see:
matched: 01:02:03
result: 01:02:03
So where did those last two digits go and how can I get them back?