boost spirit parse with the source - c++

I would like to be able to parse a Number, to store its original source and to track its position in the source preserving it in the structure itself.
This is what I have so far:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <iomanip>
#include <ios>
#include <string>
#include <complex>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
struct Position
{
Position()
: line(-1)
{
}
size_t line;
};
struct Number : public Position
{
Number()
: Position()
, value(-1)
, source()
{
}
unsigned value;
std::string source;
};
using namespace boost::spirit;
BOOST_FUSION_ADAPT_STRUCT(Number,
(unsigned, value)
(std::string, source)
(size_t, line)
);
template <typename Iterator>
struct source_hex : qi::grammar<Iterator, Number()>
{
source_hex() : source_hex::base_type(start)
{
using qi::eps;
using qi::hex;
using qi::lit;
using qi::raw;
using qi::_val;
using qi::_1;
using ascii::char_;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
start = raw[ (lit("0x") | lit("0X"))
>> hex [at_c<0>(_val) = _1]
][at_c<2>(_val) = get_line(begin(_1))]
[at_c<1>(_val) = construct<std::string>(begin(_1), end(_1))]
;
}
qi::rule<Iterator, Number()> start;
};
and the test code is:
typedef line_pos_iterator<std::string::const_iterator> Iterator;
source_hex<Iterator> g;
Iterator iter(str.begin());
Iterator end(str.end());
Number number;
bool r = parse(iter, end, g, number);
if (r && iter == end) {
std::cout << number.line << ": 0x" << std::setw(8) << std::setfill('0') << std::hex << number.value << " // " << number.source << "\n";
} else
std::cout << "Parsing failed\n";
what I am not getting is why the iterator on line:
[at_c<2>(_val) = get_line(begin(_1))]
is not a line_pos_iterator even this is the one I am using for the parser.
I will appreciate explanation as well as ideas how to solve the problem - in whatever way.

Have a look at
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
This defines a parser that directly exposes the position as an attribute. Let me add an example in a few minutes.
Edit I found it hard to shoe-horn iter_pos into your sample without "assuming" things and changing your data type layout. I'd very much favour this (I'd strive to lose the semantic actions all the way.). However, time's limited.
Here's a little helper that you can use to fix your problem:
struct get_line_f
{
template <typename> struct result { typedef size_t type; };
template <typename It> size_t operator()(It const& pos_iter) const
{
return get_line(pos_iter);
}
};
^ The polymorphic actor, use as such:
start = raw[ qi::no_case["0x"] >> hex [at_c<0>(_val) = _1] ]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1))
]
;
// with
boost::phoenix::function<get_line_f> get_line_;
Note I changed a few minor points.
Fully running demo with output: Live On Coliru

Related

Spirit Qi First Parser

What did I mess up here? I'm getting 'start': undeclared identifier but I stuck pretty closely to the tutorial, so I'm not sure where I made a typo, or what I did wrong. Any hints? You all see the same thing, right?
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
#include <boost/array.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi_no_skip.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
using qi::lit;
using qi::int_;
using qi::double_;
using ascii::char_;
using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::no_skip;
using qi::eoi;
struct LETTER
{
char hi;
// int fourtytwo;
// char mom;
};
BOOST_FUSION_ADAPT_STRUCT(
LETTER,
(char, hi)
// (int, fourtytwo)
// (char, mom)
)
template <typename Iterator>
struct LETTERParser : qi::grammar<Iterator, LETTER(), ascii::space_type>
{
LETTERParser(): LETTERParser::base_type(start)
{
start %= lit("LETTER") >> char_;
// >> char_
// >> int_
// >> char_
// >> eoi
// ;
}
};
const std::string wat("Z");
int main()
{
LETTERParser<std::string::const_iterator> f;
LETTER example;
phrase_parse(wat.begin(), wat.end(), f, no_skip, example);
return 0;
}
There are a number of issues, one of which is non obvious
where's no_skip? Why are you passing it to a grammar that requires ascii::space_type?
where is the start rule declared?
don't pollute global namespace - it creates hard problems in generic code
handle errors
the grammar starts with a mandatory character sequence, which doesn't match the input
the non-obvious one: single-element structs interfere in unfortunate ways in Spirit/Fusion land.
Simplify:
Fixing the above and modernizing (c++11) the fusion adaptation:
live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
namespace qi = boost::spirit::qi;
struct LETTER {
char hi;
int fourtytwo;
char mom;
};
BOOST_FUSION_ADAPT_STRUCT(LETTER, hi, fourtytwo, mom)
template <typename Iterator> struct LETTERParser : qi::grammar<Iterator, LETTER(), qi::ascii::space_type> {
LETTERParser() : LETTERParser::base_type(start) {
using qi::char_;
using qi::int_;
start = "LETTER" >> char_ >> int_ >> char_;
}
private:
qi::rule<Iterator, LETTER(), qi::ascii::space_type> start;
};
int main() {
const std::string input("LETTER Z 42m");
using It = std::string::const_iterator;
LETTERParser<It> parser;
LETTER example;
It f = input.begin(), l = input.end();
if (phrase_parse(f, l, parser, qi::ascii::space, example)) {
std::cout << "parsed: " << boost::fusion::as_vector(example) << "\n";
} else {
std::cout << "couldn't parse '" << input << "'\n";
}
if (f != l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints
parsed: (Z 42 m)
Single Element:
You're in, luck it doesn't bite in your case:
Live On Coliru
Prints
parsed: (Z)
Remaining unparsed input: '42m'
as expected. If it strikes in the future, refer here e.g. Size of struct with a single element
Bonus
Consider encapsulating the choice of skipper. The caller should probably never be able to override it Live On Coliru - see also Boost spirit skipper issues

reuse parsed variable with boost karma

I have a code base which is quite equivalent to the code below.
I try to generate a text file with two times the content of a variable.
I feel that the answer is in semantic actions and _a and _val but cannot manage to get through even with the documentation.
How will you do to have :
"toto" in str
and output :
toto some stuff toto
i.e how to reuse a parsed variable in karma ?
struct data
{
std::string str;
};
BOOST_FUSION_ADAPT_STRUCT(
data,
(std::string, str)
)
template <typename Iterator>
struct data: karma::grammar<Iterator, data() >
{
data():data::base_type(start)
{
start = karma::string << karma::lit("some stuff") << karma::string; //Second string is in fact the first one
}
karma::rule<Iterator, data()> start;
};
Solution (according to posts below :)
#include <iostream>
#include <string>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/spirit/include/support_iso8859_1.hpp>
namespace ast
{
struct data
{
std::string str;
};
}
BOOST_FUSION_ADAPT_STRUCT(
ast::data,
(std::string, str)
)
namespace karma = boost::spirit::karma;
namespace parser
{
template <typename Iterator>
struct data: karma::grammar<Iterator, ast::data() >
{
data():data::base_type(start)
{
start =
karma::string[karma::_1 = boost::phoenix::at_c<0>(karma::_val)] <<
karma::lit("some stuff") <<
karma::string[karma::_1 = boost::phoenix::at_c<0>(karma::_val)]
;
}
karma::rule<Iterator, ast::data()> start;
};
}
main()
{
ast::data d;
d.str = "toto";
std::string generated;
typedef std::back_insert_iterator<std::string> iterator_type;
parser::data<iterator_type> d_p;
iterator_type sink(generated);
karma::generate(sink, d_p, d);
std::cout << generated << std::endl;
}
This should do the trick:
start = karma::string[karma::_1 = karma::_val]
<< karma::lit("some stuff")
<< karma::string[karma::_1 = karma::_val];

Generating default value when none is found

I have an input vector that can have any size between empty and 3 elements. I want the generated string to always be 3 floats separated by spaces, where a default value is used if there aren't enough elements in the vector. So far I've managed to output only the contents of the vector:
#include <iostream>
#include <iterator>
#include <vector>
#include "boost/spirit/include/karma.hpp"
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
typedef std::back_insert_iterator<std::string> BackInsertIt;
int main( int argc, char* argv[] )
{
std::vector<float> input;
input.push_back(1.0f);
input.push_back(2.0f);
struct TestGram
: karma::grammar<BackInsertIt, std::vector<float>(), karma::space_type>
{
TestGram() : TestGram::base_type(output)
{
using namespace karma;
floatRule = double_;
output = repeat(3)[ floatRule ];
}
karma::rule<BackInsertIt, std::vector<float>(), karma::space_type> output;
karma::rule<BackInsertIt, float(), karma::space_type> floatRule;
} testGram;
std::string output;
BackInsertIt sink(output);
karma::generate_delimited( sink, testGram, karma::space, input );
std::cout << "Generated: " << output << std::endl;
std::cout << "Press enter to exit" << std::endl;
std::cin.get();
return 0;
}
I've tried modifying the float rule to something like this: floatRule = double_ | lit(0.0f), but that only gave me compilation errors. The same for a lot of other similar stuff I tried.
I really have no idea how to get this working. Some help would be great :)
EDIT: Just to make it clear. If I have a vector containing 2 elements: 1.0 and 2.0, I want to generate a string that looks like this: "1.0 2.0 0.0" (the last value should be the default value).
Not pretty, but working:
#include <iostream>
#include <iterator>
#include <vector>
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include "boost/spirit/include/karma.hpp"
#include <boost/spirit/include/phoenix.hpp>
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
typedef std::back_insert_iterator<std::string> BackInsertIt;
int main(int argc, char* argv[]) {
std::vector<float> input;
input.push_back(1.0f);
input.push_back(2.0f);
struct TestGram: karma::grammar<BackInsertIt, std::vector<float>(),
karma::space_type> {
TestGram()
: TestGram::base_type(output) {
using namespace karma;
floatRule = double_;
output = repeat(phx::bind(&std::vector<float>::size, (karma::_val)))[floatRule]
<< repeat(3 - phx::bind(&std::vector<float>::size, (karma::_val)))[karma::lit("0.0")];
}
karma::rule<BackInsertIt, std::vector<float>(), karma::space_type> output;
karma::rule<BackInsertIt, float(), karma::space_type> floatRule;
} testGram;
std::string output;
BackInsertIt sink(output);
karma::generate_delimited(sink, testGram, karma::space, input);
std::cout << "Generated: " << output << std::endl;
return 0;
}
Big warning:
The code shown is flawed, either due to a bug, or due to abuse of karma attribute propagation (see the comment).
It invokes Undefined Behaviour (presumably) dereferencing the end() iterator on the input vector.
This should work
floatRule = double_ | "0.0";
output = -floatRule << -floatRule << -floatRule;
Note, floatRule should accept an optional<float> instead. See it Live on Coliru
Minimal example:
#include "boost/spirit/include/karma.hpp"
namespace karma = boost::spirit::karma;
using It = boost::spirit::ostream_iterator;
int main( int argc, char* argv[] )
{
const std::vector<float> input { 1.0f, 2.0f };
using namespace karma;
rule<It, boost::optional<float>()> floatRule = double_ | "0.0";
rule<It, std::vector<float>(), space_type> output = -floatRule << -floatRule << -floatRule;
std::cout << format_delimited(output, space, input);
}

unhandled exception using Boost Spirit to parse grammar

I am trying to use Boost Spirit to parse the following grammar:
sentence:
noun verb
sentence conjunction sentence
conjunction:
"and"
noun:
"birds"
"cats"
verb:
"fly"
"meow"
parsing succeeds when the grammar only includes noun >> verb rule.
when grammar is modified to include sentence>>conjunction>>sentence rule and i supply an invalid input such as "birds fly" instead of "birdsfly" i get an unhandled exception when the program runs.
here is the code which is modified from examples found on boost doc
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
Grammar(TokenDef const& tok)
: Grammar::base_type(sentence)
{
sentence = (tok.noun>>tok.verb)
|
(sentence>>tok.conjunction>>sentence)>>eoi
;
}
qi::rule<Iterator> sentence;
};
int main()
{
typedef lex::lexertl::token<char const*, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g (word_count);
std::string str = "birdsfly";
//std::string str = "birds fly"; this input caused unhandled exception
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
system("PAUSE");
return 0;
}
You have left-recursion in the second branch of the sentence rule.
sentence = sentence >> ....
will always recurse on sentence, so you're seeing a stackoverflow.
I suggest writing the rule like, e.g:
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
Now the result reads
g++ -Wall -pedantic -std=c++0x -g -O0 test.cpp -o test
Parsing failed
stopped at: " fly"
(and the inevitable "sh: PAUSE: command not found" of course...)
PS. Don't using namespace please. Instead:
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
Here's a cleaned up version with some other stuff removed/fixed: http://coliru.stacked-crooked.com/view?id=1fb26ca3e8c207979eaaf4592c319316-e223fd4a885a77b520bbfe69dda8fb91
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
// #include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;
template <typename Lexer>
struct token_list : lex::lexer<Lexer>
{
token_list()
{
noun = "birds|cats";
verb = "fly|meow";
conjunction = "and";
this->self.add
(noun)
(verb)
(conjunction)
;
}
lex::token_def<std::string> noun, verb, conjunction;
};
template <typename Iterator>
struct Grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
Grammar(TokenDef const& tok) : Grammar::base_type(sentence)
{
sentence =
(tok.noun >> tok.verb)
>> *(tok.conjunction >> sentence)
>> qi::eoi
;
}
qi::rule<Iterator> sentence;
};
int main()
{
typedef std::string::const_iterator It;
typedef lex::lexertl::token<It, boost::mpl::vector<std::string>> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef token_list<lexer_type>::iterator_type iterator_type;
token_list<lexer_type> word_count;
Grammar<iterator_type> g(word_count);
//std::string str = "birdsfly";
const std::string str = "birds fly";
It first = str.begin();
It last = str.end();
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) {
std::cout << "Parsing passed"<< "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \"" << rest << "\"\n";
}
}

boost spirit semantic actions and fusion data types

I have a rule which is supposed to return a Boost.Fusion ASSOC_STRUCT. I am trying to assign to _val the results parsed by the rule's parsers, but I cannot make it work. I will skip the talking and present you directly with the relevant code.
#include <boost/fusion/include/define_assoc_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <iostream>
#include <string>
namespace keys {
struct actor_key;
struct action_key;
struct money_key;
}
BOOST_FUSION_DEFINE_ASSOC_STRUCT(
(), ActionLine,
(int, action, keys::action_key)
(int, amount, keys::money_key)
(int, actor, keys::actor_key)
)
int main ()
{
namespace qi = boost::spirit::qi;
using qi::_1;
using qi::_2;
using qi::_a;
using qi::int_;
using qi::lit;
using boost::spirit::_val;
using boost::fusion::as_vector;
qi::symbols<char, int> name_parser_;
name_parser_.add("name4", 4);
std::string input("string1 ($7) string2 name4");
std::string::const_iterator f(input.begin()), l(input.end());
ActionLine expected{0, 7, 4}, result;
//The following rule is supposed to parse input of the form
//"string1 ($[integer]) string2 [name]"
//and return a triple (ActionLine) with the values { 0, value of (int_), value of (name_parser_) }
qi::rule<std::string::const_iterator, ActionLine()> action_line_ =
lit("string1 ($") >> int_ >> lit(") string2 ") >> name_parser_
// [ _val = ActionLine{0, _1, _2} ]; // this is what I am trying to achieve
[ _val = ActionLine{0, 7, 4} ]; //this compiles, but of course is not what I need
bool b =
qi::parse(f, l, action_line_, result) &&
as_vector(result) == as_vector(expected);
std::cout << "test: " << std::boolalpha << b << std::endl;
return 0;
}
(Compile with g++ above_file.cpp -std=c++0x)
Compiler error is somewhat different in my real application than in this example, but it is something like (in the line of _val = ActionLine{0, _1, _2} ):
No matching function of call ::ActionLine::ActionLine(), and I guess it cannot convert _1 and _2 to ints.
I also tried to add local int variables and use them to copy the parsed values, but it did not work, not did using boost::phoenix::at(_1,0), boost::phoenix::at(_1,1) ( I found those ideas here boost spirit semantic action parameters)
You need to use phoenix::construct in your semantic action to do what you want.
#include <boost/fusion/include/define_assoc_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp> //CHANGE:you need to include this to use "phoenix::construct"
#include <boost/fusion/include/as_vector.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <iostream>
#include <string>
namespace keys {
struct actor_key;
struct action_key;
struct money_key;
}
BOOST_FUSION_DEFINE_ASSOC_STRUCT(
(), ActionLine,
(int, action, keys::action_key)
(int, amount, keys::money_key)
(int, actor, keys::actor_key)
)
int main ()
{
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
using qi::_1;
using qi::_2;
using qi::_a;
using qi::int_;
using qi::lit;
using boost::spirit::_val;
using boost::fusion::as_vector;
qi::symbols<char, int> name_parser_;
name_parser_.add("name4", 4);
std::string input("string1 ($7) string2 name4");
std::string::const_iterator f(input.begin()), l(input.end());
ActionLine expected{0, 7, 4}, result;
//The following rule is supposed to parse input of the form
//"string1 ($[integer]) string2 [name]"
//and return a triple (ActionLine) with the values { 0, value of (int_), value of (name_parser_) }
qi::rule<std::string::const_iterator, ActionLine()> action_line_ =
(lit("string1 ($") >> int_ >> lit(") string2 ") >> name_parser_) //CHANGE:the parentheses are important otherwise the semantic action would be attached to name_parser_
[ _val = phx::construct<ActionLine>(0, _1, _2) ]; //CHANGE: you need to use phoenix to use the placeholders _1, _2, etc
bool b =
qi::parse(f, l, action_line_, result) &&
as_vector(result) == as_vector(expected);
std::cout << "test: " << std::boolalpha << b << std::endl;
std::cout << at_key<keys::action_key>(result)<< ", " << at_key<keys::money_key>(result)<< ", " << at_key<keys::actor_key>(result) << std::endl;
return 0;
}