I have a file containing data on the form:
fractal mand1 {
;lkkj;kj;
}
fractal mand2 {
if (...) {
blablah;
}
}
fractal julia1 {
a = ss;
}
I want to extract the name of data containers, so I want to retrieve a vector containing in the specific case mand1, mand2, julia1.
I've read the sample about parsing a number list into a vector, but I want to maintain the grammar in a separate file.
I've create a struct representing the grammar, and then I use it in order to parse the string containing data. I would expect an output like
mand1
mand2
julia1
Instead I obtain
mand1 {
;lkkj;kj;
}
fractal mand2 {
if (...) {
blablah;
}
}
fractal julia1 {
a = ss;
}
My parser recognizes the first fractal term but then it parses the rest of the file as single string item instead that parse it as I want.
What I'm doing wrong?
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <vector>
#include <iostream>
using boost::spirit::ascii::space;
using boost::spirit::ascii::space_type;
using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::lit;
using boost::spirit::qi::lexeme;
using boost::spirit::qi::skip;
using boost::spirit::ascii::char_;
using boost::spirit::ascii::no_case;
using boost::spirit::qi::rule;
typedef std::string::const_iterator sit;
template <typename Iterator>
struct FractalListParser : boost::spirit::qi::grammar<Iterator, std::vector<std::string>(), boost::spirit::ascii::space_type> {
FractalListParser() : FractalListParser::base_type(start) {
no_quoted_string %= *(lexeme[+(char_ - '"')]);
start %= *(no_case[lit("fractal")] >> no_quoted_string >> '{' >> *(skip[*(char_)]) >> '}');
}
rule<Iterator, std::string(), space_type> no_quoted_string;
rule<Iterator, std::vector<std::string>(), space_type> start;
};
int main() {
const std::string fractalListFile(R"(
fractal mand1 {
;lkkj;kj;
}
fractal mand2 {
if (...) {
blablah;
}
}
fractal julia1 {
a = ss;
}
)");
std::cout << "Read Test:" << std::endl;
FractalListParser<sit> parser;
std::vector<std::string> data;
bool r = phrase_parse(fractalListFile.begin(), fractalListFile.end(), parser, space, data);
for (auto& i : data) std::cout << i << std::endl;
return 0;
}
If you use error handling, you'll find that the parse failed, and nothing got effectively parsed:
Live On Coliru
Output:
Read Test:
Parse success:
----
mand1 {
;lkkj;kj;
}
fractal mand2 {
if (...) {
blablah;
}
}
fractal julia1 {
a = ss;
}
Remaining unparsed input: 'fractal mand1 {
;lkkj;kj;
}
fractal mand2 {
if (...) {
blablah;
}
}
fractal julia1 {
a = ss;
}
'
What was the problem?
You probably want to ignore the "body" (between {}). Therefore I suppose you actually wanted to omit the attribute:
>> '{' >> *(omit[*(char_)]) >> '}'
rather than skip(*char_).
The expression *char_ is greedy, and will always match to the end of input... You probably wanted to limit the charset:
in the "name" *~char_("\"{") to avoid "eating" all of the body as well. To avoid matching spaces use graph (e.g. +graph - '"'). In case you want to parse "identifiers" be explicit e.g.
alpha > *(alnum | char_('_'))
in the body *~char_('}') or *(char_ - '}') (the latter being less efficient).
The nesting of optional quantifiers is not productive:
*(omit[*(char_)])
Will just have very slow worst-case runtime (because *char_ could be empty, and *(omit[*(char_)]) could also be empty). Say what you mean instead:
omit[*char_]
The simplest way to have a lexeme is to drop the skipper from the rule declaration (see also Boost spirit skipper issues)
Program logic:
Since your sample contains nested blocks (mand2 for example), you need to treat the blocks recursively in order to avoid calling the first } the end of the outer block:
block = '{' >> -block % (+~char_("{}")) >> '}';
Loose hints:
use BOOST_SPIRIT_DEBUG to find out where parsing is rejected/matched. E.g. after refactoring the rules a bit:
we got the output (On Coliru):
Read Test:
<start>
<try>fractal mand1 {\n </try>
<no_quoted_string>
<try>mand1 {\n ;lkkj;kj</try>
<success> {\n ;lkkj;kj;\n}\n\n</success>
<attributes>[[m, a, n, d, 1]]</attributes>
</no_quoted_string>
<body>
<try>{\n ;lkkj;kj;\n}\n\nf</try>
<fail/>
</body>
<success>fractal mand1 {\n </success>
<attributes>[[]]</attributes>
</start>
Parse success:
Remaining unparsed input: 'fractal mand1 {
;lkkj;kj;
}
fractal mand2 {
if (...) {
blablah;
}
}
fractal julia1 {
a = ss;
}
'
That output helped me spot that I actually forgot the - '}' part in the body rule... :)
No need for %= when there are no semantic actions involved in that rule definition (docs)
you probably want to make sure fractal is actually a separate word, so you don't match fractalset multi { .... }
Demo Program
With these in place we can have a working demo:
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iostream>
namespace qi = boost::spirit::qi;
template <typename Iterator>
struct FractalListParser : qi::grammar<Iterator, std::vector<std::string>(), qi::space_type> {
FractalListParser() : FractalListParser::base_type(start) {
using namespace qi;
identifier = alpha > *(alnum | char_('_'));
block = '{' >> -block % +~char_("{}") >> '}';
start = *(
no_case["fractal"] >> identifier >> block
);
BOOST_SPIRIT_DEBUG_NODES((start)(block)(identifier))
}
qi::rule<Iterator, std::vector<std::string>(), qi::space_type> start;
// lexemes (just drop the skipper)
qi::rule<Iterator, std::string()> identifier;
qi::rule<Iterator> block; // leaving out the attribute means implicit `omit[]`
};
int main() {
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
std::cout << "Read Test:" << std::endl;
FractalListParser<It> parser;
std::vector<std::string> data;
bool r = qi::phrase_parse(f, l, parser, qi::space, data);
if (r) {
std::cout << "Parse success:\n";
for (auto& i : data)
std::cout << "----\n" << i << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints:
Read Test:
Parse success:
----
mand1
----
mand2
----
julia1
Related
I'm trying to create a (pretty simple) parser using boost::spirit::qi to extract messages from a stream. Each message starts from a short marker and ends with \r\n. The message body is ASCII text (letters and numbers) separated by a comma. For example:
!START,01,2.3,ABC\r\n
!START,456.2,890\r\n
I'm using unit tests to check the parser and everything works well when I pass only correct messages one by one. But when I try to emulate some invalid input, like:
!START,01,2.3,ABC\r\n
trash-message
!START,456.2,890\r\n
The parser doesn't see the following messages after an unexpected text.
I'm new in boost::spirit and I'd like to know how a parser based on boost::spirit::qi::grammar is supposed to work.
My question is:
Should the parser slide in the input stream and try to find a beginning of a message?
Or the caller should check the parsing result and in case of failure move an iterator and then recall the parser again?
Many thanks for considering my request.
My question is: Should the parser slide in the input stream and try to find a beginning of a message?
Only when you tell it to. It's called qi::parse, not qi::search. But obviously you can make a grammar ignore things.
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
namespace qi = boost::spirit::qi;
struct Command {
enum Type { START, QUIT, TRASH } type = TRASH;
std::vector<std::string> args;
};
using Commands = std::vector<Command>;
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)
template <typename It> struct CmdParser : qi::grammar<It, Commands()> {
CmdParser() : CmdParser::base_type(commands_) {
type_.add("!start", Command::START);
type_.add("!quit", Command::QUIT);
trash_ = *~qi::char_("\r\n"); // just ignore the entire line
arg_ = *~qi::char_(",\r\n");
command_ = qi::no_case[type_] >> *(',' >> arg_);
commands_ = *((command_ | trash_) >> +qi::eol);
BOOST_SPIRIT_DEBUG_NODES((trash_)(arg_)(command_)(commands_))
}
private:
qi::symbols<char, Command::Type> type_;
qi::rule<It, Commands()> commands_;
qi::rule<It, Command()> command_;
qi::rule<It, std::string()> arg_;
qi::rule<It> trash_;
};
int main() {
std::string_view input = "!START,01,2.3,ABC\r\n"
"trash-message\r\n"
"!START,456.2,890\r\n";
using It = std::string_view::const_iterator;
static CmdParser<It> const parser;
Commands parsed;
auto f = input.begin(), l = input.end();
if (parse(f, l, parser, parsed)) {
std::cout << "Parsed:\n";
for(Command const& cmd : parsed) {
std::cout << cmd.type;
for (auto& arg: cmd.args)
std::cout << ", " << quoted(arg);
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l)
std::cout << "Remaining unparsed: " << quoted(std::string(f, l)) << "\n";
}
Printing
Parsed:
0, "01", "2.3", "ABC"
2
0, "456.2", "890"
I have a file which contains some "entity" data in Valve's format. It's basically a key-value deal, and it looks like this:
{
"world_maxs" "3432 4096 822"
"world_mins" "-2408 -4096 -571"
"skyname" "sky_alpinestorm_01"
"maxpropscreenwidth" "-1"
"detailvbsp" "detail_sawmill.vbsp"
"detailmaterial" "detail/detailsprites_sawmill"
"classname" "worldspawn"
"mapversion" "1371"
"hammerid" "1"
}
{
"origin" "553 -441 322"
"targetname" "tonemap_global"
"classname" "env_tonemap_controller"
"hammerid" "90580"
}
Each pair of {} counts as one entity, and the rows inside count as KeyValues. As you can see, it's fairly straightforward.
I want to process this data into a vector<map<string, string> > in C++. To do this, I've tried using regular expressions that come with Boost. Here is what I have so far:
static const boost::regex entityRegex("\\{(\\s*\"([A-Za-z0-9_]+)\"\\s*\"([^\"]+)\")+\\s*\\}");
boost::smatch what;
while (regex_search(entitiesString, what, entityRegex)) {
cout << what[0] << endl;
cout << what[1] << endl;
cout << what[2] << endl;
cout << what[3] << endl;
break; // TODO
}
Easier-to-read regex:
\{(\s*"([A-Za-z0-9_]+)"\s*"([^"]+)")+\s*\}
I'm not sure the regex is well-formed for my problem yet, but it seems to print the last key-value pair (hammerid, 1) at least.
My question is, how would I go about extracting the "nth" matched subexpression within an expression? Or is there not really a practical way to do this? Would it perhaps be better to write two nested while-loops, one which searches for the {} patterns, and then one which searches for the actual key-value pairs?
Thanks!
Using a parser generator you can code a proper parser.
For example, using Boost Spirit you can define the rules of the grammar inline as C++ expressions:
start = *entity;
entity = '{' >> *entry >> '}';
entry = text >> text;
text = '"' >> *~char_('"') >> '"';
Here's a full demo:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <map>
using Entity = std::map<std::string, std::string>;
using ValveData = std::vector<Entity>;
namespace qi = boost::spirit::qi;
template <typename It, typename Skipper = qi::space_type>
struct Grammar : qi::grammar<It, ValveData(), Skipper>
{
Grammar() : Grammar::base_type(start) {
using namespace qi;
start = *entity;
entity = '{' >> *entry >> '}';
entry = text >> text;
text = '"' >> *~char_('"') >> '"';
BOOST_SPIRIT_DEBUG_NODES((start)(entity)(entry)(text))
}
private:
qi::rule<It, ValveData(), Skipper> start;
qi::rule<It, Entity(), Skipper> entity;
qi::rule<It, std::pair<std::string, std::string>(), Skipper> entry;
qi::rule<It, std::string()> text;
};
int main()
{
using It = boost::spirit::istream_iterator;
Grammar<It> parser;
It f(std::cin >> std::noskipws), l;
ValveData data;
bool ok = qi::phrase_parse(f, l, parser, qi::space, data);
if (ok) {
std::cout << "Parsing success:\n";
int count = 0;
for(auto& entity : data)
{
++count;
for (auto& entry : entity)
std::cout << "Entity " << count << ": [" << entry.first << "] -> [" << entry.second << "]\n";
}
} else {
std::cout << "Parsing failed\n";
}
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Which prints (for the input shown):
Parsing success:
Entity 1: [classname] -> [worldspawn]
Entity 1: [detailmaterial] -> [detail/detailsprites_sawmill]
Entity 1: [detailvbsp] -> [detail_sawmill.vbsp]
Entity 1: [hammerid] -> [1]
Entity 1: [mapversion] -> [1371]
Entity 1: [maxpropscreenwidth] -> [-1]
Entity 1: [skyname] -> [sky_alpinestorm_01]
Entity 1: [world_maxs] -> [3432 4096 822]
Entity 1: [world_mins] -> [-2408 -4096 -571]
Entity 2: [classname] -> [env_tonemap_controller]
Entity 2: [hammerid] -> [90580]
Entity 2: [origin] -> [553 -441 322]
Entity 2: [targetname] -> [tonemap_global]
I think doing it all with one regex expression is hard because of the variable number of entries inside each entity {}. Personally I would consider using simply std::readline to do your parsing.
#include <map>
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
std::istringstream iss(R"~(
{
"world_maxs" "3432 4096 822"
"world_mins" "-2408 -4096 -571"
"skyname" "sky_alpinestorm_01"
"maxpropscreenwidth" "-1"
"detailvbsp" "detail_sawmill.vbsp"
"detailmaterial" "detail/detailsprites_sawmill"
"classname" "worldspawn"
"mapversion" "1371"
"hammerid" "1"
}
{
"origin" "553 -441 322"
"targetname" "tonemap_global"
"classname" "env_tonemap_controller"
"hammerid" "90580"
}
)~");
int main()
{
std::string skip;
std::string entity;
std::vector<std::map<std::string, std::string> > vm;
// skip to open brace, read entity until close brace
while(std::getline(iss, skip, '{') && std::getline(iss, entity, '}'))
{
// turn entity into input stream
std::istringstream iss(entity);
// temporary map
std::map<std::string, std::string> m;
std::string key, val;
// skip to open quote, read key to close quote
while(std::getline(iss, skip, '"') && std::getline(iss, key, '"'))
{
// skip to open quote read val to close quote
if(std::getline(iss, skip, '"') && std::getline(iss, val, '"'))
m[key] = val;
}
// move map (no longer needed)
vm.push_back(std::move(m));
}
for(auto& m: vm)
{
for(auto& p: m)
std::cout << p.first << ": " << p.second << '\n';
std::cout << '\n';
}
}
Output:
classname: worldspawn
detailmaterial: detail/detailsprites_sawmill
detailvbsp: detail_sawmill.vbsp
hammerid: 1
mapversion: 1371
maxpropscreenwidth: -1
skyname: sky_alpinestorm_01
world_maxs: 3432 4096 822
world_mins: -2408 -4096 -571
classname: env_tonemap_controller
hammerid: 90580
origin: 553 -441 322
targetname: tonemap_global
I would have written it like this:
^\{(\s*"([A-Za-z0-9_]+)"\s*"([^"]+)")+\s*\}$
Or splited the regex into two strings. First match the curly braces, then loop through the content of curly braces line for line.
Match curly braces: ^(\{[^\}]+)$
Match the lines: ^(\s*"([A-Za-z0-9_]+)"\s*"([^"]+)"\s*)$
I have a working boost spirit parser and was thinking if it is possible to do iterative update of an abstract syntax tree with boost spirit?
I have a struct similar to:
struct ast;
typedef boost::variant< boost::recursive_wrapper<ast> > node;
struct ast
{
std::vector<int> value;
std::vector<node> children;
};
Which is being parsed by use of:
bool r = phrase_parse(begin, end, grammar, space, ast);
Would it be possible to do iterative update of abstract syntax tree with boost spirit? I have not found any documentation on this, but I was thinking if the parsers semantic actions could push_back on an already existing AST. Has anyone tried this?
This would allow for parsing like this:
bool r = phrase_parse(begin, end, grammar, space, ast); //initial parsing
//the second parse will be called at a later state given some event/timer/io/something
bool r = phrase_parse(begin, end, grammar, space, ast); //additional parsing which will update the already existing AST
How would you know which nodes to merge? Or would you always add ("graft") at the root level? In that case, why don't you just parse another and merge moving the elements into the existing ast?
ast& operator+=(ast&& other) {
std::move(other.value.begin(), other.value.end(), back_inserter(value));
std::move(other.children.begin(), other.children.end(), back_inserter(children));
return *this;
}
Demo Time
Let's devise the simplest grammar I can think of for this AST:
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
Note I didn't even make the ; optional. Oh well. Samples. Exercises for readers. ☡ You know the drill.
We implement the trivial function ast parse(It f, It l), and then we can simply merge the asts:
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
merged += parse(input.begin(), input.end());
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
struct ast;
//typedef boost::make_recursive_variant<boost::recursive_wrapper<ast> >::type node;
typedef boost::variant<boost::recursive_wrapper<ast> > node;
struct ast {
std::vector<int> value;
std::vector<node> children;
ast& operator+=(ast&& other) {
std::move(other.value.begin(), other.value.end(), back_inserter(value));
std::move(other.children.begin(), other.children.end(), back_inserter(children));
return *this;
}
};
BOOST_FUSION_ADAPT_STRUCT(ast,
(std::vector<int>,value)
(std::vector<node>,children)
)
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, ast(), Skipper>
{
grammar() : grammar::base_type(start) {
using namespace qi;
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start));
}
private:
qi::rule<It, ast(), Skipper> start;
};
// for output:
static inline std::ostream& operator<<(std::ostream& os, ast const& v) {
using namespace karma;
rule<boost::spirit::ostream_iterator, ast()> r;
r = '{' << -(int_ % ',') << ';' << -((r|eps) % ',') << '}';
return os << format(r, v);
}
template <typename It> ast parse(It f, It l)
{
ast parsed;
static grammar<It> g;
bool ok = qi::phrase_parse(f,l,g,qi::space,parsed);
if (!ok || (f!=l)) {
std::cout << "Parse failure\n";
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
exit(255);
}
return parsed;
}
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
merged += parse(input.begin(), input.end());
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Of course, it prints:
merged + {1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}} --> {1,2,3;{4;{9,8;}},{5,6;}}
merged + {10,20,30;{40;{90, 80;}},{50,60;}} --> {1,2,3,10,20,30;{4;{9,8;}},{5,6;},{40;{90,80;}},{50,60;}}
UPDATE
In this - trivial - example, you can just bind the collections to the attributes in the parse call. The same thing will happen without the operator+= call needed to move the elements, because the rules are written to automatically append to the bound container attribute.
CAVEAT: A distinct disadvantage of modifying the target value in-place is what happens if parsing fails. In the version the merged value will then be "undefined" (has received partial information from the failed parse).
So if you want to parse inputs "atomically", the first, more explicit approach is a better fit.
So the following is a slightly shorter way to write the same:
Live On Coliru
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
struct ast;
//typedef boost::make_recursive_variant<boost::recursive_wrapper<ast> >::type node;
typedef boost::variant<boost::recursive_wrapper<ast> > node;
struct ast {
std::vector<int> value;
std::vector<node> children;
};
BOOST_FUSION_ADAPT_STRUCT(ast,
(std::vector<int>,value)
(std::vector<node>,children)
)
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, ast(), Skipper>
{
grammar() : grammar::base_type(start) {
using namespace qi;
start = '{' >> -(int_ % ',') >> ';' >> -(start % ',') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start));
}
private:
qi::rule<It, ast(), Skipper> start;
};
// for output:
static inline std::ostream& operator<<(std::ostream& os, ast const& v) {
using namespace karma;
rule<boost::spirit::ostream_iterator, ast()> r;
r = '{' << -(int_ % ',') << ';' << -((r|eps) % ',') << '}';
return os << format(r, v);
}
template <typename It> void parse(It f, It l, ast& into)
{
static grammar<It> g;
bool ok = qi::phrase_parse(f,l,g,qi::space,into);
if (!ok || (f!=l)) {
std::cout << "Parse failure\n";
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
exit(255);
}
}
int main() {
ast merged;
for(std::string const& input : {
"{1 ,2 ,3 ;{4 ;{9 , 8 ;}},{5 ,6 ;}}",
"{10,20,30;{40;{90, 80;}},{50,60;}}",
})
{
parse(input.begin(), input.end(), merged);
std::cout << "merged + " << input << " --> " << merged << "\n";
}
}
Still prints
(I'm sorry if I ask this question wrong, this is my first time I write in a forum)
When I started programming at my SFML - Game, I had a very old book, wich was very C-like (eg. recommendation of atoi();).
Now I got a new C++(including C++11) book, and I want to rewrite the old lines wih newer Code.
I saved the Tiles in a file stored like this:
[0-0,15-1|22,44] [0-1|0]
[4-0,10-1,3-1|0] [0-5,5-5|0]
That means:
[...] desribes a Tile
0-0 etc. is the xy position on the Texturesheet
22 etc. is the event that will be triggered.
the amount of events and sf::Vector2i shouldn't be set constantly.
The Tiles are separately taken out from another class, which manages the entire Tilemap.
Now my problem: I have no idea how i should push the numbers from the strinstream right in two vectores?
My code:
class Tile{
private:
std::deque<sf::Sprite> tile;
std::deque<int> event;
public:
Tile(sf::Texture& texture, std::deque<sf::Vector2i>&& ctor_texturerects, std::deque<int>&& ctor_events);//This one is working fine
Tile(sf::Texture& texture, std::stringstream&& ctor_stream/*reads the Tile*/){
std::deque<sf::Vector2i> temp_texturerects;
std::deque<int>temp_events;
/*TODO: filter the stringstream and push them into the containers*/
Tile::Tile(texture,std::move(temp_texturerect),std::move(temp_events));
}
I'd be also very happy if you could give me another solution, like changing sf::Vector2i to a better solution or giving me a better stream and class concept
Thanks in advance
Xeno Ceph
Edit:
I made a little workaround:
(I changed the inputstream to a normal string)
But the code doesn't look good
There mujst be an easier solution
Tile:: Tile(sf::Texture& texture, std::string&& ctor_string){
std::deque<sf::Vector2i> temp_texturerects;
std::deque<int> temp_events;
std::stringstream strstr;
for(int i=0; i<ctor_string.size(); ++i){
while(ctor_string[i]!='|'){
while(ctor_string[i] != ','){
strstr << ctor_string[i];
}
sf::Vector2i v2i;
strstr >> v2i.x >> v2i.y;
temp_texturerects.push_front(v2i);
strstr.str("");
}
while(ctor_string[i]!=']'){
while(ctor_string[i] != ','){
strstr << ctor_string[i];
}
int integer;
strstr >> integer;
temp_events.push_front(integer);
strstr.str("");
}
}
Tile::Tile(texture, std::move(temp_texturerects), std::move(temp_events));
}
Has anybody a better solution?
If I understand your question correctly, you have some strings of the form
[0-0,15-1|22,44] [0-1|0]
[4-0,10-1,3-1|0] [0-5,5-5|0]
and you want to extract 2 types of data - positions (e.g. 0-0) and events (e.g. 22).
Your question is how to extract this data cleanly, discarding the [ and ] characters, etc.
One great way to approach this is to use the getline function that operates on stringstreams, which inherit from std::istream (http://www.cplusplus.com/reference/string/string/getline/). It can take custom delimiters, not just the newline character. So you can use '[', '|' and ']' as different delimiting characters and parse them in a logical order.
For example, since your string is just a collection of tiles, you can split it up into a number of functions - ParseTile, ParsePositions and ParseEvents, something like the following:
void Tile::ParseInput(stringstream&& ctor_string) {
//extract input, tile by tile
while(!ctor_string.eof()) {
string tile;
//you can treat each tile as though it is on a separate line by
//specifying the ']' character as the delimiter for the "line"
getline(ctor_string, tile, ']');
tile += "]"; //add back the ']' character that was discarded from the getline
//the string "tile" should now contain a single tile [...], which we can further process using ParseTile
ParseTile(tile);
}
}
The ParseTile function:
void Tile::ParseTile(string&& tile) {
//input string tile is e.g. " [0-0, 15-1|22,44]"
string discard; //string to hold parts of tile string that should be thrown away
string positions; //string to hold list of positions, separated by ','
string events; //string to hold events, separated by ','
//create stringstream from input
stringstream tilestream(tile);
//tilestream is e.g. "[0-0,15-1|22,44]"
getline(tilestream, discard, '['); //gets everything until '['
//now, discard is " [" and tilestream is "0-0,15-1|22,44]"
getline(tilestream, positions, '|');
//now, positions is "0-0,15-1" and tilestream is "22,44]"
getline(tilestream, events,']');
//now, events is "22,44" and tilestream is empty
ParsePositions(positions);
ParseEvents(events);
}
You can write your own ParsePositions and ParseEvents functions which basically will be a more getline calls using a ',' as the delimiting character (just loop until the string ends).
I suggest either writing a proper parser manually (not unlike the other answer proposes) or to use a proper parsing framework, like Boost Spirit.
The advantages of the latter is that you get debuggability, composability, attributes etc. "for free". Here's the simplest example I could think of:
struct TileData
{
std::deque<sf::Vector2i> texturerects;
std::deque<int> events;
};
typedef std::deque<TileData> TileDatas;
template <typename It>
struct parser : qi::grammar<It, TileDatas(), qi::space_type>
{
parser() : parser::base_type(start)
{
using namespace qi;
v2i = (int_ >> '-' >> int_)
[ _val = phx::construct<sf::Vector2i>(_1, _2) ];
tiledata =
(v2i % ',') >> '|' >>
(int_ % ',');
start = *('[' >> tiledata >> ']');
}
private:
qi::rule<It, sf::Vector2i(), qi::space_type> v2i;
qi::rule<It, TileData(), qi::space_type> tiledata;
qi::rule<It, TileDatas(), qi::space_type> start;
};
Adding a bit of code to test this, see it live on http://liveworkspace.org/code/3WM0My$1, output:
Parsed: TileData {
texturerects: deque<N2sf8Vector2iE> {v2i(0, 0), v2i(15, 1), }
events: deque<i> {22, 44, }
}
Parsed: TileData {
texturerects: deque<N2sf8Vector2iE> {v2i(0, 1), }
events: deque<i> {0, }
}
Parsed: TileData {
texturerects: deque<N2sf8Vector2iE> {v2i(4, 0), v2i(10, 1), v2i(3, 1), }
events: deque<i> {0, }
}
Parsed: TileData {
texturerects: deque<N2sf8Vector2iE> {v2i(0, 5), v2i(5, 5), }
events: deque<i> {0, }
}
Full code:
#define BOOST_SPIRIT_USE_PHOENIX_V3
// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
// liveworkspace.org doesn't have SFML
namespace sf { struct Vector2i { int x, y; Vector2i(int ax=0, int ay=0) : x(ax), y(ay) {} }; }
struct TileData
{
std::deque<sf::Vector2i> texturerects;
std::deque<int> events;
};
BOOST_FUSION_ADAPT_STRUCT(TileData,
(std::deque<sf::Vector2i>, texturerects)
(std::deque<int>, events))
typedef std::deque<TileData> TileDatas;
template <typename It>
struct parser : qi::grammar<It, TileDatas(), qi::space_type>
{
parser() : parser::base_type(start)
{
using namespace qi;
v2i = (int_ >> '-' >> int_)
[ _val = phx::construct<sf::Vector2i>(_1, _2) ];
tiledata =
(v2i % ',') >> '|' >>
(int_ % ',');
start = *('[' >> tiledata >> ']');
}
private:
qi::rule<It, sf::Vector2i(), qi::space_type> v2i;
qi::rule<It, TileData(), qi::space_type> tiledata;
qi::rule<It, TileDatas(), qi::space_type> start;
};
typedef boost::spirit::istream_iterator It;
std::ostream& operator<<(std::ostream& os, sf::Vector2i const &v) { return os << "v2i(" << v.x << ", " << v.y << ")"; }
template <typename T> std::ostream& operator<<(std::ostream& os, std::deque<T> const &d) {
os << "deque<" << typeid(T).name() << "> {";
for (auto& t : d) os << t << ", ";
return os << "}";
}
std::ostream& operator<<(std::ostream& os, TileData const& ttd) {
return os << "TileData {\n"
"\ttexturerects: " << ttd.texturerects << "\n"
"\tevents: " << ttd.events << "\n}";
}
int main()
{
parser<It> p;
std::istringstream iss(
"[0-0,15-1|22,44] [0-1|0]\n"
"[4-0,10-1,3-1|0] [0-5,5-5|0]");
It f(iss), l;
TileDatas data;
if (qi::phrase_parse(f,l,p,qi::space,data))
{
for (auto& tile : data)
{
std::cout << "Parsed: " << tile << "\n";
}
}
if (f != l)
{
std::cout << "Remaining unparsed: '" << std::string(f, l) << "'\n";
}
}
I'm trying to define my own grammar using boost spirit framework and I'm defining such a matching rule:
value = (
char_('"') >>
(*qi::lexeme[
char_('\\') >> char_('\\') |
char_('\\') >> char_('"') |
graph - char_('"') |
char_(' ')
])[some_func] >>
char_('"')
);
I'd like to assing an action - some_func - to the part of it, and pass the whole matching string as a parameter. But unfortunately I will get something like vector<boost::variant<boost::fusion::vector2 ..a lot of stuff...)...> . Can I somehow get the whole data as a char*, std::string or even void* with size?
Look at qi::as_string:
Output of demo program:
DEBUG: 'some\\"quoted\\"string.'
parse success
To be honest, it looks like you are really trying to parse 'verbatim' strings with possible escape chars. In the respect, the use of lexeme seem wrong (the spaces get eaten). If you want to see samples of escaped string parsing, see e.g.
Boost Spirit Implement small one-line DSL on a server application (for this style)
Compiling a simple parser with Boost.Spirit (for escaping by duplication)
Parsing escaped strings with boost spirit
Parse quoted strings with boost::spirit
A simple rearrangement that I think could be made, at least might look like:
value = qi::lexeme [
char_('"') >>
qi::as_string [
*(
string("\\\\")
| string("\\\"")
| (graph | ' ') - '"'
)
] [some_func(_1)] >>
char_('"')
];
Note however that you could simply declare the rule without a skipper and drop the lexeme alltogether: http://liveworkspace.org/code/1oEhei$0
Code (live on liveworkspace)
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
struct some_func_t
{
template <typename> struct result { typedef void type; };
template <typename T>
void operator()(T const& s) const
{
std::cout << "DEBUG: '" << s << "'\n";
}
};
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, Skipper>
{
parser() : parser::base_type(value)
{
using namespace qi;
// using phx::bind; using phx::ref; using phx::val;
value = (
char_('"') >>
qi::as_string
[
(*qi::lexeme[
char_('\\') >> char_('\\') |
char_('\\') >> char_('"') |
graph - char_('"') |
char_(' ')
])
] [some_func(_1)] >>
char_('"')
);
BOOST_SPIRIT_DEBUG_NODE(value);
}
private:
qi::rule<It, Skipper> value;
phx::function<some_func_t> some_func;
};
bool doParse(const std::string& input)
{
typedef std::string::const_iterator It;
auto f(begin(input)), l(end(input));
parser<It, qi::space_type> p;
try
{
bool ok = qi::phrase_parse(f,l,p,qi::space);
if (ok)
{
std::cout << "parse success\n";
}
else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
return ok;
} catch(const qi::expectation_failure<It>& e)
{
std::string frag(e.first, e.last);
std::cerr << e.what() << "'" << frag << "'\n";
}
return false;
}
int main()
{
bool ok = doParse("\"some \\\"quoted\\\" string.\"");
return ok? 0 : 255;
}