I have 2 type of expressions that I want parse and calculate the results.
Arithmetic expressions: +,-,*,/ and sqrt() function;
Ex: "2 + 3 * sqrt(100*25)" -> should be calculated as 152
Functions: GetSubString() and ConcatenateStrings()
Ex: "GetSubString('100str1', 0, 3)" -> should be calculated as 100
I have 2 separate grammars to parse these expression types. Now I want to combine these 2 grammars and make it possible to define these expressions together.
Ex:
"GetSubString('100str1', 0, 2+1) + sqrt(9)" -> result= 103
"2 + 3 * sqrt(GetSubString('100str1', 0, 2+1))" -> result= 32
I have tried to combine the 2 grammars as below by using the permutation operator. But it doesn't compile.
expr_ =
( *( (function_call_ ^ arithmeticexpression_)| string_ ));
So is this a right way to combine my function_call_ and arithmeticexpression_ rules or how should I do this?
typedef boost::variant<int, float, double, std::wstring> RetValue;
// Returns the part of `str` that starts at `position` and spans `len` characters.
// `str` must hold a std::wstring and `position`/`len` must each hold an int;
// otherwise boost::get throws boost::bad_get, which propagates to the caller.
RetValue CTranslationFunctions::GetSubString(RetValue const& str, RetValue position, RetValue len)
{
    // Extract the operands in a fixed order: text, start offset, length.
    const std::wstring source = boost::get<std::wstring>(str);
    const size_t first = boost::get<int>(position); // int is converted to size_t
    const size_t count = boost::get<int>(len);      // int is converted to size_t
    return source.substr(first, count);
}
// Streams `a` and then `b` into a wide string stream and returns the combined text.
RetValue CTranslationFunctions::ConcatenateStrings(RetValue const& a, RetValue const& b)
{
    std::wostringstream joined;
    joined << a;
    joined << b;
    return joined.str();
}
// Returns `num` with its sign flipped.
double CTranslationFunctions::Negate(double num)
{
    const double negated = -num;
    return negated;
}

// Returns the sum of `num1` and `num2`.
double CTranslationFunctions::Add(double num1 , const double num2)
{
    const double sum = num1 + num2;
    return sum;
}

// Returns `num1` minus `num2`.
double CTranslationFunctions::Subtruct(double num1 , double num2)
{
    const double difference = num1 - num2;
    return difference;
}

// Returns the product of `num1` and `num2`.
double CTranslationFunctions::Multiply(double num1 , double num2)
{
    const double product = num1 * num2;
    return product;
}

// Returns `num1` divided by `num2` (plain floating-point division, no zero check).
double CTranslationFunctions::Divide(double num1 , double num2)
{
    const double quotient = num1 / num2;
    return quotient;
}

// Returns the square root of `num`.
double CTranslationFunctions::Sqrt(double num)
{
    const double root = sqrt(num);
    return root;
}
class InvalidParamEx{};
// Converts `val` to a double by streaming it to text and parsing that text back.
// A leading number is accepted even when other characters follow it.
// Throws InvalidParamEx when the text does not start with a number.
double CTranslationFunctions::ConvertStringToDouble(RetValue val)
{
    std::wostringstream text;
    text << val;

    std::wistringstream reader(text.str());
    double parsed = 0.0;
    // Stream extraction reports failure through the stream state; it does not
    // throw unless exceptions() has been enabled, so the state is tested explicitly.
    if (!(reader >> parsed))
    {
        throw InvalidParamEx();
    }
    return parsed;
}
// Adapt the translation functions for Phoenix (name with trailing `_`, arity as last argument)
// so that they can be invoked from the grammar's semantic actions.
// NOTE(review): GetContainerId is adapted here but no definition of it is visible in this
// listing, and GetSubString_ (used by function_call_ in the grammar) is not adapted - confirm
// against the full source.
BOOST_PHOENIX_ADAPT_FUNCTION(RetValue, ConcatenateStrings_, ConcatenateStrings, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(RetValue, GetContainerId_, GetContainerId, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(double, Add_, Add, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(double, Subtruct_, Subtruct, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(double, Multiply_, Multiply, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(double, Divide_, Divide, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(double, Negate_, Negate, 1)
BOOST_PHOENIX_ADAPT_FUNCTION(double, Sqrt_, Sqrt, 1)
BOOST_PHOENIX_ADAPT_FUNCTION(double, ConvertStringToDouble_, ConvertStringToDouble, 1)
// Grammar to parse map functions
// Qi grammar whose start rule is expr_ and whose synthesized attribute is a RetValue.
// NOTE(review): as written this listing does not compile; the individual problems are
// flagged next to the rules below.
template <typename It, typename Skipper = qi::space_type >
struct MapFunctionParser : qi::grammar<It, RetValue(), Skipper, qi::locals<char> >
{
MapFunctionParser() : MapFunctionParser::base_type(expr_)
{
using namespace qi;
// Function calls: GetSubString(expr, expr, expr) and ConcatenateStrings(expr, expr).
// NOTE(review): the first alternative begins with a stray leading '|'.
function_call_ =
| (lit(L"GetSubString") > '(' > expr_ > ',' > expr_ > ',' > expr_ > ')')
[ _val = GetSubString_(_1, _2, _3) ]
| (lit(L"ConcatenateStrings") > '(' > expr_ > lit(',') > expr_ > ')')
[ _val = ConcatenateStrings_(_1, _2) ];
// Quoted string: the opening quote character (' or ") is stored in local _a and
// the same character is required as the closing quote.
string_ = as_wstring[omit [ char_("'\"") [_a =_1] ]
>> no_skip [ *(char_ - char_(_a)) ]
>> lit(_a)];
// term (('+' | '-') term)*, folded left to right through Add_/Subtruct_.
arithmeticexpression_ =
term_ [_val = _1]
>> *( ('+' >> term_ [_val = Add_(_val,_1)])
| ('-' >> term_ [_val = Subtruct_(_val, _1)])
);
// factor (('*' | '/') factor)*, folded left to right through Multiply_/Divide_.
term_ =
factor_ [_val = _1]
>> *( ('*' >> factor_ [_val = Multiply_(_val, _1)])
| ('/' >> factor_ [_val = Divide_(_val, _1)])
);
// NOTE(review): there is no '|' between the double_ and string_ alternatives, the string_
// action calls ConvertStringToDouble rather than the adapted ConvertStringToDouble_, and the
// last alternative ends with an unmatched ')'.
factor_ =
double_ [_val = _1]
string_ [_val = ConvertStringToDouble(_1)]
| ('-' >> factor_ [_val = Negate_(_1)])
| ('+' >> factor_ [_val = _1])
| (L"Sqrt" > '(' > double_ > ')' ) [_val = Sqrt_(_1)]);
// Attempt to combine both sub-grammars with the permutation operator '^'.
expr_ =
( *( (function_call_ ^ arithmeticexpression_)| string_ ));
// On an expectation failure inside expr_, print what was expected and the remaining input.
on_error<fail> ( expr_, std::cout
<< phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
<< phx::construct<std::string>(_3, _2) << phx::val("\"\n"));
BOOST_SPIRIT_DEBUG_NODE(function_call_);
BOOST_SPIRIT_DEBUG_NODE(expr_);
BOOST_SPIRIT_DEBUG_NODE(string_);
BOOST_SPIRIT_DEBUG_NODE(funcparameter_);
BOOST_SPIRIT_DEBUG_NODE(arithmeticexpression_);
BOOST_SPIRIT_DEBUG_NODE(factor_);
BOOST_SPIRIT_DEBUG_NODE(term_);
}
private:
// funcparameter_ is declared and registered for debugging but never given a definition here.
qi::rule<It, RetValue(), Skipper, qi::locals<char> > function_call_, expr_, funcparameter_;
qi::rule<It, wstring(), Skipper, qi::locals<char> > string_;
qi::rule<It, double(), Skipper> arithmeticexpression_, factor_, term_;
};
Edit Moved my early response to the bottom
BIG UPDATE
That took a while. Mostly it was because the code shown has strange problems:
several rules contain syntax errors (function_call_ and factor_)
there is a reference to GetContainerId and GetSubstring was never Phoenix-adapted
The type CTranslationFunctions didn't exist, and member functions were being declared
however the ADAPT_FUNCTION macros still referenced the member function names as if they were supposed to be in the enclosing namespace (?!)
So what I basically ended up doing was a re-write. Yeah I know. I'm crazy. Nevertheless, let me walk you through it, explaining some of the things I changed and why.
#define BOOST_SPIRIT_USE_PHOENIX_V3
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/phoenix/function/adapt_function.hpp>
#include <boost/lexical_cast.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef boost::variant<int, double> NumValue;
typedef boost::variant<int, double, std::wstring> GenericValue;
Right away, I split the concept of numeric and generic values. This is because the distinction is important to certain expressions (mainly the arithmetic expressions). I could have still used GenericValue everywhere, but we'll see later how NumValue makes handling the arithmetic evaluations simpler.
// Exception thrown when a value cannot be used as a numeric operand/parameter.
struct InvalidParamEx : public virtual std::exception
{
    // Fixed description of the failure; `override` lets the compiler verify that this
    // really replaces std::exception::what().
    const char* what() const noexcept override { return "Invalid type of operand/parameter"; }
};
There's your exception type, showing some good practices. We throw it when a numeric value was expected, but the GenericValue contained something incompatible. How? Let's see:
// Visitor that turns the alternative held by a GenericValue into a NumValue.
// Numbers pass through unchanged; strings are converted, or InvalidParamEx is thrown.
struct AsNumValue : boost::static_visitor<NumValue>
{
    int    operator()(int i)    const { return i; }
    double operator()(double d) const { return d; }

    // Tries int first so that integral text stays an int, then falls back to double.
    // Only a failed conversion (boost::bad_lexical_cast) is treated as "try the next
    // type"; any other exception is allowed to propagate instead of being swallowed.
    NumValue operator()(std::wstring const& s) const
    {
        try { return boost::lexical_cast<int>(s); }    catch (boost::bad_lexical_cast const&) {}
        try { return boost::lexical_cast<double>(s); } catch (boost::bad_lexical_cast const&) {}
        throw InvalidParamEx();
    }
};
class CTranslationFunctions
{
// helper
NumValue static num(GenericValue const& v) { return boost::apply_visitor(AsNumValue(), v); }
There. I defined the missing class for you, and right away added the helper that converts GenericValue → NumValue. As you can see, I used boost::lexical_cast because there is no use in reinventing the wheel. Note that your earlier approach with ConvertStringToDouble had several big problems:
it would always result in a double value, whereas your functions may require int
it would treat '100str1' as the value 100 without warning
it happened at the wrong time: any string in a simple term would be converted to double, even if it really was a string. (Why this was relevant will become clear when you see the modified expr_ and term_ rules.)
Let's move on:
public:
// Returns the part of `str` selected by `position` and `len`; defined out of class.
static GenericValue GetSubString(GenericValue const& str, GenericValue position, GenericValue len);
// Returns the streamed text of `a` followed by that of `b`; defined out of class.
static GenericValue ConcatenateStrings(GenericValue const& a, GenericValue const& b);
Yup, we'll define them later. Now, brace yourself for the arithmetic operation functions:
// DEFUNOP(name, expr): declares a private visitor `do_<name>` that evaluates `expr` on the
// numeric operand `a`, and a public static `name(GenericValue)` that first coerces its
// argument with num() and then dispatches to that visitor.
#define DEFUNOP(name, expr) private: struct do_##name : boost::static_visitor<NumValue> { \
    template <typename T1> NumValue operator()(T1 const& a) const { return expr; } \
}; \
public: static NumValue name(GenericValue const& a) { auto na=num(a); return boost::apply_visitor(do_##name(), na); }

// DEFBINOP(name, infix): same idea for a binary operator `a infix b`. The helper visitor is
// declared private, exactly like in DEFUNOP, so that only `name(...)` is part of the public
// interface regardless of which access section the macro is expanded in.
#define DEFBINOP(name, infix) private: struct do_##name : boost::static_visitor<NumValue> { \
    template <typename T1, typename T2> NumValue operator()(T1 const& a, T2 const& b) const \
    { return a infix b; } \
}; \
public: static NumValue name(GenericValue const& a, GenericValue const& b) { auto na=num(a), nb=num(b); return boost::apply_visitor(do_##name(), na, nb); }

// define the operators polymorphically, so `int` + `double` becomes `double`, but `int` * `int` stays `int`
DEFBINOP(Add     , +);
DEFBINOP(Subtruct, -);
DEFBINOP(Multiply, *);
DEFBINOP(Divide  , /);
DEFUNOP (Negate  , -a);
DEFUNOP (Sqrt    , sqrt(a));
};
Whoaaaaah What happened there? Well, the comment says it all:
You needed to distinguish between int+int vs. double+int etc. This is known as polymorphic evaluation. Example: GetSubString('100str1', 0, 2+1) could never work, because 2+1 needs to evaluate to an int(3), but your double Add(double,double) always produced a double.
I've used MACROs to remove the tedious work from creating a polymorphic function object for each operator
I've let decltype detect the resultant types in mixed cases
Here's where NumValue has merit above GenericValue: because NumValue can only be int or double, we know that the generic operator() implementation covers all legal combinations.
To ensure that all parameters are actually NumValues, they are passed through the num() helper (which applies the AsNumValue visitor) before calling the function object.
This thoroughly solves your arithmetic operations, and has another bonus: it removes the 'need' for ConvertStringToDouble, since you get conversion to NumValue when it's needed, namely on evaluation of arithmetic operations. This is an important thing, down the road when we fix your grammar to support your desired input expressions.
If you've come this far, you've seen the rough parts. The rest is plain sailing.
// Returns `len` characters of `str`, starting at offset `position`.
// boost::get throws boost::bad_get unless `str` holds a std::wstring and both
// `position` and `len` hold an int.
GenericValue CTranslationFunctions::GetSubString(GenericValue const& str, GenericValue position, GenericValue len)
{
    std::wstring const& text = boost::get<std::wstring>(str);
    int const from  = boost::get<int>(position);
    int const count = boost::get<int>(len);
    return text.substr(from, count);
}
Yeah, I shortened it a bit.
// Streams `a` and then `b` into a wide string stream and returns the resulting text.
GenericValue CTranslationFunctions::ConcatenateStrings(GenericValue const& a, GenericValue const& b)
{
    std::wostringstream joined;
    joined << a;
    joined << b;
    return joined.str();
}
// Adapt each static member of CTranslationFunctions for Phoenix under the same name with a
// trailing underscore; the arguments are (return type, adapted name, function, arity).
BOOST_PHOENIX_ADAPT_FUNCTION(GenericValue, ConcatenateStrings_, CTranslationFunctions::ConcatenateStrings, 2)
BOOST_PHOENIX_ADAPT_FUNCTION(GenericValue, GetSubString_ , CTranslationFunctions::GetSubString , 3)
BOOST_PHOENIX_ADAPT_FUNCTION(NumValue , Add_ , CTranslationFunctions::Add , 2)
BOOST_PHOENIX_ADAPT_FUNCTION(NumValue , Subtruct_ , CTranslationFunctions::Subtruct , 2)
BOOST_PHOENIX_ADAPT_FUNCTION(NumValue , Multiply_ , CTranslationFunctions::Multiply , 2)
BOOST_PHOENIX_ADAPT_FUNCTION(NumValue , Divide_ , CTranslationFunctions::Divide , 2)
BOOST_PHOENIX_ADAPT_FUNCTION(NumValue , Negate_ , CTranslationFunctions::Negate , 1)
BOOST_PHOENIX_ADAPT_FUNCTION(NumValue , Sqrt_ , CTranslationFunctions::Sqrt , 1)
Yawn. We know how to adapt functions for Phoenix, already; let's get to the grammar definition!
// Grammar to parse map functions
// Qi grammar for expressions that mix arithmetic (+ - * /), quoted strings and the function
// calls GetSubString / ConcatenateStrings / Sqrt. The synthesized attribute is a GenericValue.
template <typename It, typename Skipper = qi::space_type >
struct MapFunctionParser : qi::grammar<It, GenericValue(), Skipper>
{
    MapFunctionParser() : MapFunctionParser::base_type(expr_)
    {
        using namespace qi;

        // Function names are matched case-insensitively; every argument is a full expression.
        function_call_ =
              (no_case["GetSubString"]       > '(' > expr_ > ',' > expr_ > ',' > expr_ > ')') [ _val = GetSubString_(_1, _2, _3) ]
            | (no_case["ConcatenateStrings"] > '(' > expr_ > ',' > expr_ > ')')               [ _val = ConcatenateStrings_(_1, _2) ]
            | (no_case["Sqrt"]               > '(' > expr_ > ')')                             [ _val = Sqrt_(_1) ]
            ;

        string_ = // keep it simple, silly (KISS)
              (L'"' > *~char_('"') > L'"')
            | (L"'" > *~char_("'") > L"'");

        // term (('+' | '-') term)*, folded left to right
        arithmeticexpression_ =
            term_                    [ _val = _1 ]
            >> *( ('+' >> term_      [ _val = Add_(_val,_1) ])
                | ('-' >> term_      [ _val = Subtruct_(_val, _1) ])
                );

        // factor (('*' | '/') factor)*, folded left to right
        term_ =
            factor_                  [ _val = _1 ]
            >> *( ('*' >> factor_    [ _val = Multiply_(_val, _1) ])
                | ('/' >> factor_    [ _val = Divide_(_val, _1) ])
                );

        // The strict real parser only matches numbers that contain a decimal point and is
        // tried before int_. With `int_ | double_`, int_ would match the "2" of "2.5" and
        // leave ".5" unparsed, so the double_ branch could never see a fractional literal.
        factor_ =
              strict_double_         [ _val = _1 ]
            | int_                   [ _val = _1 ]
            | string_                [ _val = _1 ]
            | ('-' >> factor_)       [ _val = Negate_(_1) ]
            | ('+' >> factor_)       [ _val = _1 ]
            | function_call_         [ _val = _1 ]
            ;

        expr_ = arithmeticexpression_;

        // On an expectation failure inside expr_, print what was expected and the remaining input.
        on_error<fail> ( expr_, std::cout
            << phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
            << phx::construct<std::string>(_3, _2) << phx::val("\"\n"));

        BOOST_SPIRIT_DEBUG_NODES((function_call_) (expr_) (string_) (funcparameter_) (arithmeticexpression_) (factor_) (term_))
    }

private:
    // Real-number parser that requires a decimal point, so plain integers fall through to int_.
    qi::real_parser<double, qi::strict_real_policies<double> > strict_double_;
    qi::rule<It, std::wstring()>
        string_; // NO SKIPPER (review)
    qi::rule<It, GenericValue(), Skipper>
        function_call_, expr_, funcparameter_, // NO LOCALS (review)
        arithmeticexpression_, term_, factor_;
};
Well. What have we here. What changed?
I removed qi::locals which was only ever used in the string_ rule anyways, and I rewrote that to honour the KISS principle
I also fixed the problem with whitespace in strings (your parser would have parsed " oops " identical to "oops"). I did so by removing the Skipper from the string_ declaration. This has the same effect as enclosing the whole rule in qi::lexeme[].
I moved Sqrt to the function_call_ rule, because, well, it's a function call.
I tweaked the function names to be no_case[] case insensitive, since your examples suggested that sqrt(9) should work
Note that Sqrt now takes any expression whereas the old situation had
| (L"Sqrt" > '(' > double_ > ')') // Wait, whaaat?
Yeah, this was never going to parse your second example, really :|
Now the real meat of the operation comes. In order to let sqrt(GetSubstring(....)) parse, we'll have to let function_call_ be a possible value for a factor_. Once that's the case, we don't have to do anything more in expr_, since expr_ might consist of a single term_ containing a single factor_ representing a function_call_ already, so
expr_ = ( *( (function_call_ ^ arithmeticexpression_)| string_ ));
evaporates into
expr_ = arithmeticexpression_;
What happened to string_ there? Well, it's still in factor_, where it was, but the ConvertStringToDouble was removed there. Strings will just happily be strings, unless they are required in the context of an arithmetic operation that requires NumValues. That's when they will be coerced into a number, and no earlier (as shown above).
// Reads expressions from standard input, one per line, and prints each with its value.
int main()
{
    static const MapFunctionParser<std::wstring::const_iterator> p;
    std::wstring input;
    while (std::getline(std::wcin, input))
    {
        std::wstring::const_iterator f(begin(input)), l(end(input));
        GenericValue value;
        // The parse must not live inside assert(): with NDEBUG defined the assert
        // expression is not evaluated, which would silently skip parsing altogether.
        bool const ok = qi::phrase_parse(f, l, p, qi::space, value);
        if (!ok)
        {
            std::wcout << L"parse failed: '" << input << L"'\n";
            continue;
        }
        if (f!=l)
            std::wcout << L"remaining unparsed: '" << std::wstring(f,l) << L"'\n";
        std::wcout << input << " --> " << value << std::endl;
    }
}
When I fed this little test program the two lines from your question, it dutifully churned out the following text:
GetSubString('100str1', 0, 2+1) + sqrt(9) --> 103
2 + 3 * sqrt(GetSubString('100str1', 0, 2+1)) --> 32
You can see the full code on Coliru (sadly, it takes too long to compile).
Originally this answer started with the following:
Q. I have tried to combine 2 grammars as below by using permutation operator. But it doesnt compile
What did you expect the permutation operator to do? The documentation states:
The permutation operator, a ^ b, matches one or more operands (a, b, ... etc.) in any order...
As you can see it would result in an attribute
boost::variant<
fusion::vector2<optional<RetValue>, optional<double>>,
std::wstring>
which clearly is not compatible. Now, I assume you just want either/or semantics, so
expr_ = string_ | function_call_ | arithmeticexpression_;
should do nicely, resulting in boost::variant<RetValue, double, std::wstring>
which is assignable to a RetValue.
Now after jumping through a dozen hoops to make your sample code compile (why...) here's a fix:
Related
My task is to parse a bracketed string, like
[foo | bar | foobar], to a vector of std::strings.
In this case,
the vector should end up with the contents {"foo" , "bar", "foobar"}.
These brackets can be nested. For example, the given bracketed string
[[john | doe] | [ bob | dylan]]
would become { "[john | doe]" , "[bob | dylan]" }
The best I could manage so far is
// Parses the bracketed list given as the first command-line argument and prints each item.
int main(int argc, char ** argv)
{
    // argv[1] only exists when an argument was actually supplied
    if (argc < 2)
    {
        std::cout << "usage: <program> '[foo | bar | foobar]'" << std::endl;
        return 1;
    }
    const std::string input {argv[1]};
    std::vector<std::string> res;
    qi::phrase_parse(input.cbegin(), input.cend(),
            '['
            >> *qi::lexeme[ +(qi::char_ - '|') >> '|']
            > -qi::lexeme[ +(qi::char_ - ']') >> ']' ],
            qi::space ,
            res);
    for (const auto& v: res)
        std::cout << v <<std::endl;
    return 0;
}
which fails miserably for the nested case. Can somebody please point me in the right direction?
Note #1: Nested cases can be more than one.
Note #2: I welcome any simpler solutions, even without using Boost Spirit.
Here's a simple C++ parser based on the assumption that brackets are balanced, i.e. every [ has a ] there.
bracket is the number of opening brackets. We make crucial decisions when that number is 1.
#include <iostream>
#include <vector>
#include <string>
#include <string_view>
// True exactly when `num` is 1. The caller passes its count of currently open
// brackets, so this identifies the outermost bracket level.
bool edge(const int num){
    const int outermost_level = 1;
    return outermost_level == num;
}
// Splits a bracketed list at the '|' separators of its outermost bracket pair and prints
// each piece on its own line. Nested bracket groups are kept intact inside their piece.
// Assumes the brackets in `line` are balanced.
int main(){
    std::vector<std::string> all;
    std::string line;
    // std::getline(std::cin, line);
    line = "[[john | doe] | [ bob | dylan]]";
    int bracket = 0;                    // number of '[' that are currently open
    std::string::size_type start = 0;   // index of the first character of the current piece
    // Index with the string's own size_type so the comparison with size() and the
    // subtraction from `start` do not mix signed and unsigned values.
    for(std::string::size_type i = 0; i < line.size(); ++i){
        const char c = line[i];
        if(c == '['){
            bracket++;
            if(edge(bracket)){
                start = i + 1;          // outermost '[': the first piece begins after it
            }
        }
        if(c == ']'){
            if(edge(bracket)){
                all.push_back(line.substr(start, i - start));   // outermost ']' closes the last piece
            }
            bracket--;
        }
        if(c == '|' && edge(bracket)){
            all.push_back(line.substr(start, i - start));       // top-level separator ends a piece
            start = i + 1;
        }
    }
    for(std::string_view t : all){
        std::cout << t << '\n';
    }
}
If you want nested lists of strings, first you'll need a result that can store nested lists. Luckily, in C++17 you can have vectors of forward-declared (incomplete) types (as long as they're defined at some point). So you can make a type that is a list, where every item is either a string or another list:
// A list in which every item is either a plain string or another, nested Expr.
struct Expr : std::vector<
boost::variant<
std::string,
Expr>>
{
// Inherit the constructors of the std::vector base.
using std::vector<boost::variant<std::string, Expr>>::vector;
};
After the grammar is pretty simple. Note that it's recursive - Term can have an Expr nested into it:
WORD = /[^\[\|\]]+/
Term = WORD | Expr
Expr = '[' Term ('|' Term)* ']';
You can express each rule separately. Boost Spirit Qi conveniently has the % operator, which parses a delimited list and inserts it into a container.
using It = std::string::const_iterator;
using Sk = qi::space_type;
qi::rule<It, std::string(), Sk> word;
qi::rule<It, boost::variant<std::string, Expr>(), Sk> term;
qi::rule<It, Expr(), Sk> expr;
word = +(qi::char_ - '[' - '|' - ']');
term = word | expr;
expr = '[' >> (term % '|') >> ']';
Then qi::phrase_parse will do what you want:
Expr res;
qi::phrase_parse(input.cbegin(), input.cend(), expr, qi::space, res);
Demo: https://godbolt.org/z/5W993s
This simpler version seems to be what you want:
qi::phrase_parse(input.cbegin(), input.cend(),
'['
>> qi::lexeme[ +~qi::char_("|]") ] % '|'
>> ']',
qi::space,
res);
It will parse:
"foo "
"bar "
"foobar"
Maybe you didn't actually want the spaces as part of the matches. Then it can be even simpler:
qi::phrase_parse(input.cbegin(), input.cend(),
'['
>> qi::lexeme[ +(qi::graph - qi::char_("|]")) ] % '|'
>> ']',
qi::space,
res);
See it Live On Coliru
Note: if you have C++14 consider using X3: Live On Coliru. That will be a lot faster to compile
I am trying to create an optional parser rule. Depending on the value of the first attribute, I want to optionally emit a data item.
Example, for the input:
x,2,3
y,3,4
x,5,6
If the first character is a y then the line should be discarded. Otherwise it will be processed. In this example, if the 3rd attribute is >= 4 then it is true. The synthesized attribute should be std::pair<bool, unsigned int> where the unsigned int value is the second attribute.
The parser is:
using namespace qi = boost::spirit::qi;
using Data = std::pair<bool, unsigned>;
BOOST_PHOENIX_ADAPT_FUNCTION(Data, make_pair, std::make_pair, 2);
class DataParser :
public qi::grammar<
std::string::iterator,
boost::spirit::char_encoding::ascii,
boost::spirit::ascii::space_type,
std::vector<Data>()
>
{
qi::rule<iterator_type, encoding_type, bool()> type;
qi::rule<iterator_type, encoding_type, bool()> side;
// doesn't compile: qi::rule<iterator_type, encoding_type, boost::spirit::ascii::space_type, boost::optional<Data>()> line;
qi::rule<iterator_type, encoding_type, boost::spirit::ascii::space_type, qi::locals<bool, unsigned, bool>, Data()> line;
qi::rule<iterator_type, encoding_type, boost::spirit::ascii::space_type, sig_type> start;
public:
DataParser()
: base_type(start)
{
using namespace qi::labels;
type = qi::char_[_val = _1 == 'x'];
side = qi::int_[_val = _1 >= 4];
line %= (qi::omit[type[_a = _1]] >> ',' >> qi::omit[qi::uint_[_b = _1]] >> ',' >> qi::omit[side[_c = _1]])[if_(_a)[_val = make_pair(_c, _b)]];
// doesn't compile: line %= (qi::omit[type[_a = _1]] >> ',' >> qi::omit[qi::uint_[_b = _1]] >> ',' >> qi::omit[side[_c = _1]])[if_(_a)[_val = make_pair(_c, _b)].else_[_val = qi::unused]];
// doesn't compile: line %= (type >> ',' >> qi::uint_ >> ',' >> side)[if_(_1)[_val = make_pair(_3, _2)]];
// doesn't compile: line %= (type >> ',' >> qi::uint_ >> ',' >> side)[if_(_1)[_val = make_pair(_3, _2)].else_[_val = unused]];
start = *line;
}
};
I get: [[false, 2], [false, 0], [true, 5]] where I want to get: [[false, 2], [true, 5]] (the second entry should be discarded).
I tried with boost::optional<Data> for the data rule and also to assign unused to _val but nothing worked.
Edit after fixing the issue with the accepted answer
The new rules are now:
using Data = std::pair<bool, unsigned>;
BOOST_PHOENIX_ADAPT_FUNCTION(Data, make_pair, std::make_pair, 2);
class DataParser :
public qi::grammar<
std::string::iterator,
boost::spirit::char_encoding::ascii,
boost::spirit::ascii::blank_type,
std::vector<Data>()
>
{
using Items = boost::fusion::vector<bool, unsigned, bool>;
qi::rule<iterator_type, encoding_type, bool()> type;
qi::rule<iterator_type, encoding_type, bool()> side;
qi::rule<iterator_type, encoding_type, boost::spirit::ascii::blank_type, Items()> line;
qi::rule<iterator_type, encoding_type, boost::spirit::ascii::blank_type, sig_type> start;
public:
DataParser()
: base_type(start)
{
using namespace qi::labels;
namespace px = boost::phoenix;
type = qi::char_[_val = _1 == 'x'];
side = qi::int_[_val = _1 >= 4];
line = type >> ',' >> qi::uint_ >> ',' >> side;
start = line[if_(_1)[px::push_back(_val, make_pair(_3, _2))]] % qi::eol;
}
};
The key points being to use the semantic action to decide if the synthesized attribute should be added by using all attributes of the previous rule, in this case line.
Okay. You use lots of power-tools. But remember, with great power comes....
In particular, qi::locals, phoenix, semantic actions: they're all complicating life so only use them as a last resort (or when they're a natural fit, which is rarely¹).
Think directly,
start = *line;
line = // ....
When you say
If the first character is a y then the line should be discarded. Otherwise it will be processed.
You can express this directly:
line = !qi::lit('y') >> // ...
Alternatively, spell out what starters to accept:
line = qi::omit[ qi::char_("xz") ] >> // ...
Done.
Straight Forward Mapping
Here I'll cheat by re-ordering the pair<unsigned, bool> so it matches the input order. Now everything works out of the box without "any" magic:
line = !qi::lit('y') >> qi::omit[qi::alnum] >> ',' >> qi::int_ >> ',' >> side;
ignore = +(qi::char_ - qi::eol);
start = qi::skip(qi::blank) [ (line | ignore) % qi::eol ];
However it WILL result in the spurious entries as you noticed: Live On Compiler Explorer
Parsed: {(2, false), (0, false), (5, true)}
Improving
Now you could go hack around things by changing the eol to also eat subsequent lines that don't appear to contain valid data lines. However, it becomes unwieldy, and we still have the desire to flip the pair's members.
So, here's where I think an action could be handy:
public:
DataParser() : DataParser::base_type(start) {
using namespace qi::labels;
start = qi::skip(qi::blank) [
(qi::char_ >> ',' >> qi::uint_ >> ',' >> qi::int_) [
_pass = process(_val, _1, _2, _3) ]
% qi::eol ];
}
private:
// Function object invoked from the semantic action for every parsed (id, type, side)
// triple. It returns false to make the enclosing parser fail, true otherwise.
struct process_f {
    template <typename... T>
    bool operator()(Datas& into, char id, unsigned type, int side) const {
        switch(id) {
            case 'z': case 'x':
                // accepted line: store (side >= 4, type)
                into.emplace_back(side >= 4, type);
                break;
            case 'y': // ignore
                break;
            case 'a':
                return false; // fail the rule
        }
        return true;
    }
};
// The functor type is process_f; no type named action_f is declared in this listing.
boost::phoenix::function<process_f> process;
You can see, there's a nice separation of concerns now. You parse (char,int,int) and conditionally process it. That's what's keeping this relatively simple compared to your attempts.
Live Demo
Live On Compiler Explorer
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <fmt/ranges.h>
namespace qi = boost::spirit::qi;
using Data = std::pair<bool, unsigned>;
using Datas = std::vector<Data>;
// Grammar that parses lines of the form `<char>,<uint>,<int>` separated by end-of-line and
// collects the accepted ones into a Datas vector through the `process` action.
template <typename It>
class DataParser : public qi::grammar<It, Datas()> {
using Skipper = qi::blank_type;
// NOTE(review): `line` is declared but never assigned or referenced in this class.
qi::rule<It, Datas(), Skipper> line;
qi::rule<It, Datas()> start;
public:
DataParser() : DataParser::base_type(start) {
using namespace qi::labels;
// Blanks are skipped inside the rule itself, so the grammar is used without an external skipper.
// The action's boolean result is assigned to _pass, which decides whether the line matches.
start = qi::skip(qi::blank) [
(qi::char_ >> ',' >> qi::uint_ >> ',' >> qi::int_) [
_pass = process(_val, _1, _2, _3) ]
% qi::eol ];
}
private:
// Decides per line what to do with the parsed values (id, type, side).
struct process_f {
template <typename... T>
bool operator()(Datas& into, char id, unsigned type, int side) const {
switch(id) {
case 'z': case 'x':
// accepted line: store (side >= 4, type)
into.emplace_back(side >= 4, type);
break;
case 'y': // ignore
break;
case 'a':
return false; // fail the rule
}
// any other id falls through the switch and is accepted without storing anything
return true;
}
};
boost::phoenix::function<process_f> process;
};
// Runs the parser over the sample input and prints the parsed data, or a failure notice,
// followed by any input that was left unparsed.
int main() {
    using It = std::string::const_iterator;
    DataParser<It> p;
    for (std::string const input : {
            "x,2,3\ny,3,4\nx,5,6",
            })
    {
        auto f = begin(input), l = end(input);
        Datas d;
        auto ok = qi::parse(f, l, p, d);
        if (ok) {
            fmt::print("Parsed: {}\n", d);
        } else {
            // the message has no replacement field, so no argument is passed
            fmt::print("Parse failed\n");
        }
        if (f!=l) {
            fmt::print("Remaining unparsed: '{}'\n", std::string(f,l));
        }
    }
}
Prints
Parsed: {(false, 2), (true, 5)}
¹ Boost Spirit: "Semantic actions are evil"?
I seem to be experiencing some mental block with Boost Spirit I just cannot get by. I have a fairly simple grammar I need to handle, where I would like to put the values into a struct that contains a std::map<> as one of its members. The key names for the pairs are known up front, so that only those are allowed. There could be one to many keys in the map, in any order, with each key name validated via qi.
The grammar looks something like this, as an example.
test .|*|<hostname> add|modify|save ( key [value] key [value] ... ) ;
//
test . add ( a1 ex00
a2 ex01
a3 "ex02,ex03,ex04" );
//
test * modify ( m1 ex10
m2 ex11
m3 "ex12,ex13,ex14"
m4 "abc def ghi" );
//
test 10.0.0.1 clear ( c1
c2
c3 );
In this example the keys for “add” being a1, a2 and a3, likewise for “modify” m1, m2, m3 and m4 and each must contain a value. For “clear” the keys of the map c1, c2 and c3 may not contain a value. Also, let's say for this example you can have up to 10 keys (a1 ... a11, m1 ... m11 and c1 ... c11) any combination of them could be used, in any order, for their corresponding action. Meaning that you cannot use the known key cX for the "add" or mX for "clear"
The structure follows this simple pattern
//
// Parsed form of one `test` command.
struct test
{
    std::string host;                           // target: ".", "*" or a host name/address
    std::string action;                         // action keyword, e.g. "add" or "modify"
    std::map<std::string,std::string> option;   // key -> value; the value may be empty
};  // a class definition must be terminated with ';'
So from the above examples, I would expect to have the struct contain ...
// add ...
test.host = .
test.action = add
test.option[0].first = a1
test.option[0].second = ex00
test.option[1].first = a2
test.option[1].second = ex01
test.option[2].first = a3
test.option[2].second = ex02,ex03,ex04
// modify ...
test.host = *
test.action = modify
test.option[0].first = m1
test.option[0].second = ex10
test.option[1].first = m2
test.option[1].second = ex11
test.option[2].first = m3
test.option[2].second = ex12,ex13,ex14
test.option[3].first = m4
test.option[3].second = abc def ghi
// clear ...
test.host = 10.0.0.1
test.action = clear
test.option[0].first = c1
test.option[0].second =
test.option[1].first = c2
test.option[1].second =
test.option[2].first = c3
test.option[2].second =
I can get each individual part working, standalone, but I cannot seem to get them working together. For example I have the host and action working without the map<>.
I’ve adapted a previously posted example from Sehe (here) trying to get this to work (BTW: Sehe has some awesome examples, which I’ve been using as much as the documentation).
Here is an excerpt (obviously not working), but at least shows where I’m trying to go.
namespace ast {
namespace qi = boost::spirit::qi;
//
using unused = qi::unused_type;
//
using string = std::string;
using strings = std::vector<string>;
using list = strings;
using pair = std::pair<string, string>;
using map = std::map<string, string>;
//
struct test
{
using preference = std::map<string,string>;
string host;
string action;
preference option;
};
}
//
// Adapt ast::test as a Fusion sequence of (host, action, option). The macro call is closed
// only after the last member; an extra ')' after `action` would end it before `option`.
BOOST_FUSION_ADAPT_STRUCT( ast::test,
    ( std::string, host )
    ( std::string, action )
    ( ast::test::preference, option ) )
//
namespace grammar
{
//
template <typename It>
struct parser
{
//
struct skip : qi::grammar<It>
{
//
skip() : skip::base_type( text )
{
using namespace qi;
// handle all whitespace (" ", \t, ...)
// along with comment lines/blocks
//
// comment blocks: /* ... */
// // ...
// -- ...
// # ...
text = ascii::space
| ( "#" >> *( char_ - eol ) >> ( eoi | eol ) ) // line comment
| ( "--" >> *( char_ - eol ) >> ( eoi | eol ) ) // ...
| ( "//" >> *( char_ - eol ) >> ( eoi | eol ) ) // ...
| ( "/*" >> *( char_ - "*/" ) >> "*/" ); // block comment
//
BOOST_SPIRIT_DEBUG_NODES( ( text ) )
}
//
qi::rule<It> text;
};
//
struct token
{
//
token()
{
using namespace qi;
// common
string = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
identity = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
real = double_;
integer = int_;
//
value = ( string | identity );
// ip target
any = '*';
local = ( char_('.') | fqdn );
fqdn = +char_("a-zA-Z0-9.\\-" ); // consession
ipv4 = +as_string[ octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] >> '.'
>> octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] >> '.'
>> octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] >> '.'
>> octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] ];
//
target = ( any | local | fqdn | ipv4 );
//
pair = identity >> -( attr( ' ' ) >> value );
map = pair >> *( attr( ' ' ) >> pair );
list = *( value );
//
BOOST_SPIRIT_DEBUG_NODES( ( string )
( identity )
( value )
( real )
( integer )
( any )
( local )
( fqdn )
( ipv4 )
( target )
( pair )
( keyval )
( map )
( list ) )
}
//
qi::rule<It, std::string()> string;
qi::rule<It, std::string()> identity;
qi::rule<It, std::string()> value;
qi::rule<It, double()> real;
qi::rule<It, int()> integer;
qi::uint_parser<unsigned, 10, 1, 3> octet;
qi::rule<It, std::string()> any;
qi::rule<It, std::string()> local;
qi::rule<It, std::string()> fqdn;
qi::rule<It, std::string()> ipv4;
qi::rule<It, std::string()> target;
//
qi::rule<It, ast::map()> map;
qi::rule<It, ast::pair()> pair;
qi::rule<It, ast::pair()> keyval;
qi::rule<It, ast::list()> list;
};
//
// Grammar for a single `test <target> <action> ;` command (the asker's
// original, work-in-progress version). It inherits the token rules from
// `token` and is itself a skipper-aware qi::grammar producing ast::test.
struct test : token, qi::grammar<It, ast::test(), skip>
{
//
// Builds all rules; command_ is registered as the grammar's start rule.
test() : test::base_type( command_ )
{
using namespace qi;
using namespace qr;
// `distinct` keyword helper: the wrapped keyword must not be directly
// followed by a character from the given set. qi::copy keeps the
// expression alive when it is stored in an `auto` variable.
auto kw = qr::distinct( copy( char_( "a-zA-Z0-9_" ) ) );
// not sure how to enforce the "key" names!
// NOTE(review): key_ is assigned here but is not among the member
// declarations visible below - confirm where it is declared.
key_ = *( '(' >> *value >> ')' );
// tried using token::map ... didn't work ...
//
// Each action rule matches its literal and sets the attribute explicitly
// through a semantic action; attr( ' ' ) injects a space character into
// the synthesized attribute without consuming any input.
add_ = ( ( "add" >> attr( ' ' ) ) [ _val = "add" ] );
modify_ = ( ( "modify" >> attr( ' ' ) ) [ _val = "modify" ] );
clear_ = ( ( "clear" >> attr( ' ' ) ) [ _val = "clear" ] );
//
// Ordered choice between the three supported actions.
action_ = ( add_ | modify_ | clear_ );
/* *** can't get from A to B here ... not sure what to do *** */
//
// Full command: keyword, target, action, terminating semicolon.
command_ = kw[ "test" ]
>> target
>> action_
>> ';';
BOOST_SPIRIT_DEBUG_NODES( ( command_ )
( action_ )
( add_ )
( modify_ )
( clear_ ) )
}
//
private:
//
// Rules re-exported from the `token` base.
using token::value;
using token::target;
using token::map;
// Start rule and the action alternative.
qi::rule<It, ast::test(), skip> command_;
qi::rule<It, std::string(), skip> action_;
//
// One rule per action keyword.
qi::rule<It, std::string(), skip> add_;
qi::rule<It, std::string(), skip> modify_;
qi::rule<It, std::string(), skip> clear_;
};
...
};
}
I hope this question isn't too ambiguous and if you need a working example of the problem, I can certainly provide that. Any and all help is greatly appreciated, so thank you in advance!
Notes:
with this
add_ = ( ( "add" >> attr( ' ' ) ) [ _val = "add" ] );
modify_ = ( ( "modify" >> attr( ' ' ) ) [ _val = "modify" ] );
clear_ = ( ( "clear" >> attr( ' ' ) ) [ _val = "clear" ] );
did you mean to require a space? Or are you really just trying to force the struct action field to contain a trailing space (that's what will happen).
If you meant the latter, I'd do that outside of the parser¹.
If you wanted the first, use the kw facility:
add_ = kw["add"] [ _val = "add" ];
modify_ = kw["modify"] [ _val = "modify" ];
clear_ = kw["clear"] [ _val = "clear" ];
In fact, you can simplify that (again, ¹):
add_ = raw[ kw["add"] ];
modify_ = raw[ kw["modify"] ];
clear_ = raw[ kw["clear"] ];
Which also means that you can simplify to
action_ = raw[ kw[lit("add")|"modify"|"clear"] ];
However, getting a bit close to your question, you could also use a symbol parser:
symbols<char> action_sym;
action_sym += "add", "modify", "clear";
//
action_ = raw[ kw[action_sym] ];
Caveat: the symbols instance needs to be a member so that its lifetime extends beyond the constructor.
If you meant to capture the input representation of ipv4 addresses with
ipv4 = +as_string[ octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] >> '.'
>> octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] >> '.'
>> octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] >> '.'
>> octet[ _pass = ( _1 >= 0 && _1 <= 255 ) ] ];
Side note: I'm assuming +as_string is a simple mistake and you meant as_string instead.
Simplify:
qi::uint_parser<uint8_t, 10, 1, 3> octet;
This obviates the range checks (see ¹ again):
ipv4 = as_string[ octet >> '.' >> octet >> '.' >> octet >> '.' >> octet ];
However, this would build a 4-char binary string representation of the address. If you wanted that, fine. I doubt it (because you'd have written std::array<uint8_t, 4> or uint64_t, right?). So if you wanted the string, again use raw[]:
ipv4 = raw[ octet >> '.' >> octet >> '.' >> octet >> '.' >> octet ];
Same issue as with number 1.:
pair = identity >> -( attr(' ') >> value );
This time, the problem betrays that the productions should not be in token; Conceptually token-izing precedes parsing and hence I'd keep the tokens skipper-less. kw doesn't really do a lot of good in that context. Instead, I'd move pair, map and list (unused?) into the parser:
pair = kw[identity] >> -value;
map = +pair;
list = *value;
Some examples
There's a very recent example I made about using symbols to parse (here), but this answer comes a lot closer to your question:
How to provide user with autocomplete suggestions for given boost::spirit grammar?
It goes far beyond the scope of your parser because it does all kinds of actions in the grammar, but what it does show is to have generic "lookup-ish" rules that can be parameterized with a particular "symbol set": see the Identifier Lookup section of the answer:
Identifier Lookup
We store "symbol tables" in Domain members _variables and
_functions:
using Domain = qi::symbols<char>; Domain _variables, _functions;
Then we declare some rules that can do lookups on either of them:
// domain identifier lookups
qi::_r1_type _domain;
qi::rule<It, Ast::Identifier(Domain const&)> maybe_known, known,
unknown;
The corresponding declarations will be shown shortly.
Variables are pretty simple:
variable = maybe_known(phx::ref(_variables));
Calls are trickier. If a name is unknown we don't want to assume it
implies a function unless it's followed by a '(' character.
However, if an identifier is a known function name, we want even to
imply the ( (this gives the UX the appearance of autocompletion
where when the user types sqrt, it suggests the next character to be
( magically).
// The heuristics:
// - an unknown identifier followed by (
// - an unclosed argument list implies )
call %= (
known(phx::ref(_functions)) // known -> imply the parens
| &(identifier >> '(') >> unknown(phx::ref(_functions))
) >> implied('(') >> -(expression % ',') >> implied(')');
It all builds on known, unknown and maybe_known:
///////////////////////////////
// identifier lookup, suggesting
{
maybe_known = known(_domain) | unknown(_domain);
// distinct to avoid partially-matching identifiers
using boost::spirit::repository::qi::distinct;
auto kw = distinct(copy(alnum | '_'));
known = raw[kw[lazy(_domain)]];
unknown = raw[identifier[_val=_1]] [suggest_for(_1, _domain)];
}
I think you can use the same approach constructively here. One additional gimmick could be to validate that properties supplied are, in fact, unique.
Demo Work
Combining all the hints above makes it compile and "parse" the test commands:
Live On Coliru
#include <string>
#include <map>
#include <vector>
// AST types filled in by the command grammar.
namespace ast {
    // Text type used by every node.
    using string = std::string;

    // Sequence of values; `list` and `strings` name the same type.
    using list = std::vector<string>;
    using strings = list;

    // One option (key plus value) and the collection of options by key.
    using pair = std::pair<string, string>;
    using map = std::map<string, string>;

    // One parsed command: target host, action keyword and its options.
    struct command {
        string host;
        string action;
        map option;
    };
}
#include <boost/fusion/adapted.hpp>
BOOST_FUSION_ADAPT_STRUCT(ast::command, host, action, option)
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
namespace grammar
{
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
template <typename It>
struct parser
{
// Skipper grammar: describes everything the phrase parser ignores between
// tokens. It exposes no attribute.
struct skip : qi::grammar<It> {
skip() : skip::base_type(text) {
using namespace qi;
// handle all whitespace along with line/block comments
// Line comments start with "#", "--" or "//" and run to end of line or
// end of input; block comments run from "/*" to the first "*/".
text = ascii::space
| (lit("#")|"--"|"//") >> *(char_ - eol) >> (eoi | eol) // line comment
| "/*" >> *(char_ - "*/") >> "*/"; // block comment
//
BOOST_SPIRIT_DEBUG_NODES((text))
}
private:
// The single rule backing the skipper.
qi::rule<It> text;
};
//
// Skipper-less token rules shared by the command grammar. Every rule
// synthesizes a std::string.
struct token {
//
// Defines all token rules.
token() {
using namespace qi;
// common
// Double-quoted string; a backslash escapes the character after it.
string = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
// Identifier: letter or underscore, then letters, digits or underscores.
identity = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
// A value is either a quoted string or a bare identifier.
value = string | identity;
// ip target
any = '*';
local = '.' | fqdn;
fqdn = +char_("a-zA-Z0-9.\\-"); // concession
// raw[] yields the matched input text rather than the four octet values.
ipv4 = raw [ octet >> '.' >> octet >> '.' >> octet >> '.' >> octet ];
//
// Alternatives are tried in the order written.
// NOTE(review): fqdn's character set includes digits and '.', so a
// dotted-quad address looks like it is consumed by local/fqdn before
// ipv4 is ever tried - confirm whether that is intended.
target = any | local | fqdn | ipv4;
//
BOOST_SPIRIT_DEBUG_NODES(
(string) (identity) (value)
(any) (local) (fqdn) (ipv4) (target)
)
}
protected:
//
// Common tokens.
qi::rule<It, std::string()> string;
qi::rule<It, std::string()> identity;
qi::rule<It, std::string()> value;
// Decimal parser for 1 to 3 digits stored in a uint8_t.
qi::uint_parser<uint8_t, 10, 1, 3> octet;
// Target tokens.
qi::rule<It, std::string()> any;
qi::rule<It, std::string()> local;
qi::rule<It, std::string()> fqdn;
qi::rule<It, std::string()> ipv4;
qi::rule<It, std::string()> target;
};
//
// Grammar for one `test <target> <action> ( <options> ) ;` command.
// Inherits the token rules and synthesizes an ast::command.
struct test : token, qi::grammar<It, ast::command(), skip> {
//
// Builds the rules; command_ is the start rule.
test() : test::base_type(command_)
{
using namespace qi;
// `distinct` keyword helper: the wrapped parser must not be directly
// followed by an identifier character. qi::copy keeps the expression
// alive when it is stored in an `auto` variable.
auto kw = qr::distinct( copy( char_( "a-zA-Z0-9_" ) ) );
//
// Register the accepted action keywords in the symbol table.
action_sym += "add", "modify", "clear";
// raw[] exposes the matched keyword text as the rule's attribute.
action_ = raw[ kw[action_sym] ];
//
command_ = kw["test"]
>> target
>> action_
>> '(' >> map >> ')'
>> ';';
//
// An option is an identifier optionally followed by a value; the option
// map needs at least one pair.
pair = kw[identity] >> -value;
map = +pair;
// `list` is defined but not referenced by command_.
list = *value;
BOOST_SPIRIT_DEBUG_NODES(
(command_) (action_)
(pair) (map) (list)
)
}
private:
// Token rules re-exported from the base.
using token::target;
using token::identity;
using token::value;
// Symbol table of action keywords; a member so that it stays alive for
// as long as the rules referring to it.
qi::symbols<char> action_sym;
//
qi::rule<It, ast::command(), skip> command_;
qi::rule<It, std::string(), skip> action_;
//
qi::rule<It, ast::map(), skip> map;
qi::rule<It, ast::pair(), skip> pair;
qi::rule<It, ast::list(), skip> list;
};
};
}
#include <fstream>
// Parses every command found in "input.txt" and reports how many were read.
// Returns 1 when the input file cannot be opened, 0 otherwise.
int main() {
    using It = boost::spirit::istream_iterator;
    using Parser = grammar::parser<It>;

    std::ifstream input("input.txt");
    if (!input) {
        // Without this check a missing file would look like an empty but
        // valid input and be reported as "Parsed 0 commands".
        std::cout << "Could not open input.txt\n";
        return 1;
    }

    // noskipws: whitespace must reach the parser, the skipper handles it.
    It f(input >> std::noskipws), l;

    Parser::skip const s{};
    Parser::test const p{};

    std::vector<ast::command> data;
    // `*p` matches zero or more commands, so the parse itself succeeds even
    // when nothing matches; unparsed input is reported separately below.
    bool ok = phrase_parse(f, l, *p, s, data);

    if (ok) {
        std::cout << "Parsed " << data.size() << " commands\n";
    } else {
        std::cout << "Parsed failed\n";
    }

    if (f != l) {
        std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
    }
}
Prints
Parsed 3 commands
Let's restrict the Keys
Like in the linked answer above, let's pass the map and pair rules the actual key set from which they take their allowed values:
using KeySet = qi::symbols<char>;
using KeyRef = KeySet const*;
//
KeySet add_keys, modify_keys, clear_keys;
qi::symbols<char, KeyRef> action_sym;
qi::rule<It, ast::pair(KeyRef), skip> pair;
qi::rule<It, ast::map(KeyRef), skip> map;
Note A key feature used is the associated attribute value with a symbols<> lookup (in this case we associate a KeyRef with an action symbol):
//
add_keys += "a1", "a2", "a3", "a4", "a5", "a6";
modify_keys += "m1", "m2", "m3", "m4";
clear_keys += "c1", "c2", "c3", "c4", "c5";
action_sym.add
("add", &add_keys)
("modify", &modify_keys)
("clear", &clear_keys);
Now the heavy lifting starts.
Using qi::locals<> and inherited attributes
Let's give command_ some local space to store the selected keyset:
qi::rule<It, ast::command(), skip, qi::locals<KeyRef> > command_;
Now we can in principle assign to it (using the _a placeholder). However, there are some details:
//
qi::_a_type selected;
Always prefer descriptive names :) _a and _r1 get old pretty quick. Things are confusing enough as it is.
command_ %= kw["test"]
>> target
>> raw[ kw[action_sym] [ selected = _1 ] ]
>> '(' >> map(selected) >> ')'
>> ';';
Note: the subtlest detail here is %= instead of = to avoid the suppression of automatic attribute propagation when a semantic action is present (yeah, see ¹ again...)
But all in all, that doesn't read so bad?
//
qi::_r1_type symref;
pair = raw[ kw[lazy(*symref)] ] >> -value;
map = +pair(symref);
And now at least things parse
Almost there
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <string>
#include <map>
#include <vector>
// AST types filled in by the command grammar.
namespace ast {
    // Text type used by every node.
    using string = std::string;

    // Sequence of values; `list` and `strings` name the same type.
    using list = std::vector<string>;
    using strings = list;

    // One option (key plus value) and the collection of options by key.
    using pair = std::pair<string, string>;
    using map = std::map<string, string>;

    // One parsed command: target host, action keyword and its options.
    struct command {
        string host;
        string action;
        map option;
    };
}
#include <boost/fusion/adapted.hpp>
BOOST_FUSION_ADAPT_STRUCT(ast::command, host, action, option)
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
namespace grammar
{
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
template <typename It>
struct parser
{
// Skipper grammar: describes everything the phrase parser ignores between
// tokens. It exposes no attribute.
struct skip : qi::grammar<It> {
skip() : skip::base_type(rule_) {
using namespace qi;
// handle all whitespace along with line/block comments
// Line comments start with "#", "--" or "//" and run to end of line or
// end of input; block comments run from "/*" to the first "*/".
rule_ = ascii::space
| (lit("#")|"--"|"//") >> *(char_ - eol) >> (eoi | eol) // line comment
| "/*" >> *(char_ - "*/") >> "*/"; // block comment
//
// Debug output for the skipper is disabled. NOTE(review): the macro below
// names `skipper`, but the only rule in this struct is `rule_`, so it
// would need to read (rule_) if re-enabled.
//BOOST_SPIRIT_DEBUG_NODES((skipper))
}
private:
// The single rule backing the skipper.
qi::rule<It> rule_;
};
//
// Skipper-less token rules shared by the command grammar. Every rule
// synthesizes a std::string.
struct token {
//
// Defines all token rules.
token() {
using namespace qi;
// common
// Double-quoted string; a backslash escapes the character after it.
string = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
// Identifier: letter or underscore, then letters, digits or underscores.
identity = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
// A value is either a quoted string or a bare identifier.
value = string | identity;
// ip target
any = '*';
local = '.' | fqdn;
fqdn = +char_("a-zA-Z0-9.\\-"); // concession
// raw[] yields the matched input text rather than the four octet values.
ipv4 = raw [ octet >> '.' >> octet >> '.' >> octet >> '.' >> octet ];
//
// Alternatives are tried in the order written.
// NOTE(review): fqdn's character set includes digits and '.', so a
// dotted-quad address looks like it is consumed by local/fqdn before
// ipv4 is ever tried - confirm whether that is intended.
target = any | local | fqdn | ipv4;
//
BOOST_SPIRIT_DEBUG_NODES(
(string) (identity) (value)
(any) (local) (fqdn) (ipv4) (target)
)
}
protected:
//
// Common tokens.
qi::rule<It, std::string()> string;
qi::rule<It, std::string()> identity;
qi::rule<It, std::string()> value;
// Decimal parser for 1 to 3 digits stored in a uint8_t.
qi::uint_parser<uint8_t, 10, 1, 3> octet;
// Target tokens.
qi::rule<It, std::string()> any;
qi::rule<It, std::string()> local;
qi::rule<It, std::string()> fqdn;
qi::rule<It, std::string()> ipv4;
qi::rule<It, std::string()> target;
};
//
// Grammar for one `test <target> <action> ( <options> ) ;` command in which
// the option keys accepted depend on the action that was parsed.
struct test : token, qi::grammar<It, ast::command(), skip> {
//
// Builds the key sets and rules; start_ is the grammar's start rule.
test() : test::base_type(start_)
{
using namespace qi;
// `distinct` keyword helper: the wrapped parser must not be directly
// followed by an identifier character. qi::copy keeps the expression
// alive when it is stored in an `auto` variable.
auto kw = qr::distinct( copy( char_( "a-zA-Z0-9_" ) ) );
//
// Keys allowed for each action.
add_keys += "a1", "a2", "a3", "a4", "a5", "a6";
modify_keys += "m1", "m2", "m3", "m4";
clear_keys += "c1", "c2", "c3", "c4", "c5";
// Each action keyword maps to a pointer to its key set.
action_sym.add
("add", &add_keys)
("modify", &modify_keys)
("clear", &clear_keys);
//
// Readable name for the rule's first local variable (_a).
qi::_a_type selected;
// %= keeps automatic attribute propagation even though a semantic action
// is attached. The action stores the key set of the matched keyword in
// the local, which is then handed to `map` as inherited attribute.
command_ %= kw["test"]
>> target
>> raw[ kw[action_sym] [ selected = _1 ] ]
>> '(' >> map(selected) >> ')'
>> ';';
//
// Readable name for the first inherited attribute (_r1).
qi::_r1_type symref;
// The key is looked up lazily in whatever key set was passed in.
// NOTE(review): a following key also matches `value`, so it can be taken
// as the value of the preceding key - confirm this is acceptable.
pair = raw[ kw[lazy(*symref)] ] >> -value;
map = +pair(symref);
// `list` is defined but not referenced by command_.
list = *value;
// start_ forwards to command_; it carries no locals, unlike command_.
start_ = command_;
BOOST_SPIRIT_DEBUG_NODES(
(start_) (command_)
(pair) (map) (list)
)
}
private:
// Token rules re-exported from the base.
using token::target;
using token::identity;
using token::value;
// A set of allowed keys, and a non-owning pointer to such a set.
using KeySet = qi::symbols<char>;
using KeyRef = KeySet const*;
//
qi::rule<It, ast::command(), skip> start_;
// command_ carries one local: the key set selected by the action.
qi::rule<It, ast::command(), skip, qi::locals<KeyRef> > command_;
//
// Members, so they stay alive as long as the rules pointing at them.
KeySet add_keys, modify_keys, clear_keys;
qi::symbols<char, KeyRef> action_sym;
// pair and map receive the active key set as inherited attribute.
qi::rule<It, ast::pair(KeyRef), skip> pair;
qi::rule<It, ast::map(KeyRef), skip> map;
qi::rule<It, ast::list(), skip> list;
};
};
}
#include <fstream>
// Parses every command found in "input.txt" and reports how many were read.
// Returns 1 when the input file cannot be opened, 0 otherwise.
int main() {
    using It = boost::spirit::istream_iterator;
    using Parser = grammar::parser<It>;

    std::ifstream input("input.txt");
    if (!input) {
        // Without this check a missing file would look like an empty but
        // valid input and be reported as "Parsed 0 commands".
        std::cout << "Could not open input.txt\n";
        return 1;
    }

    // noskipws: whitespace must reach the parser, the skipper handles it.
    It f(input >> std::noskipws), l;

    Parser::skip const s{};
    Parser::test const p{};

    std::vector<ast::command> data;
    // `*p` matches zero or more commands, so the parse itself succeeds even
    // when nothing matches; unparsed input is reported separately below.
    bool ok = phrase_parse(f, l, *p, s, data);

    if (ok) {
        std::cout << "Parsed " << data.size() << " commands\n";
    } else {
        std::cout << "Parsed failed\n";
    }

    if (f != l) {
        std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
    }
}
Prints
Parsed 3 commands
HOLD ON, NOT SO FAST! It's wrong
Yeah. If you enable debug, you'll see it parses things oddly:
<attributes>[[[1, 0, ., 0, ., 0, ., 1], [c, l, e, a, r], [[[c, 1], [c, 2]], [[c, 3], []]]]]</attributes>
This is actually "merely" a problem with the grammar. If the grammar cannot see the difference between a key and a value, then obviously c2 is going to be parsed as the value of the property with key c1.
It's up to you to disambiguate the grammar. For now, I'm going to demonstrate a fix using a negative assertion: we only accept values that are not known keys. It's a bit dirty, but might be useful to you for instructional purposes:
key = raw[ kw[lazy(*symref)] ];
pair = key(symref) >> -(!key(symref) >> value);
map = +pair(symref);
Note I factored out the key rule for readability:
Live On Coliru
Parses
<attributes>[[[1, 0, ., 0, ., 0, ., 1], [c, l, e, a, r], [[[c, 1], []], [[c, 2], []], [[c, 3], []]]]]</attributes>
Just what the doctor ordered!
¹ Boost Spirit: "Semantic actions are evil"?
In other threads I've read how to add a symbol to the symbol table in a semantic action, but I don't know how to remove it.
The idea behind my question is that I want to allow keywords to be renamed in the parsed text. Several keywords with values are given, but the user can reassign them:
reassign(keyword)(mykeyword)
I have a rule with semantic action
using namespace boost::spirit::qi;
...
qi::symbols<char, TYPE> keywords;
...
key_replace = ( lit("reassign") >> lit("(") >> keywords >> lit(")") >>
lit("(") >> lexeme [ raw [ ( alpha >> *( alnum | '_' ) ) ] ] >> lit (")") )
[ boost::phoenix::bind(keywords.remove, _1) ]; // TODO: replace not remove
The problem is that I don't get a reference to the symbol itself, only to the stored value, so calling remove doesn't work.
How can I get a reference to the parsed symbol during parsing?
Is there a simpler way to exchange a symbol while preserving the value during parsing?
The naive 'phoenix' way would be to
rule_assign = key >> value
[ phx::bind(keywords.add, _1, _2) ];
rule_remove = key
[ phx::bind(keywords.remove, _1) ];
// and voila: (BROKEN)
rule_replace = key >> value
[ phx::bind(keywords.remove, _1),
phx::bind(keywords.add, _1, _2)
];
The latter doesn't work, I believe due to the fact that the first bind returns an object that overloads operator, itself, and it gets preferred over phoenix's operator,.
I suggest you work around this by writing a little helper:
struct assign_symbol_f
{
assign_symbol_f(Symbols& sym) : sym(sym) {}
typedef bool result_type;
template<typename Key, typename Value>
bool operator()(Key const& key, Value const& value) const
{
bool replaced = (nullptr != sym.find(key));
sym.remove(key);
sym.add(key, value);
return replaced;
}
private:
Symbols& sym;
};
This transparently assigns or reassigns an item in a symbols tree. Use it as follows:
rule_replace_or_add = key >> value
[ phx::bind(assign_symbol_f(keywords), qi::_1, qi::_2) ];
Now, you could split things and be more specific:
assign_symbol_f assign_sym(keywords);
rule_assign = key >> value
[ qi::_pass = !phx::bind(assign_sym, _1, _2) ];
rule_replace = key >> value
[ qi::_pass = phx::bind(assign_sym, _1, _2) ];
Bonus
As a bonus you can have a little bit of syntactic sugar by creating a lazy actor for your functor:
phx::function<assign_symbol_f> assign_sym;
// use it like
rule_assign = key >> value
[ qi::_pass = assign_sym(_1, _2) ];
rule_replace = key >> value
[ qi::_pass = assign_sym(_1, _2) ];
Look ma! No more phx::bind.
Full demo
Complete with a rudimentary test suite :)
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef qi::symbols<char, int> Symbols;
struct assign_symbol_f
{
assign_symbol_f(Symbols& sym) : sym(sym) {}
typedef bool result_type;
template<typename Key, typename Value>
bool operator()(Key const& key, Value const& value) const
{
bool replaced = (nullptr != sym.find(key));
sym.remove(key);
sym.add(key, value);
return replaced;
}
private:
Symbols& sym;
};
// Grammar for a ';'-separated list of dictionary commands:
//   +<identifier> <int>   add or replace an entry
//   -<identifier>         remove an entry
// The commands act on the symbol table passed to the constructor as they
// are parsed; the grammar exposes no attribute.
template <typename Iter> struct parser : qi::grammar<Iter, qi::space_type>
{
// dict: the table modified by the semantic actions. It is referenced, not
// copied, so it has to outlive this parser.
parser(Symbols &dict)
: parser::base_type(start),
assign_sym(dict)
{
using namespace qi;
// One or more printable, non-space characters.
// NOTE(review): `graph` also matches ';', so an identifier written
// directly before a separator would include it - confirm intended.
identifier = +graph;
// "+name value": assign through the lazy assign_sym function.
add_rule = lit("+") >> (identifier >> int_)
[ assign_sym(_1, _2) ]
;
// "-name": remove the entry through the table's remove member.
del_rule = lit("-") >> identifier
[ phx::bind(dict.remove, _1) ]
;
start = (del_rule | add_rule) % ";";
}
private:
// Lazy (Phoenix) wrapper around assign_symbol_f bound to the table.
phx::function<assign_symbol_f> assign_sym;
qi::rule<Iter, qi::space_type> start, del_rule, add_rule;
qi::rule<Iter, std::string()> identifier; // no skipper
};
bool execute(std::string const& test, Symbols& dict)
{
auto f = test.begin(), l = test.end();
parser<std::string::const_iterator> prsr(dict);
return
qi::phrase_parse(f, l, prsr, qi::space)
&& (f == l);
}
// Rudimentary test suite for the dictionary command parser.
int main() {
    Symbols dict;

    // The commands run outside assert(): with NDEBUG defined, assert's
    // argument is not evaluated, which would silently skip the commands.
    bool const first_ok = execute("+foo 3; +bar 4; -foo", dict);
    assert(first_ok);
    assert(!dict.find("foo"));
    assert( dict.find("bar") && (4 == dict.at("bar")));
    assert(!dict.find("zap"));

    bool const second_ok = execute("+zap -42; +bar 5; +foo 33", dict);
    assert(second_ok);
    assert( dict.find("zap") && (-42 == dict.at("zap")));
    assert( dict.find("bar") && (5 == dict.at("bar"))); // replaced
    assert( dict.find("foo") && (33 == dict.at("foo")));

    // Keep the results "used" when the asserts are compiled out.
    (void)first_ok;
    (void)second_ok;
}
I have a strange problem with a calculator made using boost::spirit. This calculator is supposed to take a string as argument representing a series of arithmetical expression separated by commas, like "a+4*5,77,(b-c)*4". It also allows the string "?" and returns the array containing a -1 in this case. The calculator is initialized with a SymTable, which is a template class argument to describe any class offering the [string] -> int operator (example: a map), to resolve the value of variables.
The following code works on my Ubuntu 10.4 with both gcc 4.6.2 and gcc 4.4, and both boost 1.47 and 1.48. It also worked in the past on a Cray Linux machine with gcc 4.5.3 and boost 1.47.
#include <boost/bind.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
namespace sp = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace Damaris {
// Calculator grammar: parses either "?" or a comma-separated list of integer
// arithmetic expressions and synthesizes the resulting values as a
// std::vector<int>. Identifiers are resolved through a SymTable.
template <typename Iterator, typename SymTable>
struct Calc : qi::grammar<Iterator, std::vector<int>(), ascii::space_type>
{
// Start rule: "?" or a list of expressions.
qi::rule<Iterator, std::vector<int>(), ascii::space_type> start;
// Additive level (+, -).
qi::rule<Iterator, int(), ascii::space_type> expr;
// The single "?" token, yielding -1.
qi::rule<Iterator, int(), ascii::space_type> qmark;
// Multiplicative level (*, /, %).
qi::rule<Iterator, int(), ascii::space_type> factor;
// Parenthesised expression, integer literal or variable.
qi::rule<Iterator, int(), ascii::space_type> simple;
qi::rule<Iterator, std::string(), ascii::space_type> identifier;
// Variable lookup; receives the symbol table as inherited attribute.
// NOTE(review): the attribute is declared by value (SymTable), which
// presumably copies the table on each invocation - confirm.
qi::rule<Iterator, int(SymTable), ascii::space_type> value;
/**
* \brief Constructor.
* \param[in] sym : table of symbols used to resolve identifiers.
*/
Calc(SymTable &sym) : Calc::base_type(start)
{
// lexeme[] disables skipping inside an identifier.
identifier = qi::lexeme[( qi::alpha | '_') >> *( qi::alnum | '_')];
// Look the identifier up with the table's operator[].
value = identifier[qi::_val = qi::labels::_r1[qi::_1]];
simple = ('(' >> expr >> ')')
| qi::int_
| value(boost::phoenix::ref(sym));
// NOTE(review): %= asks for automatic attribute propagation, but the
// operator alternatives synthesize two ints, not one int; this is the
// statement the reported compile error points at.
// The rules recurse on the right, so chained operators of the same level
// group to the right (a - b - c is read as a - (b - c)).
factor %= (simple >> '*' >> factor)[qi::_val = qi::_1 * qi::_2]
| (simple >> '/' >> factor)[qi::_val = qi::_1 / qi::_2]
| (simple >> '%' >> factor)[qi::_val = qi::_1 % qi::_2]
| simple;
expr %= (factor >> '+' >> expr)[qi::_val = qi::_1 + qi::_2]
| (factor >> '-' >> expr)[qi::_val = qi::_1 - qi::_2]
| factor;
qmark = qi::char_('?')[qi::_val = -1];
start = qmark
| (expr % ',');
}
};
}
Today I tried compiling the same code again on the Cray machine (which has been upgraded since then, I think). I tried with gcc 4.6.2 and gcc 4.5.2, and with both boost 1.48 and 1.49, and I always get the same compilation error, which I don't understand:
/nics/b/home/mdorier/damaris-0.4/common/Calc.hpp:74:3: instantiated from 'Damaris::Calc<Iterator, SymTable>::Calc(SymTable&) [with Iterator = __gnu_cxx::__normal_iterator<const char*, std::basic_string<char> >, SymTable = Damaris::ParameterSet]'
/nics/b/home/mdorier/damaris-0.4/common/MetadataManager.cpp:45:79: instantiated from here
/nics/b/home/mdorier/deploy/include/boost/spirit/home/qi/detail/assign_to.hpp:123:13: error: invalid static_cast from type 'const boost::fusion::vector2<int, int>' to type 'int'
The line 74 in Calc.hpp corresponds to the line "factor = ...".
The instantiation line indicated (MetadataManager.cpp:45) is the following:
layoutInterp = new Calc<std::string::const_iterator,ParameterSet>(*parameters);
with layoutInterp being of type Calc* and parameters being of type ParameterSet*.
Any idea where this error comes from? Thanks
I'm pretty sure you might have been rearranging stuff in your rules. In fact, the %= auto-rule expression assignments won't work because the synthesized type of the parser expression doesn't resemble an int.
Basically, you'd change
factor %= (simple >> '*' >> factor)[ _val = _1 * _2 ]
| (simple >> '/' >> factor)[ _val = _1 / _2 ]
| (simple >> '%' >> factor)[ _val = _1 % _2 ]
| simple;
expr %= (factor >> '+' >> expr)[ _val = _1 + _2 ]
| (factor >> '-' >> expr)[ _val = _1 - _2 ]
| factor;
into
factor = (simple >> '*' >> factor)[ _val = _1 * _2 ]
| (simple >> '/' >> factor)[ _val = _1 / _2 ]
| (simple >> '%' >> factor)[ _val = _1 % _2 ]
| (simple) [_val = _1 ];
expr = (factor >> '+' >> expr)[ _val = _1 + _2 ]
| (factor >> '-' >> expr)[ _val = _1 - _2 ]
| (factor) [_val = _1 ];
I have fixed up some small issues and created a SSCCE of your post that works, as far as I can tell 1:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace sp = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
namespace Damaris {
// Calculator grammar: parses either "?" or a comma-separated list of integer
// arithmetic expressions and synthesizes the resulting values as a
// std::vector<int>. Identifiers are resolved through a SymTable.
template <typename Iterator, typename SymTable>
struct Calc : qi::grammar<Iterator, std::vector<int>(), ascii::space_type>
{
// Start rule: "?" or a list of expressions.
qi::rule<Iterator, std::vector<int>(), ascii::space_type> start;
// Additive level (+, -).
qi::rule<Iterator, int(), ascii::space_type> expr;
// The single "?" token, yielding -1.
qi::rule<Iterator, int(), ascii::space_type> qmark;
// Multiplicative level (*, /, %).
qi::rule<Iterator, int(), ascii::space_type> factor;
// Parenthesised expression, integer literal or variable.
qi::rule<Iterator, int(), ascii::space_type> simple;
qi::rule<Iterator, std::string(), ascii::space_type> identifier;
// Variable lookup; receives the symbol table as inherited attribute.
// NOTE(review): the attribute is declared by value (SymTable), which
// presumably copies the table on each invocation - confirm.
qi::rule<Iterator, int(SymTable), ascii::space_type> value;
// sym: table used to resolve identifiers; passed to `value` by reference
// wrapper, so it has to outlive the grammar.
Calc(SymTable &sym) : Calc::base_type(start)
{
using namespace qi;
// lexeme[] disables skipping inside an identifier.
identifier = lexeme[( alpha | '_') >> *( alnum | '_')];
// Look the identifier up with the table's operator[].
value = identifier[ _val = _r1[_1] ];
simple = ('(' >> expr >> ')')
| int_
| value(boost::phoenix::ref(sym));
// Plain '=' with an explicit semantic action on every alternative: the
// result is always assigned through _val.
// The rules recurse on the right, so chained operators of the same level
// group to the right (a - b - c is read as a - (b - c)).
factor = (simple >> '*' >> factor)[ _val = _1 * _2 ]
| (simple >> '/' >> factor)[ _val = _1 / _2 ]
| (simple >> '%' >> factor)[ _val = _1 % _2 ]
| (simple) [_val = _1 ];
expr = (factor >> '+' >> expr)[ _val = _1 + _2 ]
| (factor >> '-' >> expr)[ _val = _1 - _2 ]
| (factor) [_val = _1 ];
qmark = char_('?')[ _val = -1 ];
start = qmark
| (expr % ',');
// Name the rules for debug tracing.
BOOST_SPIRIT_DEBUG_NODE(start);
BOOST_SPIRIT_DEBUG_NODE(qmark);
BOOST_SPIRIT_DEBUG_NODE(expr);
BOOST_SPIRIT_DEBUG_NODE(factor);
BOOST_SPIRIT_DEBUG_NODE(simple);
BOOST_SPIRIT_DEBUG_NODE(value);
BOOST_SPIRIT_DEBUG_NODE(identifier);
}
};
}
// Demo driver: evaluates a fixed expression list with two predefined
// variables and prints the results, or the unparsed remainder on failure.
int main(int argc, const char *argv[])
{
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;

    typedef std::map<std::string, int> SymTable;
    SymTable symbols;
    Damaris::Calc<std::string::const_iterator, SymTable> calc(symbols);

    symbols["TheAnswerToLifeUniverse"] = 100;
    symbols["Everything"] = -58;

    std::string input = "3*4+5/4, TheAnswerToLifeUniverse + Everything";
    std::string::const_iterator f(input.begin()), l(input.end());

    std::vector<int> data;
    // phrase_parse also returns true when only a prefix of the input
    // matched, so success additionally requires the whole input to be used.
    const bool parsed = qi::phrase_parse(f, l, calc, ascii::space, data);
    if (parsed && f == l)
        std::cout << "output: " << karma::format(karma::int_ % ", " << karma::eol, data);
    else
        std::cout << "problem: '" << std::string(f,l) << "'\n";

    return 0;
}
Output:
output: 13, 42
1 gcc 4.6.1, boost 1_48