Boost spirit parsing objective-C like language - c++

I'm trying to use boost's spirit to reimplement the logos parsing perl script from iPhone jailbreaking development.
An example of input is:
%hook SBLockScreenView
-(void)setCustomSlideToUnlockText:(id)arg1
{
arg1 = #"Changed the slider";
%orig(arg1);
}
%end
So far I have:
// Grammar (as posted in the question) for one Logos block of the form
// "%hook <ClassName> <method signature> <method body> %end".
namespace logos
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Attribute produced by class_hook_parser: three raw text fields.
struct class_hook
{
std::string class_name;
std::string method_signature;
std::string method_body;
};
// Qi grammar yielding a class_hook; every rule is declared with the ascii::space skipper.
template <typename Iterator>
struct class_hook_parser : qi::grammar<Iterator, class_hook(), ascii::space_type>
{
class_hook_parser() : class_hook_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::on_error;
using qi::fail;
using qi::double_;
using qi::lexeme;
using ascii::char_;
// One or more ASCII letters; '-' is subtracted although it is not in the "a-zA-Z" set.
hooked_class %= lexeme[+(char_("a-zA-Z") - '-')];
// NOTE: unary + binds tighter than binary -, so this reads as (+char_) - '{',
// not +(char_ - '{').
method_sig %= lexeme[+(char_) - '{'];
// One or more characters up to, but not including, the first '}'.
method_body %= lexeme[+(char_ - '}')];
// Whole block: "%hook", class name, signature, body, "%end".
start %=
lit("%hook")
>> hooked_class
>> method_sig
>> method_body
>> lit("%end")
;
// Handler attached to `start`; writes a fixed message to std::cout.
// NOTE(review): nothing in this grammar uses the expectation operator (>), which is
// presumably why the handler is never seen to run -- confirm against the Qi docs.
on_error<fail>
(
start,
boost::phoenix::ref(std::cout) << "Something errored!" << std::endl);
}
qi::rule<Iterator, std::string(), ascii::space_type> hooked_class;
qi::rule<Iterator, std::string(), ascii::space_type> method_sig;
qi::rule<Iterator, std::string(), ascii::space_type> method_body;
qi::rule<Iterator, class_hook(), ascii::space_type> start;
};
}
BOOST_FUSION_ADAPT_STRUCT(logos::class_hook,
(std::string, class_name)
(std::string, method_signature)
(std::string, method_body))
typedef std::string::const_iterator iterator_type;
typedef logos::class_hook_parser<iterator_type> class_hook_parser;
using boost::spirit::ascii::space;
std::string::const_iterator
iter = std::begin(tweak_source_code),
end = std::end(tweak_source_code);
class_hook_parser g;
logos::class_hook emp;
bool r = phrase_parse(iter, end, g, space, emp);
if (r) {
std::cout << "Got: " << boost::fusion::as_vector(emp) << std::endl;
}
else std::cout << "Something isn't working" << std::endl;
But oddly, this only prints the "Something isn't working" message, not the output of the on_error handler. Where is my mistake in the parsing, and how can I get working, informative parse error messages?

Did you mean
+(char_ - '{')
instead of
+(char_) - '{'
And likely, you'd require the body to begin with that { that was rejected as part of the signature. Here's my fixed version:
hooked_class = +char_("a-zA-Z");
method_sig = +(char_ - '{');
method_body = '{' >> +(char_ - '}') >> '}';
Notes:
Dropping the skipper allows you to drop the lexeme[] directive too.
Rejecting - from the "a-zA-Z" set is useless (it's not in it...).
method_sig now includes all whitespace (including the trailing newline)
Use BOOST_SPIRIT_DEBUG to get insight into why your grammar works in mysterious ways
See also: Boost spirit skipper issues
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
// Grammar for one Logos block of the form
//     %hook <ClassName> <method signature> { <method body> } %end
namespace logos
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    // Synthesized attribute of class_hook_parser: three raw text fields.
    struct class_hook
    {
        std::string class_name;
        std::string method_signature;
        std::string method_body;
    };

    // Qi grammar that yields a class_hook. Only `start` uses the ascii::space
    // skipper; the three string rules are declared without one, so they match
    // their input verbatim (whitespace included).
    template <typename Iterator>
    struct class_hook_parser : qi::grammar<Iterator, class_hook(), ascii::space_type>
    {
        class_hook_parser() : class_hook_parser::base_type(start)
        {
            using qi::on_error;
            using qi::fail;
            using ascii::char_;

            // One or more ASCII letters.
            hooked_class = +char_("a-zA-Z");
            // Everything up to (not including) the opening brace of the body.
            method_sig = +(char_ - '{');
            // Brace-delimited body; the braces themselves are not stored.
            method_body = '{' >> +(char_ - '}') >> '}';

            // Once "%hook" has matched, every following element is mandatory.
            // Using the expectation operator (>) instead of the plain sequence
            // operator (>>) is what allows the on_error handler below to run:
            // Qi invokes error handlers for failed expectation points only.
            start = "%hook"
                > hooked_class
                > method_sig
                > method_body
                > "%end"
                ;

            // On a failed expectation: print a message and let `start` fail
            // (qi::fail) so that phrase_parse simply returns false.
            on_error<fail> (start,
                boost::phoenix::ref(std::cout) << "Something errored\n"
            );

            BOOST_SPIRIT_DEBUG_NODES((hooked_class)(method_sig)(method_body)(start))
        }
    private:
        qi::rule<Iterator, std::string()> hooked_class, method_sig, method_body;
        qi::rule<Iterator, class_hook(), ascii::space_type> start;
    };
}
BOOST_FUSION_ADAPT_STRUCT(logos::class_hook, class_name, method_signature, method_body)
int main() {
typedef std::string::const_iterator iterator_type;
typedef logos::class_hook_parser<iterator_type> class_hook_parser;
std::string const tweak_source_code = R"(
%hook SBLockScreenView
-(void)setCustomSlideToUnlockText:(id)arg1
{
arg1 = #"Changed the slider";
%orig(arg1);
}
%end
)";
using boost::spirit::ascii::space;
iterator_type iter = std::begin(tweak_source_code), end = std::end(tweak_source_code);
class_hook_parser g;
logos::class_hook emp;
bool r = phrase_parse(iter, end, g, space, emp);
if (r) {
std::cout << "Got: " << boost::fusion::as_vector(emp) << "\n";
} else {
std::cout << "Something isn't working\n";
}
}
Prints
Got: (SBLockScreenView -(void)setCustomSlideToUnlockText:(id)arg1
arg1 = #"Changed the slider";
%orig(arg1);
)

Related

Boost Spirit - How to match a string but not substrings [duplicate]

How can I prevent the Boost Spirit symbols parser from accepting a keyword (symbol) when the input merely starts with a valid keyword (symbol)? I would like the construct to fail parsing ‘ONEMORE’ as a whole, rather than succeed in parsing ‘ONE’ (because that is a valid keyword) and then fail on ‘MORE’.
Here is the actual output of the code below:
Keyword as a number: 1
Keyword as a number: 2
Keyword as a number: 1
Invalid keyword: MORETHREE
And this is what I would like it to be:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 3
The code is just a sample to get the point across.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// Semantic action: writes "Keyword as a number: <u>" followed by a flushed newline.
void printNumber( unsigned u )
{
    std::cout << "Keyword as a number: ";
    std::cout << u << std::endl;
}
// Semantic action: writes "Invalid keyword: <s>" followed by a flushed newline.
void printInvalidKeyword( const std::string &s )
{
    std::cout << "Invalid keyword: ";
    std::cout << s << std::endl;
}
// Grammar from the question: a whitespace-separated list in which each item is
// either a known keyword (reported through printNumber) or anything else
// (reported through printInvalidKeyword).
//
// NOTE: mySymbols accepts a keyword even when more identifier characters follow
// it (the "ONE" in "ONEMORE"); that is the behaviour the question asks about and
// it is intentionally left as posted.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
    // Symbol table mapping keyword text to its numeric value.
    struct mySymbols_ : qi::symbols<char, unsigned>
    {
        mySymbols_()
        {
            add
                ("ONE"   , 1)
                ("TWO"   , 2)
                ("THREE" , 3)   // was 2, duplicating "TWO"; the expected output shows 3
            ;
        }
    } mySymbols;

    keyword_parser() : keyword_parser::base_type(start)
    {
        using ascii::char_;

        // Zero or more items; the keyword alternative is tried first.
        start %= *(
              mySymbols[&printNumber]
            | invalid[&printInvalidKeyword]
        );
        // Fallback: one or more characters of any kind.
        invalid = +char_;
    }

    qi::rule<Iterator, ascii::space_type> start;
    qi::rule<Iterator, std::string(), ascii::space_type> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}
Look at boost::spirit::repository::qi::distinct, or take some measures yourself:
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
invalid = +ascii::graph;
The rules being declared as
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()> invalid;
See it Live On Coliru
Prints:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 2
Full source:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// Keyword grammar from the answer: a symbol only counts as a keyword when it is not
// immediately followed by another identifier character; any other run of graphic
// characters is reported as an invalid keyword.
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
// Symbol table mapping keyword text to an unsigned value.
struct mySymbols_ : qi::symbols<char, unsigned>
{
mySymbols_()
{
add
("ONE" , 1)
("TWO" , 2)
// NOTE(review): "THREE" maps to 2, the same value as "TWO"; the question's expected
// output shows 3 for THREE, so this looks like a typo carried over -- confirm.
("THREE" , 2)
;
}
} mySymbols;
keyword_parser() : keyword_parser::base_type(start)
{
using qi::_1;
using ascii::char_;
using phx::val;
// Zero or more items; each alternative prints its own line through a Phoenix action.
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
// A symbol match followed by a negative look-ahead (!) for [a-zA-Z0-9_].
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
// One or more ascii::graph characters.
invalid = +ascii::graph;
}
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
// NOTE(review): the attribute is int() while mySymbols stores unsigned -- presumably a
// harmless conversion; confirm.
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()/*IMPLICIT LEXEME:, ascii::space_type*/> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}

boost::spirit parsing into a fusion adapted structure optional but exclusive

If there's a structure:
// Plain data holder for one parsed record; default construction yields empty
// strings and a zero length.
struct record
{
    std::string type;
    std::string delimiter;
    uint32_t length;
    std::string name;

    record()
        : type("")
        , delimiter("")
        , length(0)
        , name("")
    {
    }
};
Which is adapted using boost::fusion, and the below grammar:
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
type %= lexeme[+(char_ - (blank|char('(')))];
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
length %= (char('(') >> uint_ >> char(')'));
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
I am looking to parse 'delimiter' and 'length' as optional. However, one of them has to be present, and if one is present, the other one should not exist.
For Example:
record { string(5) Alex; }
record { string("|") Alex; }
But Not:
record { string(5)("|") Alex; }
record { string Alex; }
I have attempted to do it this way, but compilation fails:
start %=
eps >
lit("record")
>> char('{')
>> type
>> ((delimiter >> attr(0)) | (attr("") >> length))
>> name
>> char('}')
;
Thank you for your help in advance. Below is the full source code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Plain data holder for one parsed record; default construction yields empty
// strings and a zero length.
struct record
{
    std::string type;
    std::string delimiter;
    uint32_t length;
    std::string name;

    record()
        : type("")
        , delimiter("")
        , length(0)
        , name("")
    {
    }
};
}
BOOST_FUSION_ADAPT_STRUCT(
client::record,
(std::string, type)
(std::string, delimiter)
(uint32_t, length)
(std::string, name)
)
// Grammar from the question for input such as `record { string(5) Alex; }`.
namespace client
{
// Qi grammar producing a record; every rule is declared with the ascii::space skipper.
template <typename Iterator>
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
// NOTE: char('(') and friends below are C++ functional casts to the built-in type
// `char` (i.e. plain character literals), not the Spirit char_ parser.
// Type name: one or more characters up to a blank or '('.
type %= lexeme[+(char_ - (blank|char('(')))];
// Delimiter in parentheses, quoted with "..." or '...'.
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
// Field name: one or more characters up to a blank or ';', then the ';' itself.
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
// Unsigned length in parentheses.
length %= (char('(') >> uint_ >> char(')'));
// NOTE: delimiter and length each fall back to an attr() default independently,
// so this form accepts one of them, both of them, or neither.
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
// Parses one hard-coded record and prints either the parsed fields or up to
// 30 characters of the input at the position where parsing stopped.
int main()
{
    std::string storage = "record { string(5) Alex; }";
    using boost::spirit::ascii::space;
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;
    record_parser g; // Our grammar
    client::record rec;
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();
    bool r = phrase_parse(iter, end, g, space, rec);
    // Success requires both a match and that the whole input was consumed.
    if (r && iter == end)
    {
        std::cout << boost::fusion::tuple_open('[');
        std::cout << boost::fusion::tuple_close(']');
        std::cout << boost::fusion::tuple_delimiter(", ");
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "got: " << boost::fusion::as_vector(rec) << std::endl;
        std::cout << "\n-------------------------\n";
    }
    else
    {
        // Clamp BEFORE advancing: forming iter + 30 when fewer than 30
        // characters remain would create an iterator past end(), which is
        // undefined behaviour even if it is only compared afterwards.
        std::string::const_iterator some = (end - iter > 30) ? iter + 30 : end;
        std::string context(iter, some);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at -->" << context << "...\n";
        std::cout << "-------------------------\n";
    }
    return 0;
}
You can just write out the combinations:
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
The best way to make it work with automatic attribute propagation is to use an AST structure that is similar:
namespace client {
struct record {
std::string type;
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
Full Demo Live On Coliru
Note how much simpler the grammar has been made (all those char(' ') things are unnecessary; use lexemes only if you declare a skipper; use ~char_ instead of character set subtraction; use graph instead of char_ - space etc.).
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
>> name >> ';' >> '}'
;
Full code:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client {
std::ostream& operator<<(std::ostream& os, record::param_t const& v) { return os << boost::fusion::as_vector(v); }
std::ostream& operator<<(std::ostream& os, record const& v) { return os << boost::fusion::as_vector(v); }
}
// Qi grammar for `record { <type>[(<delimiter>) | (<length>)] <name>; }`.
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
// Type name: graphic characters up to '('.
type = +(graph - '(');
// Quoted delimiter text; the quote characters themselves are literals.
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// Field name: graphic characters up to ';'.
name = +(graph - ';');
// Unsigned length in parentheses.
length = '(' >> uint_ >> ')';
// >> binds tighter than >, so everything after eps forms one expectation operand.
// Each branch of the alternative supplies both param fields: the parsed one plus
// an attr() placeholder for the other.
// NOTE: the third branch consumes no input, so a record with neither a delimiter
// nor a length is accepted as well.
start = eps > lit("record") >> '{'
>> type
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints:
Parsing succeeded: (string ( 5) Alex)
Parsing succeeded: (string (| 0) Alex)
Because it's 2016, adding a X3 example too. Once again, taking the variant approach, which I find to be typical in Spirit code.
namespace AST {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
namespace parser {
using namespace x3;
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
static auto const length = '(' >> uint_ >> ')';
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
That's all. The calling code is virtually unchanged:
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
AST::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
}
}
}
Everything compiles a lot quicker and I'd not be surprised if the resultant code was at least twice as fast at runtime too.
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace x3 = boost::spirit::x3;
namespace AST {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
// X3 parser expressions for `record { <type>(<delimiter>|<length>) <name>; }`.
namespace parser {
using namespace x3;
// Builds a parser for text enclosed in the quote character q.
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
// NOTE(review): these are plain expressions rather than skipper-less rules, so the
// skipper passed to phrase_parse presumably still applies between the characters
// matched by type/name/quoted -- wrap them in x3::lexeme[] if that matters; confirm.
static auto const type = +(graph - '(');
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
static auto const length = '(' >> uint_ >> ')';
// Exactly one of delimiter or length is required between type and name.
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
#include <iostream>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/optional/optional_io.hpp>
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
AST::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing failed
Remaining: 'record { string Alex; }'
sehe's first answer is perfect (or would be if he corrected what he realized in the comments), but I just wanted to add an explanation of the problem and a possible alternative. The code below is based on that excellent answer.
You have a couple of problems with the attributes of your start rule. The attribute you want to get is record which is basically tuple<string,string,uint32_t,string>. Let's see the attributes of several parsers:
Something similar (but simpler) to your original rule:
Attribute of: "lit("record") >> char_('{') >> type >> delimiter >> length >> name >> char_('}')"
tuple<char,string,string,uint32_t,string,char>
As you can see, you have two extra char elements caused by your use of char_ (which has an attribute of char) instead of lit (which has no attribute). omit[char_] could also work, but would be a little silly.
Let's change char_ to lit:
Attribute of: "lit("record") >> lit('{') >> type >> delimiter >> length >> name >> lit('}')"
tuple<string,string,uint32_t,string>
Which is what we want.
Your original rule with lit:
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr("")) >> (length | attr(0)) >> name >> lit('}')"
tuple<string,variant<string,char const (&)[1]>,variant<uint32_t,int>,string>
Since the branches of | aren't identical, you get variants instead of the attribute you want. (In this simple case everything works as if there were no variants though)
Let's remove the variants (since they cause errors in more complex scenarios):
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr(string())) >> (length | attr(uint32_t())) >> name >> lit('}')"
tuple<string,string,uint32_t,string>
This works in the cases you want but also when both are missing.
sehe's approach:
Attribute of: "lit("record") >> lit('{') >> type >> ((delimiter >> attr(uint32_t())) | (attr(string()) >> length)) >> name >> lit('}')"
tuple<string,tuple<string,uint32_t>,string>
Looking at this synthesized attribute you can see the need to create the param_t helper struct to make your record attribute match.
See on Coliru a way to "calculate" the previous attributes.
The possible alternative is a custom directive using boost::fusion::flatten_view. Keep in mind that this directive has very little testing so I would recommend the approach shown by sehe, but it seems to work (at least in this case).
The example in this question with this directive on Wandbox
Several other examples where this directive can be useful
flatten_directive.hpp
#pragma once
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/zip_view.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(flatten);
}
namespace boost {
namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::flatten> // enables flatten
: mpl::true_ {};
}
}
namespace custom
{
// Parser component behind the custom `flatten[...]` directive.
//
// It runs the wrapped subject parser into a local attribute of the subject's
// own type, views that attribute through boost::fusion::flatten_view, and
// assigns the flattened elements one by one to the caller's attribute.
template <typename Subject>
struct flatten_directive : boost::spirit::qi::unary_parser<flatten_directive<Subject> >
{
    typedef Subject subject_type;

    flatten_directive(Subject const& subject)
        : subject(subject) {}

    // The attribute of the directive is a flatten_view of whatever the
    // attribute of the subject is.
    template <typename Context, typename Iterator>
    struct attribute
    {
        typedef boost::fusion::flatten_view<typename
            boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type>
            type;
    };

    // Parses the subject. On success copies the flattened result into `attr`
    // and returns true; on failure restores `first` and returns false.
    template <typename Iterator, typename Context
        , typename Skipper, typename Attribute>
    bool parse(Iterator& first, Iterator const& last
        , Context& context, Skipper const& skipper
        , Attribute& attr) const
    {
        Iterator temp = first;  // remembered so a failed parse consumes nothing
        boost::spirit::qi::skip_over(first, last, skipper);
        typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type original_attr;
        if (subject.parse(first, last, context, skipper, original_attr))//parse normally
        {
            typename attribute<Context, Iterator>::type flattened_attr(original_attr);//flatten the attribute
            typedef boost::fusion::vector<Attribute&,typename attribute<Context,Iterator>::type&> sequences;
            boost::fusion::for_each(//assign to each element of Attribute the corresponding element of the flattened sequence
                boost::fusion::zip_view<sequences>(
                    sequences(attr,flattened_attr)
                )
                ,
                [](const auto& pair)//substitute with a functor with templated operator() to support c++98/03
                {
                    boost::spirit::traits::assign_to(boost::fusion::at_c<1>(pair),boost::fusion::at_c<0>(pair));
                }
            );
            return true;
        }
        first = temp;
        return false;
    }

    // Describes this parser for diagnostics as "flatten" wrapping the subject.
    template <typename Context>
    boost::spirit::info what(Context& context) const
    {
        // `info` must be qualified: this struct lives in namespace custom, and
        // argument-dependent lookup only finds functions, not the class
        // boost::spirit::info, so the unqualified name would fail to resolve
        // as soon as what() is instantiated.
        return boost::spirit::info("flatten", subject.what(context));
    }

    Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost {
namespace spirit {
namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::flatten, Subject, Modifiers>
{
typedef custom::flatten_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}
}
}
namespace boost {
namespace spirit {
namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::flatten_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::flatten_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}
}
}
main.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/copy.hpp>
#include "flatten_directive.hpp"
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
std::string delimiter;
uint32_t length = 0;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, delimiter, length, name)
namespace client {
std::ostream& operator<<(std::ostream& os, record const& v) { return os << boost::fusion::tuple_open('[') << boost::fusion::tuple_close(']') << boost::fusion::tuple_delimiter(", ") << boost::fusion::as_vector(v); }
}
// Qi grammar for `record { <type>(<delimiter>|<length>) <name>; }` that fills the flat
// four-field client::record through the custom::flatten directive.
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
// Type name: graphic characters up to '('.
type = +(graph - '(');
// Quoted delimiter text; the quote characters themselves are literals.
delimiter_double_quote = '"' >> +~char_('"') >> '"';
delimiter_single_quote = "'" >> +~char_("'") >> "'";
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// Field name: graphic characters up to ';'.
name = +(graph - ';');
// Unsigned length in parentheses.
length = '(' >> uint_ >> ')';
// Exactly one of the two branches must match; each supplies a default for the
// field it does not parse. There is no branch that accepts neither.
start =
custom::flatten[
lit("record")
>> '{'
>> type
>> (
delimiter >> attr(uint32_t())//the attributes of both branches must be exactly identical
| attr(std::string("")) >> length//const char[1]!=std::string int!=uint32_t
)
>> name
>> ';'
>> '}'
]
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
"record { string (\"|\")(5) Alex; }"
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
}
else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Here's the more typical approach that parses into a variant<std::string, uint32_t>, so the AST reflects that only one can be present:
With Nil-Param
With the same misunderstanding as in my first answer, allowing both params to be optional:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct nil { friend std::ostream& operator<<(std::ostream& os, nil) { return os << "(nil)"; } };
struct record {
std::string type;
boost::variant<nil, std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
// Qi grammar for `record { <type>[(<delimiter>)|(<length>)] <name>; }` whose parameter
// is stored in a variant; a missing parameter is represented by nil.
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
// Type name: graphic characters up to '('.
type = +(graph - '(');
// Quoted delimiter text; the quote characters themselves are literals.
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// Field name: graphic characters up to ';'.
name = +(graph - ';');
// Unsigned length in parentheses.
length = '(' >> uint_ >> ')';
// >> binds tighter than >, so everything after eps forms one expectation operand.
// The last alternative, attr(nil{}), consumes no input: the parameter is optional.
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length | attr(nil{}))
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing succeeded: (string (nil) Alex)
Without Nil-Param
Requiring exactly one:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
/// Qi grammar for one `record { <type>(<param>) <name>; }` declaration.
///
/// In this variant the parenthesised parameter is mandatory: it must be
/// either a quoted delimiter string or an unsigned length.
///
/// @tparam Iterator iterator type over the input characters
/// @tparam Skipper  skip parser used between tokens (ASCII whitespace by default)
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
    record_parser() : record_parser::base_type(start)
    {
        using namespace qi;

        // Lexeme rules (declared without a skipper below): a run of printable
        // characters that stops before the character that ends the token.
        type = +(graph - '(');
        name = +(graph - ';');

        // Quoted delimiter text; the surrounding quotes are not part of the attribute.
        delimiter_double_quote = lit('"')  >> +(char_ - '"')  >> lit('"');
        delimiter_single_quote = lit('\'') >> +(char_ - '\'') >> lit('\'');

        // The two parenthesised parameter forms.
        length    = lit('(') >> uint_ >> lit(')');
        delimiter = lit('(') >> (delimiter_double_quote | delimiter_single_quote) >> lit(')');

        // `>>` binds tighter than `>`, so the whole sequence after `eps` is a
        // single expectation operand: a mismatch raises an expectation failure
        // instead of a plain "no match".
        start = eps > lit("record") >> '{'
                   >> type
                   >> (delimiter | length)
                   >> name >> ';' >> '}'
              ;
    }

private:
    qi::rule<Iterator, record(), Skipper>      start;
    qi::rule<Iterator, uint32_t(), Skipper>    length;
    qi::rule<Iterator, std::string(), Skipper> delimiter;

    // lexemes
    qi::rule<Iterator, std::string()> type;
    qi::rule<Iterator, std::string()> delimiter_double_quote;
    qi::rule<Iterator, std::string()> delimiter_single_quote;
    qi::rule<Iterator, std::string()> name;
};
}
/// Runs the record grammar over three sample declarations and reports, for
/// each one, whether parsing succeeded and which input (if any) was left over.
///
/// The third sample has no parenthesised parameter, which this grammar
/// requires. The resulting qi::expectation_failure is deliberately not caught
/// here, so the program terminates on that sample.
int main()
{
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;

    for (std::string const storage : {
             "record { string(5) Alex; }",
             "record { string(\"|\") Alex; }",
             "record { string Alex; }",
         })
    {
        record_parser g; // Our grammar
        client::record rec;

        iterator_type iter = storage.begin();
        iterator_type const end = storage.end();

        if (phrase_parse(iter, end, g, qi::ascii::space, rec)) {
            std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
        } else {
            std::cout << "Parsing failed\n";
        }

        // phrase_parse advances `iter`; anything between it and `end` was not consumed.
        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
        }
    }
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
terminate called after throwing an instance of 'boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::spirit::qi::expectation_failure<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > >'
what(): boost::spirit::qi::expectation_failure

Prevent the Boost Spirit Symbol parser from accepting a keyword too early

How can I prevent the Boost Spirit symbol parser from accepting a keyword (symbol) when the input merely starts with a valid keyword? I would like the construct to fail to parse 'ONEMORE' as a whole, rather than succeed in parsing 'ONE' (because that is a valid keyword) and then fail on 'MORE'.
Here is the actual output of the code below:
Keyword as a number: 1
Keyword as a number: 2
Keyword as a number: 1
Invalid keyword: MORETHREE
And this is what I would like it to be:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 3
The code is just a sample to get the point across.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// Semantic-action callback: prints the number attached to a matched keyword.
void printNumber( unsigned u )
{
    std::cout << "Keyword as a number: " << u << std::endl;
}
/// Semantic-action callback: prints the text that was not recognised as a keyword.
void printInvalidKeyword( const std::string &s )
{
    std::cout << "Invalid keyword: " << s << std::endl;
}
/// Grammar that reports each keyword from the symbol table as a number and
/// everything else as an invalid keyword.
///
/// This is the version that shows the problem under discussion: the symbol
/// table matches as soon as the input starts with a known keyword, so "ONE"
/// is accepted at the front of "ONEMORE".
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
    /// Keyword table mapping each keyword to its number.
    struct mySymbols_ : qi::symbols<char, unsigned>
    {
        mySymbols_()
        {
            add("ONE", 1);
            add("TWO", 2);
            add("THREE", 2);
        }
    } mySymbols;

    keyword_parser() : keyword_parser::base_type(start)
    {
        // `invalid` is declared with the whitespace skipper, so `+char_`
        // keeps consuming characters across blanks.
        invalid = +ascii::char_;

        // Try the keyword table first, then fall back to `invalid`; each
        // alternative hands its attribute to the matching print callback.
        start %= *( mySymbols[&printNumber] | invalid[&printInvalidKeyword] );
    }

    qi::rule<Iterator, ascii::space_type> start;
    qi::rule<Iterator, std::string(), ascii::space_type> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}
Look at the `distinct` directive from the Spirit Repository (boost::spirit::repository::qi::distinct), or take some measures yourself:
// Try `keyword` first and fall back to `invalid`; the Phoenix actions print
// the attribute (_1) of whichever alternative matched.
start %= *(
keyword [cout << val("Keyword as a number: ") << _1 << endl]
| invalid [cout << val("Invalid keyword: ") << _1 << endl]
);
// A symbol only counts as a keyword when it is NOT immediately followed by
// another identifier character, so "ONE" no longer matches inside "ONEMORE".
keyword = mySymbols >> !(char_("a-zA-Z0-9_"));
// Anything else: one whole run of printable, non-space characters.
invalid = +ascii::graph;
The rules being declared as
// The top-level rule uses the whitespace skipper between tokens.
qi::rule<Iterator, ascii::space_type> start;
// lexemes do not ignore embedded skippables
// (these two rules are declared without a skipper type, so no whitespace is
// skipped while they match)
qi::rule<Iterator, int()> keyword;
qi::rule<Iterator, std::string()> invalid;
See it Live On Coliru
Prints:
Keyword as a number: 1
Keyword as a number: 2
Invalid keyword: ONEMORE
Keyword as a number: 2
Full source:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <string>
using namespace std;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace ascii = boost::spirit::ascii;
/// Grammar that reports each keyword from the symbol table as a number and
/// every other token as an invalid keyword.
///
/// A keyword is only accepted when it is not directly followed by another
/// identifier character, so a token such as "ONEMORE" is reported as invalid
/// as a whole instead of being split into "ONE" + "MORE".
template <typename Iterator>
struct keyword_parser : qi::grammar<Iterator, ascii::space_type>
{
    /// Keyword table mapping each keyword to its number.
    struct mySymbols_ : qi::symbols<char, unsigned>
    {
        mySymbols_()
        {
            add("ONE", 1);
            add("TWO", 2);
            // NOTE(review): THREE maps to 2, not 3 - possibly a typo, but the
            // output quoted alongside this code reflects the value 2.
            add("THREE", 2);
        }
    } mySymbols;

    keyword_parser() : keyword_parser::base_type(start)
    {
        using qi::_1;
        using phx::val;

        // Negative look-ahead: the symbol must not be followed by an identifier character.
        keyword = mySymbols >> !ascii::char_("a-zA-Z0-9_");

        // Anything else: one whole run of printable, non-space characters.
        invalid = +ascii::graph;

        start %= *(
              keyword [cout << val("Keyword as a number: ") << _1 << endl]
            | invalid [cout << val("Invalid keyword: ") << _1 << endl]
        );
    }

    qi::rule<Iterator, ascii::space_type> start;

    // lexemes do not ignore embedded skippables: both rules below are
    // declared without the ascii::space_type skipper (implicit lexeme)
    qi::rule<Iterator, int()> keyword;
    qi::rule<Iterator, std::string()> invalid;
};
int main()
{
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef keyword_parser<iterator_type> keyword_parser;
std::string s = "ONE TWO ONEMORE THREE";
iterator_type b = s.begin();
iterator_type e = s.end();
phrase_parse(b, e, keyword_parser(), space);
return 0;
}

How to get error position using the spirit parser

I wrote a simple parser with spirit, akin to json (but simpler and more specialised). By following the advice in here, I tried to implement error handling by tracking the error position. In particular, my parsing function is as follows
bool parse_properties(std::istream& is, const std::string &filename, PropertyList &pset)
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace classic = boost::spirit::classic;
typedef std::istreambuf_iterator<char> base_iterator_type;
base_iterator_type in_begin(is);
// convert input iterator to forward iterator, usable by spirit parser
typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
forward_iterator_type fwd_end;
// wrap forward iterator with position iterator, to record the position
typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
pos_iterator_type position_begin(fwd_begin, fwd_end, filename);
pos_iterator_type position_end;
qi::rule<pos_iterator_type> skipper = ascii::space |
'#' >> *(ascii::char_ - qi::eol) >> qi::eol;
property_set_grammar<pos_iterator_type, qi::rule<pos_iterator_type> > g;
bool r = false;
try {
r = phrase_parse(position_begin,
position_end,
g, skipper, pset);
}
catch(const qi::expectation_failure<pos_iterator_type>& e) {
const classic::file_position_base<std::string>& pos = e.first.get_position();
std::stringstream msg;
msg <<
"parse error at file " << pos.file <<
" line " << pos.line << " column " << pos.column << std::endl <<
"'" << e.first.get_currentline() << "'" << std::endl <<
std::setw(pos.column) << " " << "^- here";
throw std::runtime_error(msg.str());
}
return r;
}
Unfortunately, it does not work. Function phrase_parse always returns false immediately, both for correct and for incorrect files, and never raises any exception.
However, when I modify the above code to use a simple forward_iterator instead of the classic::position_iterator2 it works fine, but of course it does not track the error position. The very strange thing is that the original example in here works fine. So maybe the problem is related to my grammar. Here it follows:
/// Grammar for a nested property list of the form
/// `type(name) { key = value; ... nested lists ... };`.
///
/// @tparam Iterator iterator type over the input characters
/// @tparam Skipper  skip parser applied between tokens
template <typename Iterator, typename Skipper>
struct property_set_grammar : qi::grammar<Iterator, PropertyList(), Skipper>
{
    qi::rule<Iterator, Property(), Skipper> prop;
    qi::rule<Iterator, std::string(), Skipper> name;
    qi::rule<Iterator, std::string(), Skipper> type;
    qi::rule<Iterator, std::string(), Skipper> value;
    qi::rule<Iterator, std::string(), Skipper> value_simple;
    qi::rule<Iterator, std::string(), Skipper> value_quoted;
    qi::rule<Iterator, PropertyList(), Skipper> plist;

    property_set_grammar()
        : property_set_grammar::base_type(plist, "Set of Properties")
    {
        // Identifiers: a letter followed by letters/digits, no embedded skipping.
        name = qi::lexeme[qi::alpha >> *qi::alnum];
        type = qi::lexeme[qi::alpha >> *qi::alnum];

        // Values: either a bare alphanumeric run or a double-quoted string.
        value_simple = qi::lexeme[*(qi::alnum - qi::lit('"'))];
        value_quoted = qi::lit('"') > qi::lexeme[*(qi::char_ - qi::lit('"'))] > qi::lit('"');
        value = (value_quoted | value_simple);

        // `>>` binds tighter than `>`: once the part before the first `>`
        // has matched, the remaining elements are expectations.
        prop = name >> '=' > value > ';';
        plist = type >> '(' > name > ')' > '{' >> *(prop | plist) > '}' > ';';
    }
};
I am using g++ (Ubuntu/Linaro 4.7.2-2ubuntu1) 4.7.2 and version 1.50 of the boost libraries.
Is there anything stupid that I am overlooking? If needed, I can provide the complete code (it's just a few files).
It seems that boost::spirit::position_iterator is buggy.
You can edit the position_iterator headers as suggested in the answer here.
Alternatively, you can implement your own position_iterator; I did this by basically copying and pasting the original code of boost::spirit::position_iterator and then removing the parts I did not need.
Also, when parsing from istream, make sure to set the noskipws manip:
//is >> std::noskipws;
EDIT: Not needed with istreambuf_iterator

How can I implement const in Boost Spirit?

I have become interested in Boost Spirit recently and am trying to build something with it. Can we implement something like a const in C++ using Spirit? For instance, the user will define an item like:
constant var PROG_LANG="Java";
"constant var" seems weird, I admit, but you get the idea. I searched the internet but couldn't find anything about it.
What the BigBoss said :)
Only I'd do without the semantic actions - making it far less... verbose (See also Boost Spirit: "Semantic actions are evil"?):
// The first group yields `true` when the "constant" keyword is present and
// `false` otherwise, so the flag attribute is always produced.
vdef =
("constant" >> attr(true) | attr(false)) >>
"var" >> identifier >> '=' >> identifier_value >> ';' ;
That's all. This uses qi::attr to account for the default (missing constant keyword).
Here's a full demo with output:
http://liveworkspace.org/code/c9e4bef100d2249eb4d4b88205f85c4b
Output:
parse success: 'var myvariable = "has some value";'
data: false;myvariable;has some value;
parse success: 'constant var myvariable = "has some value";'
data: true;myvariable;has some value;
Code:
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
/// One parsed variable definition.
struct var_definition {
    bool is_constant = false;  ///< set when the definition is marked as constant
    std::string name;          ///< variable name
    std::string value;         ///< variable value

    /// Creates a non-constant definition with empty name and value.
    var_definition() {}
};
// Adapt var_definition as a Fusion sequence (is_constant, name, value) so it
// can be used directly as a parser/generator attribute.
BOOST_FUSION_ADAPT_STRUCT(var_definition, (bool, is_constant)(std::string, name)(std::string, value))
void doParse(const std::string& input)
{
typedef std::string::const_iterator It;
qi::rule<It, std::string()> identifier, identifier_value;
qi::rule<It, var_definition(), qi::space_type> vdef;
{
using namespace qi;
identifier_value = '"' >> lexeme [ +~char_('"') ] > '"';
identifier = lexeme [ +graph ];
vdef =
("constant" >> attr(true) | attr(false)) >>
"var" >> identifier >> '=' >> identifier_value >> ';' ;
}
var_definition data;
It f(std::begin(input)), l(std::end(input));
bool ok = qi::phrase_parse(f,l,vdef,qi::space,data);
if (ok)
{
std::cout << "parse success: '" << input << "'\n";
std::cout << "data: " << karma::format_delimited(karma::auto_, ';', data) << "\n";
}
}
/// Runs the demo on one plain and one `constant` variable definition.
int main()
{
    const char* const samples[] = {
        "var myvariable = \"has some value\";",
        "constant var myvariable = \"has some value\";",
    };

    for (const char* sample : samples)
        doParse(sample);
}
I don't quite understand your question: Spirit is a parser, so it has nothing to do with the meaning of a constant; it can only parse it. But if you mean parsing an optional keyword such as constant, then it can be something like:
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef std::string::const_iterator it;
/// One parsed variable definition.
struct var_definition {
    bool is_constant = false;  ///< set when the definition is marked as constant
    std::string name;          ///< variable name
    std::string value;         ///< variable value

    /// Creates a non-constant definition with empty name and value.
    var_definition() {}
};
// These two rules are declared without a skipper type.
qi::rule<it, std::string()> identifier;
qi::rule<it, std::string()> identifier_value;
// Rule for a whole definition: produces a var_definition and uses the ASCII
// whitespace skipper.
qi::rule<it, var_definition(), boost::spirit::ascii::space_type> vdef;
/// Flags the definition as constant.
void mark_var_as_constant(var_definition& vd)
{
    vd.is_constant = true;
}

/// Stores `val` as the definition's name.
void set_var_name(var_definition& vd, std::string const& val)
{
    vd.name = val;
}

/// Stores `val` as the definition's value.
void set_var_value(var_definition& vd, std::string const& val)
{
    vd.value = val;
}
// Plain `=` rather than `%=`: every field of the result is written by a
// semantic action through qi::_val, so automatic attribute propagation has to
// stay off. The punctuation is matched with qi::lit, which exposes no
// attribute (qi::char_ would add a `char` to the sequence's attribute).
vdef =
    -qi::lit("constant")[phx::bind(mark_var_as_constant, qi::_val)] >>
    qi::lit("var") >>
    identifier[phx::bind(set_var_name, qi::_val, qi::_1)] >>
    qi::lit('=') >>
    identifier_value[phx::bind(set_var_value, qi::_val, qi::_1)] >>
    qi::lit(';');
Of course there are other ways, for example:
(qi::lit("constant")[phx::bind(mark_var_as_constant, qi::_val)] | qi::eps)
And the easiest is:
qi::hold[ qi::lit("constant")[phx::bind(mark_var_as_constant, qi::_val)] ]