I want to find out whether I can detect function call using regex. The basic case is easy: somefunction(1, 2);
But what if I had code:
somefunction(someotherfunction(), someotherotherfunction());
or
somefunction(function () { return 1; }, function() {return 2;});
or
caller_function(somefunction(function () { return 1; }, function() {return 2;}))
In this case I need to match same number of opening braces and closing braces so that I can find end of call to somefunction
Is it possible?
Thanks in advance.
Your question is misleading. It's not as simple as you think.
First, the grammar isn't regular. Regular expressions are not the right tool.
Second, you ask "detecting function call" but the samples show anonymous function definitions, a totally different ball game.
Here's a start using Boost Spirit:
start = skip(space) [ fcall ];
fcall = ident >> '(' >> -args >> ')';
args = arg % ',';
arg = fdef | fcall;
fdef = lexeme["function"] >> '(' >> -formals >> ')' >> body;
formals = ident % ',';
identch = alpha | char_("_");
ident = identch >> *(identch|digit);
body = '{' >> *~char_('}') >> '}';
Which would map onto an AST like:
struct function_definition {
std::vector<std::string> formal_arguments;
std::string body;
};
struct function_call;
using argument = boost::variant<
function_definition,
boost::recursive_wrapper<function_call>
>;
struct function_call {
std::string name;
std::vector<argument> args;
};
DEMO
Live On Coliru
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
struct function_definition {
std::vector<std::string> formal_arguments;
std::string body;
};
struct function_call;
using argument = boost::variant<
function_definition,
boost::recursive_wrapper<function_call>
>;
struct function_call {
std::string name;
std::vector<argument> args;
};
BOOST_FUSION_ADAPT_STRUCT(function_call, name, args)
BOOST_FUSION_ADAPT_STRUCT(function_definition, formal_arguments, body)
namespace qi = boost::spirit::qi;
template <typename It>
struct Parser : qi::grammar<It, function_call()> {
Parser() : Parser::base_type(start) {
using namespace qi;
start = skip(space) [ fcall ];
fcall = ident >> '(' >> -args >> ')';
args = arg % ',';
arg = fdef | fcall;
fdef = lexeme["function"] >> '(' >> -formals >> ')' >> body;
formals = ident % ',';
identch = alpha | char_("_");
ident = identch >> *(identch|digit);
body = '{' >> *~char_('}') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start)(fcall)(args)(arg)(fdef)(formals)(ident)(body))
}
private:
using Skipper = qi::space_type;
qi::rule<It, function_call()> start;
qi::rule<It, function_call(), Skipper> fcall;
qi::rule<It, argument(), Skipper> arg;
qi::rule<It, std::vector<argument>(), Skipper> args;
qi::rule<It, function_definition(), Skipper> fdef;
qi::rule<It, std::vector<std::string>(), Skipper> formals;
qi::rule<It, char()> identch;
qi::rule<It, std::string()> ident, body;
};
// for debug:
#include <experimental/iterator>
static inline std::ostream& operator<<(std::ostream& os, function_definition const& v) {
os << "function(";
std::copy(v.formal_arguments.begin(), v.formal_arguments.end(), std::experimental::make_ostream_joiner(os, ", "));
return os << ") {" << v.body << "}";
}
static inline std::ostream& operator<<(std::ostream& os, function_call const& v) {
os << v.name << "(";
std::copy(v.args.begin(), v.args.end(), std::experimental::make_ostream_joiner(os, ", "));
return os << ")";
}
int main() {
std::string const input = "caller_function(somefunction(function () { return 1; }, function() {return 2;}))";
using It = std::string::const_iterator;
Parser<It> const p;
It f = input.begin(), l = input.end();
function_call parsed;
bool ok = parse(f, l, p, parsed);
if (ok) {
std::cout << "Parsed ok: " << parsed << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints
Parsed ok: caller_function(somefunction(function() { return 1; }, function() {return 2;}))
If there's a structure:
struct record
{
std::string type;
std::string delimiter;
uint32_t length;
std::string name;
record()
{
type = "";
delimiter = "";
length = 0;
name = "";
}
};
Which is adapted using boost::fusion, and the below grammar:
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
type %= lexeme[+(char_ - (blank|char('(')))];
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
length %= (char('(') >> uint_ >> char(')'));
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
I am looking to parse 'delimiter' and 'length' as optional. However, one of them has to be present, and if one is present, the other one should not exist.
For Example:
record { string(5) Alex; }
record { string("|") Alex; }
But Not:
record { string(5)("|") Alex; }
record { string Alex; }
I have attempted to do it this way, but compilation fails:
start %=
eps >
lit("record")
>> char('{')
>> type
>> ((delimiter >> attr(0)) | (attr("") >> length))
>> name
>> char('}')
;
Thank you for your help in advance. Below is the full source code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
struct record
{
std::string type;
std::string delimiter;
uint32_t length;
std::string name;
record()
{
type = "";
delimiter = "";
length = 0;
name = "";
}
};
}
BOOST_FUSION_ADAPT_STRUCT(
client::record,
(std::string, type)
(std::string, delimiter)
(uint32_t, length)
(std::string, name)
)
namespace client
{
template <typename Iterator>
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
type %= lexeme[+(char_ - (blank|char('(')))];
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
length %= (char('(') >> uint_ >> char(')'));
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
int main()
{
std::string storage = "record { string(5) Alex; }";
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
std::string::const_iterator iter = storage.begin();
std::string::const_iterator end = storage.end();
bool r = phrase_parse(iter, end, g, space, rec);
if (r && iter == end)
{
std::cout << boost::fusion::tuple_open('[');
std::cout << boost::fusion::tuple_close(']');
std::cout << boost::fusion::tuple_delimiter(", ");
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "got: " << boost::fusion::as_vector(rec) << std::endl;
std::cout << "\n-------------------------\n";
}
else
{
std::string::const_iterator some = iter+30;
std::string context(iter, (some>end)?end:some);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at -->" << context << "...\n";
std::cout << "-------------------------\n";
}
return 0;
}
You can just write out the combinations:
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
The best way to make it work with automatic attribute propagation is to use an AST structure that is similar:
namespace client {
struct record {
std::string type;
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
Full Demo Live On Coliru
Note how much simpler the grammar has been made (all those char(' ') things are unnecessary; use lexemes only if you declare a skipper; use ~char_ instead of character set subtraction; use graph instead of char_ - space etc.).
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
>> name >> ';' >> '}'
;
Full code:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client {
std::ostream& operator<<(std::ostream& os, record::param_t const& v) { return os << boost::fusion::as_vector(v); }
std::ostream& operator<<(std::ostream& os, record const& v) { return os << boost::fusion::as_vector(v); }
}
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints:
Parsing succeeded: (string ( 5) Alex)
Parsing succeeded: (string (| 0) Alex)
Because it's 2016, adding a X3 example too. Once again, taking the variant approach, which I find to be typical in Spirit code.
namespace AST {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
namespace parser {
using namespace x3;
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
static auto const length = '(' >> uint_ >> ')';
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
That's all. The calling code is virtually unchanged:
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
AST::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
}
}
}
Everything compiles a lot quicker and I'd not be surprised if the resultant code was at least twice as fast at runtime too.
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace x3 = boost::spirit::x3;
namespace AST {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
namespace parser {
using namespace x3;
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
static auto const length = '(' >> uint_ >> ')';
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
#include <iostream>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/optional/optional_io.hpp>
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
AST::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing failed
Remaining: 'record { string Alex; }'
sehe's first answer is perfect (or would be if he corrected what he realized in the comments), but I just wanted to add an explanation of the problem and a possible alternative. The code below is based on that excellent answer.
You have a couple of problems with the attributes of your start rule. The attribute you want to get is record which is basically tuple<string,string,uint32_t,string>. Let's see the attributes of several parsers:
Something similar (but simpler) to your original rule:
Attribute of: "lit("record") >> char_('{') >> type >> delimiter >> length >> name >> char_('}')"
tuple<char,string,string,uint32_t,string,char>
As you can see you have two extra char caused b your use of char_(has an attribute of char) instead of lit(has no attribute). omit[char_] could also work, but would be a little silly.
Let's change char_ to lit:
Attribute of: "lit("record") >> lit('{') >> type >> delimiter >> length >> name >> lit('}')"
tuple<string,string,uint32_t,string>
Which is what we want.
Your original rule with lit:
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr("")) >> (length | attr(0)) >> name >> lit('}')"
tuple<string,variant<string,char const (&)[1]>,variant<uint32_t,int>,string>
Since the branches of | aren't identical, you get variants instead of the attribute you want. (In this simple case everything works as if there were no variants though)
Let's remove the variants (since they cause errors in more complex scenarios):
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr(string())) >> (length | attr(uint32_t())) >> name >> lit('}')"
tuple<string,string,uint32_t,string>
This works in the cases you want but also when both are missing.
sehe's approach:
Attribute of: "lit("record") >> lit('{') >> type >> ((delimiter >> attr(uint32_t())) | (attr(string()) >> length)) >> name >> lit('}')"
tuple<string,tuple<string,uint32_t>,string>
Looking at this synthesized attribute you can see the need to create the param_t helper struct to make your record attribute match.
See on Coliru a way to "calculate" the previous attributes.
The possible alternative is a custom directive using boost::fusion::flatten_view. Keep in mind that this directive has very little testing so I would recommend the approach shown by sehe, but it seems to work (at least in this case).
The example in this question with this directive on Wandbox
Several other examples where this directive can be useful
flatten_directive.hpp
#pragma once
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/zip_view.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(flatten);
}
namespace boost {
namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::flatten> // enables flatten
: mpl::true_ {};
}
}
namespace custom
{
template <typename Subject>
struct flatten_directive : boost::spirit::qi::unary_parser<flatten_directive<Subject> >
{
typedef Subject subject_type;
flatten_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef boost::fusion::flatten_view<typename
boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type>
type;//the attribute of the directive is a flatten_view of whatever is the attribute of the subject
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
Iterator temp = first;
boost::spirit::qi::skip_over(first, last, skipper);
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type original_attr;
if (subject.parse(first, last, context, skipper, original_attr))//parse normally
{
typename attribute<Context, Iterator>::type flattened_attr(original_attr);//flatten the attribute
typedef boost::fusion::vector<Attribute&,typename attribute<Context,Iterator>::type&> sequences;
boost::fusion::for_each(//assign to each element of Attribute the corresponding element of the flattened sequence
boost::fusion::zip_view<sequences>(
sequences(attr,flattened_attr)
)
,
[](const auto& pair)//substitute with a functor with templated operator() to support c++98/03
{
boost::spirit::traits::assign_to(boost::fusion::at_c<1>(pair),boost::fusion::at_c<0>(pair));
}
);
return true;
}
first = temp;
return false;
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("flatten", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost {
namespace spirit {
namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::flatten, Subject, Modifiers>
{
typedef custom::flatten_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}
}
}
namespace boost {
namespace spirit {
namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::flatten_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::flatten_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}
}
}
main.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/copy.hpp>
#include "flatten_directive.hpp"
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
std::string delimiter;
uint32_t length = 0;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, delimiter, length, name)
namespace client {
std::ostream& operator<<(std::ostream& os, record const& v) { return os << boost::fusion::tuple_open('[') << boost::fusion::tuple_close(']') << boost::fusion::tuple_delimiter(", ") << boost::fusion::as_vector(v); }
}
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"';
delimiter_single_quote = "'" >> +~char_("'") >> "'";
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start =
custom::flatten[
lit("record")
>> '{'
>> type
>> (
delimiter >> attr(uint32_t())//the attributes of both branches must be exactly identical
| attr(std::string("")) >> length//const char[1]!=std::string int!=uint32_t
)
>> name
>> ';'
>> '}'
]
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
"record { string (\"|\")(5) Alex; }"
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
}
else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Here's the more typical approach that parsed variant<std::string, uint32_t> so the AST reflects that only one can be present:
With Nil-Param
With the same misunderstanding as in my first answer, allowing both params to be optional:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct nil { friend std::ostream& operator<<(std::ostream& os, nil) { return os << "(nil)"; } };
struct record {
std::string type;
boost::variant<nil, std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length | attr(nil{}))
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing succeeded: (string (nil) Alex)
Without Nil-Param
Requiring exactly one:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length)
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
terminate called after throwing an instance of 'boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::spirit::qi::expectation_failure<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > >'
what(): boost::spirit::qi::expectation_failure
So I have this, rather ugly rule in my boost::spirit parser;
qi::rule<Iterator, bool()> empty_brackets;
...
empty_brackets = (qi::token(LEFT_BRACKET) >> qi::token(RIGHT_BRACKET))
[ qi::_val = true ]
;
The rule is used as;
array_typeexp = (primitive_typeexp >> +(empty_brackets))
So what I really want, for the second argument (qi::_2), isn't a std::vector<bool>, but rather an unsigned int, describing the number of times 'empty_brackets' was matched.
How would I go about achieving this?
I'm really unsure what you could need this for, but, hey :/
I'd suggest
empty_brackets = lit('(') >> lit(')');
n_empty_brackets = eps [ _val = 0 ] >> +empty_brackets [ ++_val ];
array_typeexp = primitive_typeexp >> n_empty_brackets;
For simplicity I have made primitive_typeexp a "no-op", and therefore the outermost rules return just the unsigned value in this simple setup:
qi::rule<It, unsigned(), Skipper> array_typeexp, n_empty_brackets;
qi::rule<It, Skipper> empty_brackets, primitive_typeexp;
See a complete sample Live on Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, unsigned(), Skipper>
{
parser() : parser::base_type(array_typeexp)
{
using namespace qi;
empty_brackets = lit('(') >> lit(')');
n_empty_brackets = eps [ _val = 0 ] >> +empty_brackets [ ++_val ];
array_typeexp = primitive_typeexp >> n_empty_brackets;
// TODO
primitive_typeexp = eps;
BOOST_SPIRIT_DEBUG_NODES((array_typeexp)(n_empty_brackets)(empty_brackets)(primitive_typeexp));
}
private:
qi::rule<It, unsigned(), Skipper> array_typeexp, n_empty_brackets;
qi::rule<It, Skipper> empty_brackets, primitive_typeexp;
};
bool doParse(const std::string& input)
{
typedef std::string::const_iterator It;
auto f(begin(input)), l(end(input));
parser<It, qi::space_type> p;
unsigned data;
try
{
bool ok = qi::phrase_parse(f,l,p,qi::space,data);
if (ok)
{
std::cout << "parse success\n";
std::cout << "data: " << data << "\n";
}
else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
return ok;
} catch(const qi::expectation_failure<It>& e)
{
std::string frag(e.first, e.last);
std::cerr << e.what() << "'" << frag << "'\n";
}
return false;
}
int main()
{
doParse("() () ( ) ()");
doParse("()");
}
For some odd reason, I can't get qi::as_string[] to work with repeat()[].
Parsing std::string str = { "{ +100S+++ ;\n }" };, I get the following OUTPUT
PLUS OR MINUS+
THREE PLUS OR MINUS
PARSED FINE
-------------------------
Parsing succeeded
-------------------------
which shows that parsing was ok, the first + was captured, but not the subsequent three +++.
NOTE I am just trying to get three_plus_or_minus to capture one to three pluses or minuses in a row as a string. Alternative solutions that don't use as_string[] would be appreciated as well.
I apologize for the long listing, but I need to use both a lexer and a parser in my real code.
CODE
// -------------- Third Party --------------
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
// -------------- C++ stdlib --------------
#include <iostream>
#include <fstream>
#include <string>
using namespace boost::spirit;
using boost::phoenix::val;
enum token_ids
{
ID_CONSTANT = 1000,
ID_INTEGER,
ID_TAG,
ID_IDENTIFIER
};
template <typename Lexer>
struct example6_tokens : lex::lexer<Lexer>
{
example6_tokens()
{
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
constant = "[0-9]+";
tag = "sl|s|l|tl|SL|S|L|TSL|"
"z|r|i|Z|R|I|"
"<>|><|<<>>|>><<|><><|<><>";
this->self = lex::token_def<>('(') | ')' | '{' | '}'
| '=' | ';' | ':' | '+' | '-';
this->self.add
(constant, ID_CONSTANT )
(tag, ID_TAG )
(identifier, ID_IDENTIFIER )
;
this->self("WS")
= lex::token_def<>("[ \\t\\n]+")
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
| "\\/\\/[^\n]*"
;
}
lex::token_def<std::string> identifier, tag;
lex::token_def<unsigned int> constant;
};
// ----------------------------------------------------------------------------
template <typename Iterator, typename Lexer>
struct example6_grammar
: qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
template <typename TokenDef>
example6_grammar(TokenDef const& tok)
: example6_grammar::base_type(program)
{
using boost::spirit::_val;
program
= +block
;
block
= '{' >> *slsltl_stmt >> '}'
;
plus_or_minus
%= ( qi::as_string[ qi::lit( '+' ) ] | qi::as_string[ '-' ])
[
std::cout << val("PLUS OR MINUS") << val( _1 ) << "\n"
]
;
three_plus_or_minus
%= ( qi::as_string[ repeat(1,3)['+'] ] | qi::as_string[ repeat(1,3)['-'] ] )
[
std::cout << val("THREE PLUS OR MINUS") << val( _1 ) << "\n"
]
;
slsltl_stmt
= ( - plus_or_minus
>> token(ID_CONSTANT)
>> token(ID_TAG)
>> three_plus_or_minus
>> ';'
)
[
std::cout << val("PARSED FINE") << "\n"
]
;
expression
= tok.identifier [ _val = _1 ]
| tok.constant [ _val = _1 ]
;
}
typedef boost::variant<unsigned int, std::string> expression_type;
qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, block;
qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> >
plus_or_minus, three_plus_or_minus;
qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> > slsltl_stmt;
qi::rule<Iterator, expression_type(), qi::in_state_skipper<Lexer> > expression;
};
int
main( int argv, char* argc[] )
{
typedef std::string::iterator base_iterator_type;
typedef lex::lexertl::token<
base_iterator_type, boost::mpl::vector<unsigned int, std::string>
> token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef example6_tokens<lexer_type> example6_tokens;
typedef example6_tokens::iterator_type iterator_type;
typedef example6_grammar<iterator_type, example6_tokens::lexer_def> example6_grammar;
example6_tokens tokens; // Our lexer
example6_grammar calc(tokens); // Our parser
std::string str = { "{ +100S+++ ;\n }" };
std::string::iterator it = str.begin();
iterator_type iter = tokens.begin(it, str.end());
iterator_type end = tokens.end();
std::string ws("WS");
bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
}
Try
slsltl_stmt %= /*....*/;
Instead of slsltl_stmt =. The use of semantic actions disables automatic attribute propagation.
(I haven't looked at the rest of your code yet. There may be more places where this/others things need adapting)
Edit
I did some more testing and think this does what you expected:
three_plus_or_minus
= (qi::as_string[ repeat(1,3)[qi::char_('+')] | repeat(1,3)[qi::char_('-')] ])
[ std::cout << val("THREE PLUS OR MINUS") << _1 << "\n" ]
;
A question out-of-the-box though: why are you using a Lexer? Wouldn't it make sense to have a token for +++?
I want to a list of name-value pairs. Each list is terminated by a '.' and EOL. Each name-value pair is separated by a ':'. Each pair is separated by a ';' in the list. E.g.
NAME1: VALUE1; NAME2: VALUE2; NAME3: VALUE3.<EOL>
The problem I have is that the values contain '.' and the last value always consumes the '.' at the EOL. Can I use some sort of lookahead to ensure the last '.' before the EOL is treated differently?
I have created a sample, that presumably looks like what you have. The tweak is in the following line:
value = lexeme [ *(char_ - ';' - ("." >> (eol|eoi))) ];
Note how - ("." >> (eol|eoi))) means: exclude any . that is immediately followed by end-of-line or end-of-input.
Test case (also live on http://liveworkspace.org/code/949b1d711772828606ddc507acf4fb4b):
const std::string input =
"name1: value 1; other name : value #2.\n"
"name.sub1: value.with.periods; other.sub2: \"more fun!\"....\n";
bool ok = doParse(input, qi::blank);
Output:
parse success
data: name1 : value 1 ; other name : value #2 .
data: name.sub1 : value.with.periods ; other.sub2 : "more fun!"... .
Full code:
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <map>
#include <vector>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
typedef std::map<std::string, std::string> map_t;
typedef std::vector<map_t> maps_t;
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, maps_t(), Skipper>
{
parser() : parser::base_type(start)
{
using namespace qi;
name = lexeme [ +~char_(':') ];
value = lexeme [ *(char_ - ';' - ('.' >> (eol|eoi))) ];
line = ((name >> ':' >> value) % ';') >> '.';
start = line % eol;
}
private:
qi::rule<It, std::string(), Skipper> name, value;
qi::rule<It, map_t(), Skipper> line;
qi::rule<It, maps_t(), Skipper> start;
};
template <typename C, typename Skipper>
bool doParse(const C& input, const Skipper& skipper)
{
auto f(std::begin(input)), l(std::end(input));
parser<decltype(f), Skipper> p;
maps_t data;
try
{
bool ok = qi::phrase_parse(f,l,p,skipper,data);
if (ok)
{
std::cout << "parse success\n";
for (auto& line : data)
std::cout << "data: " << karma::format_delimited((karma::string << ':' << karma::string) % ';' << '.', ' ', line) << '\n';
}
else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
//if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
return ok;
} catch(const qi::expectation_failure<decltype(f)>& e)
{
std::string frag(e.first, e.last);
std::cerr << e.what() << "'" << frag << "'\n";
}
return false;
}
int main()
{
const std::string input =
"name1: value 1; other name : value #2.\n"
"name.sub1: value.with.periods; other.sub2: \"more fun!\"....\n";
bool ok = doParse(input, qi::blank);
return ok? 0 : 255;
}