parse std::vector<int> from comma separated integers - c++

I'm trying to implement a very specific grammar, which requires me at a certain point to parse a list of comma separated integers. The qi rule looks like the following:
qi::rule<Iterator, ascii::space_type> ident;
qi::rule<Iterator, ascii::space_type> nlist;
...
ident = char_ >> nlist;
nlist = ("(" >> int_ % "," >> ")");
...
I need to pass the values up to the ident rule (The expression ident has to create a syntax tree node, where the parsed values from nlist are required for the constructor). I thought about creating and filling a std::vector and use the semantic action like _val = vector<int>.... What is now unclear to me is how do I create a vector of arbitrary length from this rule, since I do not make any assumptions on how long the input will be or using a predefined vector like the examples.
Is this even possible or does is there a better way to do it?

This is the bread and butter of Spirit Qi.
Just use any compatible attribute type and profit:
using nlist_t = std::vector<int>;
using ident_t = std::pair<char, nlist_t>;
qi::rule<Iterator, ident_t(), qi::ascii::space_type> ident;
qi::rule<Iterator, nlist_t(), qi::ascii::space_type> nlist;
Note: For std::pair attribute compatibility, include the relevant fusion header:
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
int main()
{
using nlist_t = std::vector<int>;
using ident_t = std::pair<char, nlist_t>;
using Iterator = std::string::const_iterator;
qi::rule<Iterator, ident_t(), qi::ascii::space_type> ident;
qi::rule<Iterator, nlist_t(), qi::ascii::space_type> nlist;
ident = qi::char_ >> nlist;
nlist = '(' >> qi::int_ % ',' >> ')';
for (std::string const input : { "a (1,2,3)", "+(881,-2,42) \n", "?(0)" }) {
ident_t data;
if (qi::phrase_parse(input.begin(), input.end(), ident, qi::ascii::space, data)) {
std::cout << "Parsed: " << data.first << "(";
for (auto i : data.second) std::cout << i << ",";
std::cout << ")\n";
} else
std::cout << "Parse failed: '" << input << "'\n";
}
}
Prints
Parsed: a(1,2,3,)
Parsed: +(881,-2,42,)
Parsed: ?(0,)
BONUS
Version with imagined Ast type using phoenix::construct:
Also Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace OoShinyAst {
using MyName = char;
using MyArgument = int;
using MyArgumentList = std::vector<MyArgument>;
struct MyIdent {
MyName name;
MyArgumentList args;
MyIdent() = default;
MyIdent(MyName name, MyArgumentList args)
: name(std::move(name)), args(std::move(args)) { }
};
}
int main()
{
using Iterator = std::string::const_iterator;
qi::rule<Iterator, OoShinyAst::MyIdent(), qi::ascii::space_type> ident;
qi::rule<Iterator, OoShinyAst::MyArgumentList(), qi::ascii::space_type> nlist;
nlist = '(' >> qi::int_ % ',' >> ')';
ident = (qi::char_ >> nlist) [ qi::_val = px::construct<OoShinyAst::MyIdent>(qi::_1, qi::_2) ];
for (std::string const input : { "a (1,2,3)", "+(881,-2,42) \n", "?(0)" }) {
OoShinyAst::MyIdent data;
if (qi::phrase_parse(input.begin(), input.end(), ident, qi::ascii::space, data)) {
std::cout << "Parsed: " << data.name << "(";
for (auto i : data.args) std::cout << i << ",";
std::cout << ")\n";
} else
std::cout << "Parse failed: '" << input << "'\n";
}
}

Related

boost spirit: copy the result in a vector of strings

I want to parse a function (with an arbitrary name and an arbitrary numbers af arguments) in this form:
function(bye, 1, 3, 4, foo)
The arguments could be generic strings comma separated.
And I want to copy the name of the function and the arguments in a vector of strings.
like this
std::vector<std::string> F;
std::string fun = "function(bye, 1, 3, 4, foo)";
// The parser must produce this vector from the example
F[0] == "function"
F[1] == "1"
F[2] == "3"
F[3] == "4"
F[4] == "foo"
I've written the following code by after reading some tutorial but it does not work (In the sense that it not compile).
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
fn_name = +qi::char_("a-zA-Z");
string = +qi::char_("a-zA-Z_0-9");
rec = *( lit(",") >> string );
start %= fn_name >> lit("(") >> string >> rec >> lit(")") ;
}
qi::rule<Iterator, std::string(), ascii::space_type> fn_name;
qi::rule<Iterator, std::string(), ascii::space_type> string;
qi::rule<Iterator, std::string(), ascii::space_type> rec;
qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
int
main()
{
namespace qi = boost::spirit::qi;
std::cout << "/////////////////////////////////////////////////////////\n\n";
client::command_parser<std::string::iterator> CP;
std::string cmd("fun(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
bool result = qi::parse(cmd.begin(), cmd.end(), CP, VV);
if (result) {
for ( auto sss : VV ){
std::cout << sss << std::endl;
}
} else {
std::cout << "Fail" << std::endl;
}
return 0 ;
}
Just for fun, here's my minimalist take on this grammar:
using CallList = std::vector<std::string>;
struct ParseError : std::runtime_error {
ParseError() : std::runtime_error("ParseError") {}
};
// The parse implementation
CallList parse_function_call(std::string const& fun) {
CallList elements;
using namespace boost::spirit::qi;
using It = decltype(begin(fun));
static const rule<It, std::string()> identifier = alpha >> +(alnum | char_('_'));
if (!phrase_parse(begin(fun), end(fun),
identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
space, elements))
throw ParseError{};
return elements;
}
With a little bit of plumbing
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
using namespace std;
if (holds_alternative<ParseError>(tr)) {
return os << "ParseError";
} else {
auto& list = get<CallList>(tr);
copy(begin(list), end(list), std::experimental::make_ostream_joiner(os << "{", ","));
return os << "}";
}
}
TestResult try_parse(std::string const& fun) {
try { return parse_function_call(fun); }
catch(ParseError const& e) { return e; }
}
Here's a test runner:
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
Which prints Live On Coliru
"function(bye, 1, 3, 4, foo)": FAIL
-- expected: {function,1,3,4,foo}
-- actual: {function,bye,1,3,4,foo}
"liar(pants on fire)": PASS
"liar('pants on fire')": PASS
"nullary()": PASS
"nullary( )": PASS
"zerolength(a,,b)": PASS
"zerolength(a, ,b)": PASS
"noarglust": PASS
"": PASS
"()": PASS
"1(invalidfunctionname)": PASS
"foo(bar) BOGUS": PASS
Note that your example test-case doesn't pass, but I think that was a mistake in the test case.
Full Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <experimental/iterator>
#include <variant>
#include <iomanip>
using CallList = std::vector<std::string>;
struct ParseError : std::runtime_error {
ParseError() : std::runtime_error("ParseError") {}
};
// The parse implementation
CallList parse_function_call(std::string const& fun) {
CallList elements;
using namespace boost::spirit::qi;
using It = decltype(begin(fun));
static const rule<It, std::string()> identifier = alpha >> +(alnum | char_('_'));
if (!phrase_parse(begin(fun), end(fun),
identifier >> '(' >> -(lexeme[+~char_(",)")] % ",") >> ')' >> eoi,
space, elements))
throw ParseError{};
return elements;
}
// just for test output
using TestResult = std::variant<CallList, ParseError>;
// exceptions are equivalent
static constexpr bool operator==(ParseError const&, ParseError const&)
{ return true; }
static inline std::ostream& operator<<(std::ostream& os, TestResult const& tr) {
using namespace std;
if (holds_alternative<ParseError>(tr)) {
return os << "ParseError";
} else {
auto& list = get<CallList>(tr);
copy(begin(list), end(list), std::experimental::make_ostream_joiner(os << "{", ","));
return os << "}";
}
}
TestResult try_parse(std::string const& fun) {
try { return parse_function_call(fun); }
catch(ParseError const& e) { return e; }
}
int main() {
using namespace std;
using Case = pair<std::string, TestResult>;
for (auto const& [input, expected]: {
Case("function(bye, 1, 3, 4, foo)", CallList{"function", "1", "3", "4", "foo"}),
{"liar(pants on fire)", CallList{"liar", "pants on fire"}},
{"liar('pants on fire')", CallList{"liar", "'pants on fire'"}},
{"nullary()", CallList{"nullary"}},
{"nullary( )", CallList{"nullary"}},
{"zerolength(a,,b)", ParseError{}},
{"zerolength(a, ,b)", ParseError{}},
{"noarglust", ParseError{}},
{"", ParseError{}},
{"()", ParseError{}},
{"1(invalidfunctionname)", ParseError{}},
{"foo(bar) BOGUS", ParseError{}},
})
{
auto const actual = try_parse(input);
bool const ok = (actual == expected);
cout << std::quoted(input) << ": " << (ok? "PASS":"FAIL") << "\n";
if (!ok) {
std::cout << " -- expected: " << expected << "\n";
std::cout << " -- actual: " << actual << "\n";
}
}
}
I'm correcting my answer per suggestions made by #sehe. All the credit for these corrections go to him. I am referencing your line numbers below. So the first error is from spirit and it says:
incompatible_start_rule:
// If you see the assertion below failing then the start rule
// passed to the constructor of the grammar is not compatible with
// the grammar (i.e. it uses different template parameters).
The signature of the start parser does not match that of the parser deceleration.
22. struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
43. qi::rule<Iterator, std::vector<std::string>, ascii::space_type> start;
I googled this and could not find an explanation but using an object rather than a type is preferable. I did it the other way in my first answer. The proper fix is at line 43:
43. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
The next spirit error is:
The rule was instantiated with a skipper type but you have not pass
any. Did you use parse instead of phrase_parse?");
So a phrase_parse is required with a skipper. Note that we need a skipper to pass along.
64. using qi::ascii::space;
65. bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, space, VV);
Now it compiles and the output is:
fun
1
2345foo
I see that won't do and you are looking to stuff the vector with each of the passed parameters. So you need a rule that is compatible with your attribute and intention. The kleene operator working with a std::string will put all the data into one string. So use your attribute:
41. qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;``
Now as #sehe points out, the skipper with fn_name and string will just concatenate names with spaces and newlines. So don't use skippers there.
39. qi::rule<Iterator, std::string()> fn_name;
40. qi::rule<Iterator, std::string()> string;
The other error I made was to see the %= and call it a list operator. From here, it is a definition operator. I'm not sure why there are two but playing around, it seems you need to use %= with semantic action. Here is the corrected code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
struct command_parser : qi::grammar<Iterator, std::vector<std::string>(), ascii::space_type>
{
command_parser() : command_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
fn_name = +qi::char_("a-zA-Z");
string = +qi::char_("a-zA-Z_0-9");
rec = *(lit(",") >> string);
start %= fn_name >> lit("(") >> string >> rec >> lit(")");
}
qi::rule<Iterator, std::string()> fn_name;
qi::rule<Iterator, std::string()> string;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> rec;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> start;
};
}
int main()
{
namespace qi = boost::spirit::qi;
client::command_parser<std::string::iterator> CP;
std::string cmd("function(1,2,3,4 , 5, foo) ");
std::vector<std::string> VV;
bool result = qi::phrase_parse(cmd.begin(), cmd.end(), CP, qi::ascii::space, VV);
if (result) {
for (auto sss : VV) {
std::cout << sss << std::endl;
}
}
else {
std::cout << "Fail" << std::endl;
}
return 0;
}
And here is an example using X3:
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <vector>
//your attribute, could be more complex, might use namespace
using attr = std::vector<std::string>;
namespace parser {
namespace x3 = boost::spirit::x3;
const auto fn_name = +x3::char_("a-zA-Z");
const auto string = +x3::char_("a-zA-Z_0-9");
const auto start = x3::rule<struct _, attr>() = fn_name >> "(" >> string % ',' >> ")";
}
int main()
{
namespace x3 = boost::spirit::x3;
std::string cmd("fun(1,.2,3,4 , 5, foo) ");
attr VV;
auto it = cmd.begin();
bool result = phrase_parse(it, cmd.end(), parser::start, x3::space, VV);
if (result) {
for (auto sss : VV) {
std::cout << "-> " << sss << std::endl;
}
}
else
std::cout << "Fail at" << std::endl;
return 0;
}

Boost Spirit template specialization failure

Below is a very compact version of a grammar I'm trying to write using boost::spirit::qi.
Environment: VS2013, x86, Boost1.64
When #including the header file, the compiler complains about the line
rBlock = "{" >> +(rInvocation) >> "}";
with a very long log (I've only copied the beginning and the end):
more than one partial specialization matches the template argument list
...
...
see reference to function template instantiation
'boost::spirit::qi::rule
&boost::spirit::qi::rule::operator =>(const Expr &)' being compiled
Where is my mistake?
The header file:
//mygrammar.h
#pragma once
#include <boost/spirit/include/qi.hpp>
namespace myNS
{
typedef std::string Identifier;
typedef ::boost::spirit::qi::rule <const char*, Identifier()> myIdentifierRule;
typedef ::boost::variant<char, int> Expression;
typedef ::boost::spirit::qi::rule <const char*, Expression()> myExpressionRule;
struct IdntifierEqArgument
{
Identifier ident;
Expression arg;
};
typedef ::boost::variant < IdntifierEqArgument, Expression > Argument;
typedef ::boost::spirit::qi::rule <const char*, Argument()> myArgumentRule;
typedef ::std::vector<Argument> ArgumentList;
typedef ::boost::spirit::qi::rule <const char*, myNS::ArgumentList()> myArgumentListRule;
struct Invocation
{
Identifier identifier;
::boost::optional<ArgumentList> args;
};
typedef ::boost::spirit::qi::rule <const char*, Invocation()> myInvocationRule;
typedef ::std::vector<Invocation> Block;
typedef ::boost::spirit::qi::rule <const char*, myNS::Block()> myBlockRule;
}
BOOST_FUSION_ADAPT_STRUCT(
myNS::IdntifierEqArgument,
(auto, ident)
(auto, arg)
);
BOOST_FUSION_ADAPT_STRUCT(
myNS::Invocation,
(auto, identifier)
(auto, args)
);
namespace myNS
{
struct myRules
{
myIdentifierRule rIdentifier;
myExpressionRule rExpression;
myArgumentRule rArgument;
myArgumentListRule rArgumentList;
myInvocationRule rInvocation;
myBlockRule rBlock;
myRules()
{
using namespace ::boost::spirit;
using namespace ::boost::spirit::qi;
rIdentifier = as_string[((qi::alpha | '_') >> *(qi::alnum | '_'))];
rExpression = char_ | int_;
rArgument = (rIdentifier >> "=" >> rExpression) | rExpression;
rArgumentList = rArgument >> *("," >> rArgument);
rInvocation = rIdentifier >> "(" >> -rArgumentList >> ")";
rBlock = "{" >> +(rInvocation) >> "}";
}
};
}
I'm not exactly sure where the issue is triggered, but it clearly is a symptom of too many ambiguities in the attribute forwarding rules.
Conceptually this could be triggered by your attribute types having similar/compatible layouts. In language theory, you're looking at a mismatch between C++'s nominative type system versus the approximation of structural typing in the attribute propagation system. But enough theorism :)
I don't think attr_cast<> will save you here as it probably uses the same mechanics and heuristics under the hood.
It drew my attention that making the ArgumentList optional is ... not very useful (as an empty list already accurately reflects absense of arguments).
So I tried simplifying the rules:
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
And the declared attribute type can be simply ArgumentList instead of boost::optional::ArgumentList.
This turns out to remove the ambiguity when propagating into the vector<Invocation>, so ... you're saved.
If this feels "accidental" to you, you should! What would I do if this hadn't removed the ambiguity "by chance"? I'd have created a semantic action to propagate the Invocation by simpler mechanics. There's a good chance that fusion::push_back(_val, _1) or similar would have worked.
See also Boost Spirit: "Semantic actions are evil"?
Review And Demo
In the cleaned up review here I present a few fixes/improvements and a test run that dumps the parsed AST.
Separate AST from parser (you don't want use qi in the AST types. You specifically do not want using namespace directives in the face of generic template libraries)
Do not use auto in the adapt macros. That's not a feature. Instead, since you can ostensibly use C++11, use the C++11 (decltype) based macros
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
AST is leading (also, prefer c++11 for clarity):
namespace myAST {
using Identifier = std::string;
using Expression = boost::variant<char, int>;
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
struct Invocation {
Identifier identifier;
ArgumentList args;
};
using Block = std::vector<Invocation>;
}
It's nice to have the definitions separate
Regarding the parser,
I'd prefer the qi::grammar convention. Also,
You didn't declare any of the rules with a skipper. I "guessed" from context that whitespace is insignificant outside of the rules for Expression and Identifier.
Expression ate every char_, so also would eat ')' or even '3'. I noticed this only when testing and after debugging with:
//#define BOOST_SPIRIT_DEBUG
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
I highly recommend using these facilities
All in all the parser comes down to
namespace myNS {
namespace qi = boost::spirit::qi;
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
rExpression = qi::alpha | qi::int_;
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
Adding a test driver
int main() {
std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";
auto f = input.data(), l = f + input.size();
myAST::Block block;
bool ok = parse(f, l, myNS::myRules<>{}, block);
if (ok) {
std::cout << "Parse success\n";
for (auto& invocation : block) {
std::cout << invocation.identifier << "(";
for (auto& arg : invocation.args) std::cout << arg << ",";
std::cout << ")\n";
}
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Complete Demo
See it Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
namespace myAST {
using Identifier = std::string;
using Expression = boost::variant<char, int>;
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
struct Invocation {
Identifier identifier;
ArgumentList args;
};
using Block = std::vector<Invocation>;
// for debug printing
static inline std::ostream& operator<<(std::ostream& os, myAST::IdntifierEqArgument const& named) {
return os << named.ident << "=" << named.arg;
}
}
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
namespace myNS {
namespace qi = boost::spirit::qi;
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
rExpression = qi::alpha | qi::int_;
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
int main() {
std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";
auto f = input.data(), l = f + input.size();
myAST::Block block;
bool ok = parse(f, l, myNS::myRules<>{}, block);
if (ok) {
std::cout << "Parse success\n";
for (auto& invocation : block) {
std::cout << invocation.identifier << "(";
for (auto& arg : invocation.args) std::cout << arg << ",";
std::cout << ")\n";
}
}
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints output
Parse success
foo()
bar(a,b,42,)
qux(someThing_awful01=9,)
Remaining unparsed input: '
'

detecting function call with regex

I want to find out whether I can detect function call using regex. The basic case is easy: somefunction(1, 2);
But what if I had code:
somefunction(someotherfunction(), someotherotherfunction());
or
somefunction(function () { return 1; }, function() {return 2;});
or
caller_function(somefunction(function () { return 1; }, function() {return 2;}))
In this case I need to match same number of opening braces and closing braces so that I can find end of call to somefunction
Is it possible?
Thanks in advance.
Your question is misleading. It's not as simple as you think.
First, the grammar isn't regular. Regular expressions are not the right tool.
Second, you ask "detecting function call" but the samples show anonymous function definitions, a totally different ball game.
Here's a start using Boost Spirit:
start = skip(space) [ fcall ];
fcall = ident >> '(' >> -args >> ')';
args = arg % ',';
arg = fdef | fcall;
fdef = lexeme["function"] >> '(' >> -formals >> ')' >> body;
formals = ident % ',';
identch = alpha | char_("_");
ident = identch >> *(identch|digit);
body = '{' >> *~char_('}') >> '}';
Which would map onto an AST like:
struct function_definition {
std::vector<std::string> formal_arguments;
std::string body;
};
struct function_call;
using argument = boost::variant<
function_definition,
boost::recursive_wrapper<function_call>
>;
struct function_call {
std::string name;
std::vector<argument> args;
};
DEMO
Live On Coliru
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
struct function_definition {
std::vector<std::string> formal_arguments;
std::string body;
};
struct function_call;
using argument = boost::variant<
function_definition,
boost::recursive_wrapper<function_call>
>;
struct function_call {
std::string name;
std::vector<argument> args;
};
BOOST_FUSION_ADAPT_STRUCT(function_call, name, args)
BOOST_FUSION_ADAPT_STRUCT(function_definition, formal_arguments, body)
namespace qi = boost::spirit::qi;
template <typename It>
struct Parser : qi::grammar<It, function_call()> {
Parser() : Parser::base_type(start) {
using namespace qi;
start = skip(space) [ fcall ];
fcall = ident >> '(' >> -args >> ')';
args = arg % ',';
arg = fdef | fcall;
fdef = lexeme["function"] >> '(' >> -formals >> ')' >> body;
formals = ident % ',';
identch = alpha | char_("_");
ident = identch >> *(identch|digit);
body = '{' >> *~char_('}') >> '}';
BOOST_SPIRIT_DEBUG_NODES((start)(fcall)(args)(arg)(fdef)(formals)(ident)(body))
}
private:
using Skipper = qi::space_type;
qi::rule<It, function_call()> start;
qi::rule<It, function_call(), Skipper> fcall;
qi::rule<It, argument(), Skipper> arg;
qi::rule<It, std::vector<argument>(), Skipper> args;
qi::rule<It, function_definition(), Skipper> fdef;
qi::rule<It, std::vector<std::string>(), Skipper> formals;
qi::rule<It, char()> identch;
qi::rule<It, std::string()> ident, body;
};
// for debug:
#include <experimental/iterator>
static inline std::ostream& operator<<(std::ostream& os, function_definition const& v) {
os << "function(";
std::copy(v.formal_arguments.begin(), v.formal_arguments.end(), std::experimental::make_ostream_joiner(os, ", "));
return os << ") {" << v.body << "}";
}
static inline std::ostream& operator<<(std::ostream& os, function_call const& v) {
os << v.name << "(";
std::copy(v.args.begin(), v.args.end(), std::experimental::make_ostream_joiner(os, ", "));
return os << ")";
}
int main() {
std::string const input = "caller_function(somefunction(function () { return 1; }, function() {return 2;}))";
using It = std::string::const_iterator;
Parser<It> const p;
It f = input.begin(), l = input.end();
function_call parsed;
bool ok = parse(f, l, p, parsed);
if (ok) {
std::cout << "Parsed ok: " << parsed << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
Prints
Parsed ok: caller_function(somefunction(function() { return 1; }, function() {return 2;}))

boost::spirit parsing into a fusion adapted structure optional but exclusive

If there's a structure:
struct record
{
std::string type;
std::string delimiter;
uint32_t length;
std::string name;
record()
{
type = "";
delimiter = "";
length = 0;
name = "";
}
};
Which is adapted using boost::fusion, and the below grammar:
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
type %= lexeme[+(char_ - (blank|char('(')))];
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
length %= (char('(') >> uint_ >> char(')'));
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
I am looking to parse 'delimiter' and 'length' as optional. However, one of them has to be present, and if one is present, the other one should not exist.
For Example:
record { string(5) Alex; }
record { string("|") Alex; }
But Not:
record { string(5)("|") Alex; }
record { string Alex; }
I have attempted to do it this way, but compilation fails:
start %=
eps >
lit("record")
>> char('{')
>> type
>> ((delimiter >> attr(0)) | (attr("") >> length))
>> name
>> char('}')
;
Thank you for your help in advance. Below is the full source code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
struct record
{
std::string type;
std::string delimiter;
uint32_t length;
std::string name;
record()
{
type = "";
delimiter = "";
length = 0;
name = "";
}
};
}
BOOST_FUSION_ADAPT_STRUCT(
client::record,
(std::string, type)
(std::string, delimiter)
(uint32_t, length)
(std::string, name)
)
namespace client
{
template <typename Iterator>
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
type %= lexeme[+(char_ - (blank|char('(')))];
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
length %= (char('(') >> uint_ >> char(')'));
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
int main()
{
std::string storage = "record { string(5) Alex; }";
using boost::spirit::ascii::space;
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
std::string::const_iterator iter = storage.begin();
std::string::const_iterator end = storage.end();
bool r = phrase_parse(iter, end, g, space, rec);
if (r && iter == end)
{
std::cout << boost::fusion::tuple_open('[');
std::cout << boost::fusion::tuple_close(']');
std::cout << boost::fusion::tuple_delimiter(", ");
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "got: " << boost::fusion::as_vector(rec) << std::endl;
std::cout << "\n-------------------------\n";
}
else
{
std::string::const_iterator some = iter+30;
std::string context(iter, (some>end)?end:some);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at -->" << context << "...\n";
std::cout << "-------------------------\n";
}
return 0;
}
You can just write out the combinations:
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
The best way to make it work with automatic attribute propagation is to use an AST structure that is similar:
namespace client {
struct record {
std::string type;
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
Full Demo Live On Coliru
Note how much simpler the grammar has been made (all those char(' ') things are unnecessary; use lexemes only if you declare a skipper; use ~char_ instead of character set subtraction; use graph instead of char_ - space etc.).
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
>> name >> ';' >> '}'
;
Full code:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client {
std::ostream& operator<<(std::ostream& os, record::param_t const& v) { return os << boost::fusion::as_vector(v); }
std::ostream& operator<<(std::ostream& os, record const& v) { return os << boost::fusion::as_vector(v); }
}
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (
delimiter >> attr(0)
| attr("") >> length
| attr("") >> attr(0)
)
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints:
Parsing succeeded: (string ( 5) Alex)
Parsing succeeded: (string (| 0) Alex)
Because it's 2016, adding a X3 example too. Once again, taking the variant approach, which I find to be typical in Spirit code.
namespace AST {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
namespace parser {
using namespace x3;
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
static auto const length = '(' >> uint_ >> ')';
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
That's all. The calling code is virtually unchanged:
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
AST::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
}
}
}
Everything compiles a lot quicker and I'd not be surprised if the resultant code was at least twice as fast at runtime too.
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace x3 = boost::spirit::x3;
namespace AST {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
namespace parser {
using namespace x3;
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
static auto const length = '(' >> uint_ >> ')';
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
#include <iostream>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/optional/optional_io.hpp>
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
AST::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing failed
Remaining: 'record { string Alex; }'
sehe's first answer is perfect (or would be if he corrected what he realized in the comments), but I just wanted to add an explanation of the problem and a possible alternative. The code below is based on that excellent answer.
You have a couple of problems with the attributes of your start rule. The attribute you want to get is record which is basically tuple<string,string,uint32_t,string>. Let's see the attributes of several parsers:
Something similar (but simpler) to your original rule:
Attribute of: "lit("record") >> char_('{') >> type >> delimiter >> length >> name >> char_('}')"
tuple<char,string,string,uint32_t,string,char>
As you can see you have two extra char caused b your use of char_(has an attribute of char) instead of lit(has no attribute). omit[char_] could also work, but would be a little silly.
Let's change char_ to lit:
Attribute of: "lit("record") >> lit('{') >> type >> delimiter >> length >> name >> lit('}')"
tuple<string,string,uint32_t,string>
Which is what we want.
Your original rule with lit:
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr("")) >> (length | attr(0)) >> name >> lit('}')"
tuple<string,variant<string,char const (&)[1]>,variant<uint32_t,int>,string>
Since the branches of | aren't identical, you get variants instead of the attribute you want. (In this simple case everything works as if there were no variants though)
Let's remove the variants (since they cause errors in more complex scenarios):
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr(string())) >> (length | attr(uint32_t())) >> name >> lit('}')"
tuple<string,string,uint32_t,string>
This works in the cases you want but also when both are missing.
sehe's approach:
Attribute of: "lit("record") >> lit('{') >> type >> ((delimiter >> attr(uint32_t())) | (attr(string()) >> length)) >> name >> lit('}')"
tuple<string,tuple<string,uint32_t>,string>
Looking at this synthesized attribute you can see the need to create the param_t helper struct to make your record attribute match.
See on Coliru a way to "calculate" the previous attributes.
The possible alternative is a custom directive using boost::fusion::flatten_view. Keep in mind that this directive has very little testing so I would recommend the approach shown by sehe, but it seems to work (at least in this case).
The example in this question with this directive on Wandbox
Several other examples where this directive can be useful
flatten_directive.hpp
#pragma once
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/zip_view.hpp>
namespace custom
{
BOOST_SPIRIT_TERMINAL(flatten);
}
namespace boost {
namespace spirit
{
///////////////////////////////////////////////////////////////////////////
// Enablers
///////////////////////////////////////////////////////////////////////////
template <>
struct use_directive<qi::domain, custom::tag::flatten> // enables flatten
: mpl::true_ {};
}
}
namespace custom
{
template <typename Subject>
struct flatten_directive : boost::spirit::qi::unary_parser<flatten_directive<Subject> >
{
typedef Subject subject_type;
flatten_directive(Subject const& subject)
: subject(subject) {}
template <typename Context, typename Iterator>
struct attribute
{
typedef boost::fusion::flatten_view<typename
boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type>
type;//the attribute of the directive is a flatten_view of whatever is the attribute of the subject
};
template <typename Iterator, typename Context
, typename Skipper, typename Attribute>
bool parse(Iterator& first, Iterator const& last
, Context& context, Skipper const& skipper
, Attribute& attr) const
{
Iterator temp = first;
boost::spirit::qi::skip_over(first, last, skipper);
typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type original_attr;
if (subject.parse(first, last, context, skipper, original_attr))//parse normally
{
typename attribute<Context, Iterator>::type flattened_attr(original_attr);//flatten the attribute
typedef boost::fusion::vector<Attribute&,typename attribute<Context,Iterator>::type&> sequences;
boost::fusion::for_each(//assign to each element of Attribute the corresponding element of the flattened sequence
boost::fusion::zip_view<sequences>(
sequences(attr,flattened_attr)
)
,
[](const auto& pair)//substitute with a functor with templated operator() to support c++98/03
{
boost::spirit::traits::assign_to(boost::fusion::at_c<1>(pair),boost::fusion::at_c<0>(pair));
}
);
return true;
}
first = temp;
return false;
}
template <typename Context>
boost::spirit::info what(Context& context) const
{
return info("flatten", subject.what(context));
}
Subject subject;
};
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost {
namespace spirit {
namespace qi
{
template <typename Subject, typename Modifiers>
struct make_directive<custom::tag::flatten, Subject, Modifiers>
{
typedef custom::flatten_directive<Subject> result_type;
result_type operator()(unused_type, Subject const& subject, unused_type) const
{
return result_type(subject);
}
};
}
}
}
namespace boost {
namespace spirit {
namespace traits
{
///////////////////////////////////////////////////////////////////////////
template <typename Subject>
struct has_semantic_action<custom::flatten_directive<Subject> >
: unary_has_semantic_action<Subject> {};
///////////////////////////////////////////////////////////////////////////
template <typename Subject, typename Attribute, typename Context
, typename Iterator>
struct handles_container<custom::flatten_directive<Subject>, Attribute
, Context, Iterator>
: unary_handles_container<Subject, Attribute, Context, Iterator> {};
}
}
}
main.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/copy.hpp>
#include "flatten_directive.hpp"
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
std::string delimiter;
uint32_t length = 0;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, delimiter, length, name)
namespace client {
std::ostream& operator<<(std::ostream& os, record const& v) { return os << boost::fusion::tuple_open('[') << boost::fusion::tuple_close(']') << boost::fusion::tuple_delimiter(", ") << boost::fusion::as_vector(v); }
}
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"';
delimiter_single_quote = "'" >> +~char_("'") >> "'";
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start =
custom::flatten[
lit("record")
>> '{'
>> type
>> (
delimiter >> attr(uint32_t())//the attributes of both branches must be exactly identical
| attr(std::string("")) >> length//const char[1]!=std::string int!=uint32_t
)
>> name
>> ';'
>> '}'
]
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
"record { string (\"|\")(5) Alex; }"
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
}
else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Here's the more typical approach that parsed variant<std::string, uint32_t> so the AST reflects that only one can be present:
With Nil-Param
With the same misunderstanding as in my first answer, allowing both params to be optional:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct nil { friend std::ostream& operator<<(std::ostream& os, nil) { return os << "(nil)"; } };
struct record {
std::string type;
boost::variant<nil, std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length | attr(nil{}))
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing succeeded: (string (nil) Alex)
Without Nil-Param
Requiring exactly one:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length)
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
"record { string Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
terminate called after throwing an instance of 'boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::spirit::qi::expectation_failure<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > >'
what(): boost::spirit::qi::expectation_failure

Treat escaped newline as line continuation

Here is an example of the syntax -- two groups of items:
I_name m_name parameter1=value parameter2=value
I_name m_name parameter1=value \
parameter2=value
My question is how to define the skip-type.
It is not just space_type but space_type minus newline.
But newline followed by backslash is a skip-type.
E.g.
I define name like that:
qi::rule<Iterator, std::string(), ascii::space_type> m_sName;
m_sName %= qi::lexeme[ascii::alpha >> *ascii::alnum];
This is obviously not correct, as the space_type must include newline-backslash.
The following grammar works for me.
*("\\\n" | ~qi::char_('\n')) % '\n'
It will ignore any newline after the backslash. And the following is a simple test.
#include <vector>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
typedef std::vector<std::string> Lines;
inline auto ParseLines(std::string const& str) {
Lines lines;
namespace qi = boost::spirit::qi;
if (qi::parse(
str.begin(), str.end(),
*("\\\n" | ~qi::char_('\n')) % '\n',
lines)) {
return lines;
}
else {
throw std::invalid_argument("Parse error at ParseLines");
}
}
BOOST_AUTO_TEST_CASE(TestParseLines) {
std::string const str =
"I_name m_name parameter1=value parameter2=value\n"
"I_name m_name parameter1 = value \\\n"
"parameter2 = value";
Lines const expected{
"I_name m_name parameter1=value parameter2=value",
"I_name m_name parameter1 = value parameter2 = value"
};
BOOST_TEST(ParseLines(str) == expected);
}
You should use "-std=c++14 -lboost_unit_test_framework" for compilation. Anyway, it is easy to convert the code for c++03.
qi::blank is exactly that. It's qi::space without newlines.
You can do this too: ("\\\n" | qi::blank)
To be able to declare a rule with such a skipper, define a skipper grammar:
template <typename It>
struct my_skipper : qi::grammar<It> {
my_skipper() : my_skipper::base_type(start) {}
qi::rule<It> start = ("\\\n" | qi::blank);
};
Full Demo
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <map>
namespace qi = boost::spirit::qi;
namespace ast {
struct record {
std::string iname, mname;
std::map<std::string, std::string> params;
};
using records = std::vector<record>;
}
BOOST_FUSION_ADAPT_STRUCT(ast::record, iname, mname, params)
template <typename It>
struct my_parser : qi::grammar<It, ast::records()> {
using Skipper = qi::rule<It>;
my_parser() : my_parser::base_type(start) {
skipper = ("\\\n" | qi::blank);
name = +qi::graph;
key = +(qi::graph - '=');
param = key >> '=' >> name;
record = name >> name >> *param;
records = *(record >> +qi::eol);
start = qi::skip(qi::copy(skipper)) [ records ];
}
private:
Skipper skipper;
qi::rule<It, ast::records(), Skipper> records;
qi::rule<It, ast::record(), Skipper> record;
qi::rule<It, ast::records()> start;
qi::rule<It, std::pair<std::string, std::string>()> param;
qi::rule<It, std::string()> name, key;
};
int main() {
#if 1
using It = boost::spirit::istream_iterator;
It f(std::cin >> std::noskipws), l;
#else
using It = std::string::const_iterator;
std::string const input = "something here a=1\n";
It f = input.begin(), l = input.end();
#endif
ast::records data;
bool ok = qi::parse(f, l, my_parser<It>(), data);
if (ok) {
std::cout << "Parsed:\n";
for (auto& r : data) {
std::cout << "\t" << r.iname << " " << r.mname;
for (auto& p : r.params)
std::cout << " [" << p.first << ": " << p.second << "]";
std::cout << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f!=l)
std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
}
Prints (for the input in your question):
Parsed:
I_name m_name [parameter1: value] [parameter2: value]
I_name m_name [parameter1: value] [parameter2: value]