Related
Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 11 months ago.
Improve this question
I would like to implement a parser with boost::spirit for a nested (and optional) structure, such as "namespace" in the C++ language.
What is the simplest way to do it?
You can create a recursive rule like so:
namespace_ = lexeme["namespace"] >> '{' >> *namespace_ >> '}';
To also allow optional name identifiers:
rule<It, std::string()> identifier_ = //
raw[(alpha | '_') >> *(alnum | '_')]; //
namespace_ =
lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';
To also account for C++-style comments:
using Skip = rule<It>;
Skip skip = space //
| "//" >> *~char_("\r\n") >> (eol | eoi) //
| "/*" >> *(char_ - "*/") >> "*/" //
;
rule<It, Skip> namespace_;
rule<It, std::string()> identifier_ = //
raw[(alpha | '_') >> *(alnum | '_')]; //
namespace_ =
lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';
Demo:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
// Parses `input` as a sequence of (possibly nested, optionally named)
// `namespace ... { ... }` blocks. Whitespace and C/C++-style comments are
// skipped. The result of phrase_parse is not inspected and nothing is returned.
auto parse(std::string_view input) {
using namespace boost::spirit::qi;
using It = std::string_view::const_iterator;
// Skipper: whitespace, `//` line comments and `/* ... */` block comments.
using Skip = rule<It>;
Skip skip = space //
| "//" >> *~char_("\r\n") >> (eol | eoi) //
| "/*" >> *(char_ - "*/") >> "*/" //
;
rule<It, Skip> namespace_;
// Declared without a skipper, so the characters of an identifier must be
// contiguous.
rule<It, std::string()> identifier_ = //
raw[(alpha | '_') >> *(alnum | '_')]; //
// Recursive rule: a namespace body holds zero or more nested namespaces.
namespace_ =
lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';
// `>` is Qi's expectation operator: after `eps`, the namespaces must be
// followed by end of input.
phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip);
}
int main() {
    // Sample input: nested named namespaces followed by a trailing line comment.
    std::string_view const sample = R"(
namespace X { namespace Y {
namespace Z1 {
}
namespace Z2 {
}
}} // namespace X::Y
)";
    parse(sample);
}
BONUS
Adding AST representation and debug output of the parsed data: Live On Coliru (http://coliru.stacked-crooked.com/a/58542397b7f751e0)
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
// AST for the namespace parser: a tree of optionally named namespaces.
namespace Ast {
using Id = std::string;
// Forward declaration so that Namespace can hold a vector of itself.
struct Namespace;
using Namespaces = std::vector<Namespace>;
struct Namespace {
// Empty for an anonymous namespace.
boost::optional<Id> id;
// Directly nested namespaces, in source order.
Namespaces children;
};
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace, id, children)
// Parses `input` into a list of top-level Ast::Namespace nodes and returns it.
// Whitespace and C/C++-style comments are skipped. The boolean result of
// phrase_parse is not inspected.
auto parse(std::string_view input) {
using namespace boost::spirit::qi;
using It = std::string_view::const_iterator;
// Skipper: whitespace, `//` line comments and `/* ... */` block comments.
using Skip = rule<It>;
Skip skip = space //
| "//" >> *~char_("\r\n") >> (eol | eoi) //
| "/*" >> *(char_ - "*/") >> "*/" //
;
// Synthesizes one Ast::Namespace per match (optional id + children).
rule<It, Ast::Namespace(), Skip> namespace_;
// Declared without a skipper, so the characters of an identifier must be
// contiguous.
rule<It, Ast::Id()> identifier_ = //
raw[(alpha | '_') >> *(alnum | '_')]; //
namespace_ =
lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';
Ast::Namespaces parsed;
// `>` is Qi's expectation operator: the namespaces must be followed by end
// of input.
phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip, parsed);
return parsed;
}
namespace Ast {
    // Writes `ns` and all nested namespaces to `os`, one brace per line.
    // Every nesting level is padded by two more columns than its parent;
    // anonymous namespaces are rendered with a `/*anonymous*/` placeholder.
    void print(std::ostream& os, Namespace const& ns, unsigned indent = 0) {
        // Emits the left margin for the current nesting level.
        auto const margin = [&os, indent]() -> std::ostream& {
            return os << std::setw(indent) << "";
        };

        margin() << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";

        unsigned const nested = indent + 2;
        for (auto& inner : ns.children)
            print(os, inner, nested);

        margin() << "}\n";
    }
}
int main() {
    // Sample input: nested named namespaces plus one anonymous namespace.
    std::string_view const source = R"(
namespace X { namespace Y {
namespace Z1 {
}
namespace Z2 {
}
}} // namespace X::Y
namespace { }
)";

    auto program = parse(source);

    // Dump every top-level namespace (and, recursively, its children).
    for (auto& top_level : program)
        print(std::cout, top_level);
}
Prints
namespace X {
namespace Y {
namespace Z1 {
}
namespace Z2 {
}
}
}
namespace /*anonymous*/ {
}
UPDATE
In response to the comments I made a more involved example that parses input where structs can appear at global or namespace level (or, indeed, nested inside another struct), like:
namespace Math {
long factorial(int x);
}
struct GlobalA {
int foo();
double bar(string stuff, int i, bool flag);
struct Nested {
/* todo implementation */
};
};
namespace X { namespace Y {
struct Mixin{};
namespace Z1 {
struct Derived : GlobalA, Mixin {
void qux();
};
}
namespace Z2 {
}
}} // namespace X::Y
namespace { }
See it Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>
// AST for the extended grammar: namespaces, structs, function declarations
// and variable declarations.
namespace Ast {
// Distinct identifier type that inherits std::string's constructors and
// assignment operators.
struct Id : std::string {
using std::string::string;
using std::string::operator=;
};
struct Namespace;
using Namespaces = std::vector<Namespace>;
// `<type> <name>`; also used for function parameters.
struct VariableDeclaration { Id type, name; };
using VariableDeclarations = std::vector<VariableDeclaration>;
struct FunctionDeclaration {
Id return_type, name;
VariableDeclarations args;
};
struct StructDeclaration;
// StructDeclaration is still incomplete here, hence the recursive_wrapper.
using Declaration = boost::variant< //
VariableDeclaration, //
FunctionDeclaration, //
boost::recursive_wrapper<StructDeclaration>>;
using Bases = std::list<Id>;
using Members = std::vector<Declaration>;
struct StructDeclaration {
Id name;
Bases bases;
Members members;
};
// A namespace (or the program) holds declarations and nested namespaces.
using NsMember = boost::variant<Declaration, Namespace>;
using NsMembers = std::vector<NsMember>;
struct Namespace {
// Empty for an anonymous namespace.
boost::optional<Id> id;
NsMembers members;
};
using Program = NsMembers;
} // namespace Ast
BOOST_FUSION_ADAPT_STRUCT(Ast::VariableDeclaration, type, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::FunctionDeclaration, return_type, name, args)
BOOST_FUSION_ADAPT_STRUCT(Ast::StructDeclaration, name, bases, members)
BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace, id, members)
///// BEGIN DEBUG OUTPUT FACILITIES
namespace Ast {
    // Streams a namespace: header line, every member in order, closing brace.
    static std::ostream& operator<<(std::ostream& out, Namespace const& ns) {
        out << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
        for (auto& member : ns.members) {
            out << member;
        }
        out << "}\n";
        return out;
    }

    // Streams `<return_type> <name>(<type> <name>, ...);`.
    static std::ostream& operator<<(std::ostream& out, FunctionDeclaration const& fd) {
        out << fd.return_type << " " << fd.name << "(";
        char const* separator = ""; // nothing before the first parameter
        for (auto& param : fd.args) {
            out << separator << param.type << " " << param.name;
            separator = ", ";
        }
        out << ");";
        return out;
    }

    // Streams `<type> <name>;`.
    static std::ostream& operator<<(std::ostream& out, VariableDeclaration const& vd) {
        out << vd.type << " " << vd.name << ";";
        return out;
    }

    // Streams a struct: name, optional ` : Base, Base` list, then one member
    // per line between braces.
    static std::ostream& operator<<(std::ostream& out, StructDeclaration const& sd) {
        out << "struct " << sd.name;
        char const* separator = " : "; // introduces the base list, if any
        for (auto const& base : sd.bases) {
            out << separator << base;
            separator = ", ";
        }
        out << " {\n";
        for (auto& member : sd.members) {
            out << member << "\n";
        }
        out << "};\n";
        return out;
    }
}
///// END DEBUG OUTPUT FACILITIES
namespace qi = boost::spirit::qi;
/// Qi grammar for a small C++-like declaration language: namespaces
/// (optionally named, arbitrarily nested), structs with optional base lists,
/// function declarations and variable declarations.
///
/// The grammar is exposed without a skipper; `start` installs `skipper_`
/// (whitespace plus `//` and `/* */` comments) itself, so callers use
/// `qi::parse` rather than `qi::phrase_parse`.
template <typename It> struct ProgramParser : qi::grammar<It, Ast::Program()> {
    ProgramParser() : ProgramParser::base_type(start) {
        using namespace qi;

        // Reserved words. kw_lexeme matches only when the word is not merely
        // the prefix of a longer identifier.
        keywords_ += "if", "do", "for", "else", "while", "not", "and", "or",
            "xor", "continue", "break", "case", "goto", "struct", "class",
            "enum", "namespace";
        kw_lexeme = keywords_ >> !(alnum|'_');

        skipper_ = space                             //
            | "//" >> *~char_("\r\n") >> (eol | eoi) //
            | "/*" >> *(char_ - "*/") >> "*/"        //
            ;

        // An identifier is any name that is not a reserved word.
        identifier_ = !kw_lexeme >> raw[(alpha | '_') >> *(alnum | '_')];

        vardecl_ = identifier_ >> identifier_;
        fundecl_ = identifier_ >> identifier_ >> //
            '(' >> -(vardecl_ % ',') >> ')' >> ';';

        // vardecl_ is shared with function parameter lists, so the ';' that
        // terminates a stand-alone variable declaration is consumed here.
        // Without it a member such as `int x;` could never be parsed.
        decl_ = fundecl_ | vardecl_ >> ';' | struct_;

        Ast::Bases const no_bases;
        baselist_ = ':' >> identifier_ % ',' | attr(no_bases);

        struct_ =                                     //
            lexeme["struct" >> !graph] >> identifier_ //
            >> baselist_ >> '{'                       //
            >> *decl_                                 //
            >> '}' >> ';';

        nsmember_ = namespace_ | decl_;

        // The keyword must end at an identifier boundary. The check is a pure
        // look-ahead, so in `namespace{ }` the '{' is left for the body.
        // (Matching '{' inside the lexeme would consume it and make the
        // subsequent '{' fail.)
        namespace_ = lexeme["namespace" >> !(alnum | '_')] >>
            -identifier_ >> '{' >> *nsmember_ >> '}';

        program_ = *nsmember_;

        // Expectation point: the whole input must be consumed.
        start = skip(skipper_.alias())[program_ > eoi];

        BOOST_SPIRIT_DEBUG_NODES((start)(program_)(nsmember_)(namespace_)(
            struct_)(decl_)(vardecl_)(fundecl_)(baselist_)(identifier_))
    }

  private:
    qi::symbols<char> keywords_;

    // Rules declared without a skipper (implicit lexemes).
    qi::rule<It> kw_lexeme;
    qi::rule<It, Ast::Program()> start;
    qi::rule<It, Ast::Id()> identifier_;

    using Skip = qi::rule<It>;
    Skip skipper_;

    // Rules that run under the skipper.
    qi::rule<It, Ast::Bases(), Skip> baselist_;
    qi::rule<It, Ast::Declaration(), Skip> decl_;
    qi::rule<It, Ast::FunctionDeclaration(), Skip> fundecl_;
    qi::rule<It, Ast::Namespace(), Skip> namespace_;
    qi::rule<It, Ast::NsMember(), Skip> nsmember_;
    qi::rule<It, Ast::Program(), Skip> program_;
    qi::rule<It, Ast::StructDeclaration(), Skip> struct_;
    qi::rule<It, Ast::VariableDeclaration(), Skip> vardecl_;
};
// Parses `input` with ProgramParser and returns the resulting AST.
// The boolean result of `parse` is not inspected.
Ast::Program parse_program(std::string_view input) {
    using Iterator = std::string_view::const_iterator;

    // One grammar instance, shared by every call.
    static ProgramParser<Iterator> const grammar;

    Ast::Program result;
    parse(begin(input), end(input), grammar, result);
    return result;
}
int main() {
    // Sample input mixing namespaces, structs (nested and derived) and
    // function declarations.
    std::string_view const source = R"(
namespace Math {
long factorial(int x);
}
struct GlobalA {
int foo();
double bar(string stuff, int i, bool flag);
struct Nested {
/* todo implementation */
};
};
namespace X { namespace Y {
struct Mixin{};
namespace Z1 {
struct Derived : GlobalA, Mixin {
void qux();
};
}
namespace Z2 {
}
}} // namespace X::Y
namespace { }
)";

    auto program = parse_program(source);

    // Print each top-level member followed by a newline.
    for (auto& top_level : program) {
        std::cout << top_level << '\n';
    }
}
The output (not pretty-printed):
namespace Math {
long factorial(int x);}
struct GlobalA {
int foo();
double bar(string stuff, int i, bool flag);
struct Nested {
};
};
namespace X {
namespace Y {
struct Mixin {
};
namespace Z1 {
struct Derived : GlobalA, Mixin {
void qux();
};
}
namespace Z2 {
}
}
}
namespace /*anonymous*/ {
}
Below is a very compact version of a grammar I'm trying to write using boost::spirit::qi.
Environment: VS2013, x86, Boost1.64
When #including the header file, the compiler complains about the line
rBlock = "{" >> +(rInvocation) >> "}";
with a very long log (I've only copied the beginning and the end):
more than one partial specialization matches the template argument list
...
...
see reference to function template instantiation
'boost::spirit::qi::rule
&boost::spirit::qi::rule::operator =>(const Expr &)' being compiled
Where is my mistake?
The header file:
//mygrammar.h
// Header as posted in the question: AST types, the matching Qi rule typedefs,
// the Fusion adaptations and a struct that wires the rules together.
#pragma once
#include <boost/spirit/include/qi.hpp>
namespace myNS
{
// Each AST type is followed by the rule type that synthesizes it. All rules
// use `const char*` iterators and declare no skipper.
typedef std::string Identifier;
typedef ::boost::spirit::qi::rule <const char*, Identifier()> myIdentifierRule;
typedef ::boost::variant<char, int> Expression;
typedef ::boost::spirit::qi::rule <const char*, Expression()> myExpressionRule;
// Named argument: `ident = arg`.
struct IdntifierEqArgument
{
Identifier ident;
Expression arg;
};
typedef ::boost::variant < IdntifierEqArgument, Expression > Argument;
typedef ::boost::spirit::qi::rule <const char*, Argument()> myArgumentRule;
typedef ::std::vector<Argument> ArgumentList;
typedef ::boost::spirit::qi::rule <const char*, myNS::ArgumentList()> myArgumentListRule;
// A call: identifier plus an optional argument list.
struct Invocation
{
Identifier identifier;
::boost::optional<ArgumentList> args;
};
typedef ::boost::spirit::qi::rule <const char*, Invocation()> myInvocationRule;
typedef ::std::vector<Invocation> Block;
typedef ::boost::spirit::qi::rule <const char*, myNS::Block()> myBlockRule;
}
// Fusion adaptations; member order here must match the order in which the
// rules below synthesize their attributes.
BOOST_FUSION_ADAPT_STRUCT(
myNS::IdntifierEqArgument,
(auto, ident)
(auto, arg)
);
BOOST_FUSION_ADAPT_STRUCT(
myNS::Invocation,
(auto, identifier)
(auto, args)
);
namespace myNS
{
// Owns one rule per grammar element and defines them in the constructor.
struct myRules
{
myIdentifierRule rIdentifier;
myExpressionRule rExpression;
myArgumentRule rArgument;
myArgumentListRule rArgumentList;
myInvocationRule rInvocation;
myBlockRule rBlock;
myRules()
{
using namespace ::boost::spirit;
using namespace ::boost::spirit::qi;
rIdentifier = as_string[((qi::alpha | '_') >> *(qi::alpha | '_'))];
// NOTE: char_ matches any single character, so the int_ branch is only
// tried where no character is left to match.
rExpression = char_ | int_;
rArgument = (rIdentifier >> "=" >> rExpression) | rExpression;
rArgumentList = rArgument >> *("," >> rArgument);
rInvocation = rIdentifier >> "(" >> -rArgumentList >> ")";
// This is the assignment the compiler error in the question points at.
rBlock = "{" >> +(rInvocation) >> "}";
}
};
}
I'm not exactly sure where the issue is triggered, but it clearly is a symptom of too many ambiguities in the attribute forwarding rules.
Conceptually this could be triggered by your attribute types having similar/compatible layouts. In language theory, you're looking at a mismatch between C++'s nominative type system versus the approximation of structural typing in the attribute propagation system. But enough theorism :)
I don't think attr_cast<> will save you here as it probably uses the same mechanics and heuristics under the hood.
It drew my attention that making the ArgumentList optional is ... not very useful (as an empty list already accurately reflects absence of arguments).
So I tried simplifying the rules:
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
And the declared attribute type can be simply ArgumentList instead of boost::optional<ArgumentList>.
This turns out to remove the ambiguity when propagating into the vector<Invocation>, so ... you're saved.
If this feels "accidental" to you, you should! What would I do if this hadn't removed the ambiguity "by chance"? I'd have created a semantic action to propagate the Invocation by simpler mechanics. There's a good chance that fusion::push_back(_val, _1) or similar would have worked.
See also Boost Spirit: "Semantic actions are evil"?
Review And Demo
In the cleaned up review here I present a few fixes/improvements and a test run that dumps the parsed AST.
Separate AST from parser (you don't want to use qi in the AST types. You specifically do not want using namespace directives in the face of generic template libraries)
Do not use auto in the adapt macros. That's not a feature. Instead, since you can ostensibly use C++11, use the C++11 (decltype) based macros
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
AST is leading (also, prefer c++11 for clarity):
// AST types only; nothing in this namespace refers to the parser library.
namespace myAST {
using Identifier = std::string;
using Expression = boost::variant<char, int>;
// Named argument: `ident = arg`.
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
// A call; an empty `args` vector represents "no arguments".
struct Invocation {
Identifier identifier;
ArgumentList args;
};
using Block = std::vector<Invocation>;
}
It's nice to have the definitions separate
Regarding the parser,
I'd prefer the qi::grammar convention. Also,
You didn't declare any of the rules with a skipper. I "guessed" from context that whitespace is insignificant outside of the rules for Expression and Identifier.
Expression ate every char_, so also would eat ')' or even '3'. I noticed this only when testing and after debugging with:
//#define BOOST_SPIRIT_DEBUG
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
I highly recommend using these facilities
All in all the parser comes down to
namespace myNS {
namespace qi = boost::spirit::qi;
// Grammar for a `{ ... }` block of invocations. It is exposed without a
// skipper; `start` installs qi::space internally.
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
// A single letter or an integer.
rExpression = qi::alpha | qi::int_;
// Named argument first, plain expression as the fallback.
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
// Possibly empty, comma-separated list.
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
// Rules that run under the whitespace skipper.
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
Adding a test driver
int main() {
    std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";

    // `cursor` is advanced by the parser; `stop` marks the end of the input.
    char const* cursor = input.data();
    char const* stop   = cursor + input.size();

    myAST::Block block;
    bool const parsed_ok = parse(cursor, stop, myNS::myRules<>{}, block);

    if (!parsed_ok) {
        std::cout << "Parse failed\n";
    } else {
        std::cout << "Parse success\n";
        for (auto& call : block) {
            std::cout << call.identifier << "(";
            // Every argument is followed by a comma, including the last one.
            for (auto& argument : call.args) {
                std::cout << argument << ",";
            }
            std::cout << ")\n";
        }
    }

    // Report whatever the parser left unconsumed.
    if (cursor != stop) {
        std::cout << "Remaining unparsed input: '" << std::string(cursor, stop) << "'\n";
    }
}
Complete Demo
See it Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
// AST types plus the one stream operator needed to print an Argument.
namespace myAST {
using Identifier = std::string;
using Expression = boost::variant<char, int>;
// Named argument: `ident = arg`.
struct IdntifierEqArgument {
Identifier ident;
Expression arg;
};
using Argument = boost::variant<IdntifierEqArgument, Expression>;
using ArgumentList = std::vector<Argument>;
// A call; an empty `args` vector represents "no arguments".
struct Invocation {
Identifier identifier;
ArgumentList args;
};
using Block = std::vector<Invocation>;
// for debug printing
// Renders a named argument as `ident=arg`.
static inline std::ostream& operator<<(std::ostream& os, myAST::IdntifierEqArgument const& named) {
return os << named.ident << "=" << named.arg;
}
}
BOOST_FUSION_ADAPT_STRUCT(myAST::IdntifierEqArgument, ident,arg);
BOOST_FUSION_ADAPT_STRUCT(myAST::Invocation, identifier,args);
namespace myNS {
namespace qi = boost::spirit::qi;
// Grammar for a `{ ... }` block of invocations. It is exposed without a
// skipper; `start` installs qi::space internally.
template <typename Iterator = char const*>
struct myRules : qi::grammar<Iterator, myAST::Block()> {
myRules() : myRules::base_type(start) {
rIdentifier = qi::raw [(qi::alpha | '_') >> *(qi::alnum | '_')];
// A single letter or an integer.
rExpression = qi::alpha | qi::int_;
// Named argument first, plain expression as the fallback.
rArgument = (rIdentifier >> '=' >> rExpression) | rExpression;
// Possibly empty, comma-separated list.
rArgumentList = -(rArgument % ',');
rInvocation = rIdentifier >> '(' >> rArgumentList >> ')';
rBlock = '{' >> +rInvocation >> '}';
start = qi::skip(qi::space) [ rBlock ];
BOOST_SPIRIT_DEBUG_NODES((start)(rBlock)(rInvocation)(rIdentifier)(rArgumentList)(rArgument)(rExpression))
}
private:
qi::rule<Iterator, myAST::Block()> start;
using Skipper = qi::space_type;
// Rules that run under the whitespace skipper.
qi::rule<Iterator, myAST::Argument(), Skipper> rArgument;
qi::rule<Iterator, myAST::ArgumentList(), Skipper> rArgumentList;
qi::rule<Iterator, myAST::Invocation(), Skipper> rInvocation;
qi::rule<Iterator, myAST::Block(), Skipper> rBlock;
// implicit lexemes
qi::rule<Iterator, myAST::Identifier()> rIdentifier;
qi::rule<Iterator, myAST::Expression()> rExpression;
};
}
int main() {
    std::string const input = R"(
{
foo()
bar(a, b, 42)
qux(someThing_awful01 = 9)
}
)";

    // `first` is advanced by the parser; `last` marks the end of the input.
    char const* first = input.data();
    char const* last  = first + input.size();

    myAST::Block block;
    bool const succeeded = parse(first, last, myNS::myRules<>{}, block);

    if (!succeeded) {
        std::cout << "Parse failed\n";
    } else {
        std::cout << "Parse success\n";
        for (auto& call : block) {
            std::cout << call.identifier << "(";
            // Every argument is followed by a comma, including the last one.
            for (auto& argument : call.args) {
                std::cout << argument << ",";
            }
            std::cout << ")\n";
        }
    }

    // Report whatever the parser left unconsumed.
    if (first != last) {
        std::cout << "Remaining unparsed input: '" << std::string(first, last) << "'\n";
    }
}
Prints output
Parse success
foo()
bar(a,b,42,)
qux(someThing_awful01=9,)
Remaining unparsed input: '
'
If there's a structure:
// One parsed record: a type name, a delimiter, a length and a field name.
// A default-constructed record has empty strings and a length of 0.
struct record
{
    std::string type;
    std::string delimiter;
    // The only member without a usable default of its own, so it is
    // initialized in-class instead of in a hand-written constructor.
    uint32_t length = 0;
    std::string name;
};
Which is adapted using boost::fusion, and the below grammar:
// Grammar from the question, as posted (the enclosing `template` line is not
// part of this excerpt). Synthesizes a `record` and skips ASCII whitespace.
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
// NOTE(review): `char('(')` below is a C++ functional cast that yields a
// plain `char`; it is not the `char_` parser. Presumably Qi then treats it
// as a literal - confirm.
// Type name: one or more characters up to a blank or '('.
type %= lexeme[+(char_ - (blank|char('(')))];
// Delimiter in parentheses, quoted with either double or single quotes.
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
// Field name: one or more characters up to a blank or ';', then ';'.
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
// Unsigned length in parentheses.
length %= (char('(') >> uint_ >> char(')'));
// Delimiter and length are each independently optional here, defaulting to
// "" and 0.
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
I am looking to parse 'delimiter' and 'length' as optional. However, one of them has to be present, and if one is present, the other one should not exist.
For Example:
record { string(5) Alex; }
record { string("|") Alex; }
But Not:
record { string(5)("|") Alex; }
record { string Alex; }
I have attempted to do it this way, but compilation fails:
start %=
eps >
lit("record")
>> char('{')
>> type
>> ((delimiter >> attr(0)) | (attr("") >> length))
>> name
>> char('}')
;
Thank you for your help in advance. Below is the full source code:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// One parsed record: a type name, a delimiter, a length and a field name.
// A default-constructed record has empty strings and a length of 0.
struct record
{
    std::string type;
    std::string delimiter;
    // The only member without a usable default of its own, so it is
    // initialized in-class instead of in a hand-written constructor.
    uint32_t length = 0;
    std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(
client::record,
(std::string, type)
(std::string, delimiter)
(uint32_t, length)
(std::string, name)
)
namespace client
{
// Grammar from the question, as posted. Synthesizes a `record` and skips
// ASCII whitespace.
template <typename Iterator>
struct record_parser : qi::grammar<Iterator, record(), ascii::space_type>
{
record_parser() : record_parser::base_type(start)
{
using qi::lit;
using qi::uint_;
using qi::lexeme;
using ascii::char_;
using ascii::blank;
using ascii::string;
using qi::attr;
using qi::eps;
// NOTE(review): `char('(')` below is a C++ functional cast that yields a
// plain `char`; it is not the `char_` parser. Presumably Qi then treats it
// as a literal - confirm.
// Type name: one or more characters up to a blank or '('.
type %= lexeme[+(char_ - (blank|char('(')))];
// Delimiter in parentheses, quoted with either double or single quotes.
delimiter_double_quote %= char('(') >> lexeme[char('"') >> +(char_ - char('"')) >> char('"') ] >> char(')');
delimiter_single_quote %= char('(') >> lexeme[char('\'') >> +(char_ - char('\'')) >> char('\'')] >> char(')');
delimiter %= (delimiter_double_quote | delimiter_single_quote);
// Field name: one or more characters up to a blank or ';', then ';'.
name %= lexeme[+(char_ - (blank|char(';')))] >> char(';');
// Unsigned length in parentheses.
length %= (char('(') >> uint_ >> char(')'));
// Delimiter and length are each independently optional here, defaulting to
// "" and 0.
start %=
eps >
lit("record")
>> char('{')
>> type
>> (delimiter | attr("")) >> (length | attr(0))
>> name
>> char('}')
;
}
qi::rule<Iterator, std::string(), ascii::space_type> type;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_double_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter_single_quote;
qi::rule<Iterator, std::string(), ascii::space_type> delimiter;
qi::rule<Iterator, uint32_t(), ascii::space_type> length;
qi::rule<Iterator, std::string(), ascii::space_type> name;
qi::rule<Iterator, record(), ascii::space_type> start;
};
}
////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////
// Parses one hard-coded record and prints either the parsed fields or the
// position at which parsing stopped.
int main()
{
    std::string storage = "record { string(5) Alex; }";

    using boost::spirit::ascii::space;
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;

    record_parser g; // Our grammar
    client::record rec;

    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();
    bool r = phrase_parse(iter, end, g, space, rec);

    // Success requires both a match and full consumption of the input.
    if (r && iter == end)
    {
        std::cout << boost::fusion::tuple_open('[');
        std::cout << boost::fusion::tuple_close(']');
        std::cout << boost::fusion::tuple_delimiter(", ");
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "got: " << boost::fusion::as_vector(rec) << std::endl;
        std::cout << "\n-------------------------\n";
    }
    else
    {
        // Show at most 30 characters of context. The remaining distance is
        // checked first: forming `iter + 30` beyond `end` is undefined
        // behavior, even if the result is only compared and discarded.
        std::string::const_iterator some = (end - iter > 30) ? iter + 30 : end;
        std::string context(iter, some);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at -->" << context << "...\n";
        std::cout << "-------------------------\n";
    }
    return 0;
}
You can just write out the combinations:
>> ( // exactly one of delimiter / length; "neither" is not accepted
delimiter >> attr(0)
| attr("") >> length
)
The best way to make it work with automatic attribute propagation is to use an AST structure that is similar:
namespace client {
// AST shaped like the grammar: delimiter and length are grouped in one
// sub-struct.
struct record {
std::string type;
// The "(...)" parameter that follows the type name.
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
Full Demo Live On Coliru
Note how much simpler the grammar has been made (all those char(' ') things are unnecessary; use lexemes only if you declare a skipper; use ~char_ instead of character set subtraction; use graph instead of char_ - space etc.).
type = +(graph - '(');
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
name = +(graph - ';');
length = '(' >> uint_ >> ')';
// Exactly one of delimiter / length must follow the type: each branch
// supplies the default for the member it does not parse. There is
// deliberately no "neither" branch, so `record { string Alex; }` is rejected.
start = eps > lit("record") >> '{'
    >> type
    >> (
        delimiter >> attr(0)
        | attr("") >> length
    )
    >> name >> ';' >> '}'
    ;
Full code:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
// AST shaped like the grammar: delimiter and length are grouped in one
// sub-struct.
struct record {
std::string type;
// The "(...)" parameter that follows the type name.
struct param_t {
std::string delimiter;
uint32_t length = 0;
} param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(client::record::param_t, delimiter, length)
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client {
    // Both printers render the adapted struct through its Fusion vector form.
    std::ostream& operator<<(std::ostream& os, record::param_t const& v) {
        os << boost::fusion::as_vector(v);
        return os;
    }

    std::ostream& operator<<(std::ostream& os, record const& v) {
        os << boost::fusion::as_vector(v);
        return os;
    }
}
namespace client
{
    /// Grammar for `record { <type>(<param>) <name>; }` where `<param>` is
    /// either a quoted delimiter or an unsigned length - exactly one of the
    /// two. Synthesizes a `record`; `Skipper` defaults to ASCII whitespace.
    template <typename Iterator, typename Skipper = qi::ascii::space_type>
    struct record_parser : qi::grammar<Iterator, record(), Skipper>
    {
        record_parser() : record_parser::base_type(start)
        {
            using namespace qi;

            type = +(graph - '(');
            delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
            delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
            delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
            name = +(graph - ';');
            length = '(' >> uint_ >> ')';

            // Each branch parses one member of param_t and supplies the
            // default for the other. There is deliberately no "neither"
            // branch: a record without delimiter and without length must be
            // rejected.
            start = eps > lit("record") >> '{'
                >> type
                >> (
                    delimiter >> attr(0)
                    | attr("") >> length
                )
                >> name >> ';' >> '}'
                ;
        }

      private:
        qi::rule<Iterator, record(), Skipper> start;
        qi::rule<Iterator, uint32_t(), Skipper> length;
        qi::rule<Iterator, std::string(), Skipper> delimiter;
        // lexemes
        qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
    };
}
int main()
{
for (std::string const storage : {
"record { string(5) Alex; }",
"record { string(\"|\") Alex; }",
})
{
typedef std::string::const_iterator iterator_type;
typedef client::record_parser<iterator_type> record_parser;
record_parser g; // Our grammar
client::record rec;
auto iter = storage.begin(), end = storage.end();
bool r = phrase_parse(iter, end, g, qi::ascii::space, rec);
if (r) {
std::cout << "Parsing succeeded: " << rec << std::endl;
} else {
std::cout << "Parsing failed\n";
}
if (iter != end) {
std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
}
}
}
Prints:
Parsing succeeded: (string ( 5) Alex)
Parsing succeeded: (string (| 0) Alex)
Because it's 2016, adding a X3 example too. Once again, taking the variant approach, which I find to be typical in Spirit code.
namespace AST {
// Record whose parameter is either a delimiter string or an unsigned length;
// the variant holds exactly one of the two.
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
// X3 parsers for the record syntax; `start` is the entry point.
namespace parser {
using namespace x3;
// Builds a parser for one or more characters enclosed in the quote char `q`.
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
// Parenthesized, single- or double-quoted delimiter.
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
// Parenthesized unsigned length.
static auto const length = '(' >> uint_ >> ')';
// Requires exactly one of delimiter / length after the type.
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
That's all. The calling code is virtually unchanged:
// Runs the X3 parser over each sample record and reports the outcome.
// The last sample has neither delimiter nor length.
int main()
{
    for (std::string const storage : {
            "record { string(5) Alex; }",
            "record { string(\"|\") Alex; }",
            "record { string Alex; }",
        })
    {
        // (The unused `iterator_type` typedef was dropped: the X3 parser
        // deduces its iterator type from the arguments.)
        AST::record rec;
        auto iter = storage.begin(), end = storage.end();
        bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
        if (r) {
            std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
        } else {
            std::cout << "Parsing failed\n";
        }
        // Report whatever the parser left unconsumed.
        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
        }
    }
}
Everything compiles a lot quicker and I'd not be surprised if the resultant code was at least twice as fast at runtime too.
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace x3 = boost::spirit::x3;
namespace AST {
// Record whose parameter is either a delimiter string or an unsigned length;
// the variant holds exactly one of the two.
struct record {
std::string type;
boost::variant<std::string, uint32_t> param;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(AST::record, type, param, name)
// X3 parsers for the record syntax; `start` is the entry point.
namespace parser {
using namespace x3;
// Builds a parser for one or more characters enclosed in the quote char `q`.
auto quoted = [](char q) { return q >> +~char_(q) >> q; };
static auto const type = +(graph - '(');
// Parenthesized, single- or double-quoted delimiter.
static auto const delimiter = '(' >> (quoted('"') | quoted('\'')) >> ')';
static auto const name = +(graph - ';');
// Parenthesized unsigned length.
static auto const length = '(' >> uint_ >> ')';
// Requires exactly one of delimiter / length after the type.
static auto const start = lit("record") >> '{' >> type >> (delimiter | length) >> name >> ';' >> '}';
}
#include <iostream>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/optional/optional_io.hpp>
// Runs the X3 parser over each sample record and reports the outcome.
// The last sample has neither delimiter nor length.
int main()
{
    for (std::string const storage : {
            "record { string(5) Alex; }",
            "record { string(\"|\") Alex; }",
            "record { string Alex; }",
        })
    {
        // (The unused `iterator_type` typedef was dropped: the X3 parser
        // deduces its iterator type from the arguments.)
        AST::record rec;
        auto iter = storage.begin(), end = storage.end();
        bool r = phrase_parse(iter, end, parser::start, x3::ascii::space, rec);
        if (r) {
            std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
        } else {
            std::cout << "Parsing failed\n";
        }
        // Report whatever the parser left unconsumed.
        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'\n";
        }
    }
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing failed
Remaining: 'record { string Alex; }'
sehe's first answer is perfect (or would be if he corrected what he realized in the comments), but I just wanted to add an explanation of the problem and a possible alternative. The code below is based on that excellent answer.
You have a couple of problems with the attributes of your start rule. The attribute you want to get is record which is basically tuple<string,string,uint32_t,string>. Let's see the attributes of several parsers:
Something similar (but simpler) to your original rule:
Attribute of: "lit("record") >> char_('{') >> type >> delimiter >> length >> name >> char_('}')"
tuple<char,string,string,uint32_t,string,char>
As you can see you have two extra char caused by your use of char_ (which has an attribute of char) instead of lit (which has no attribute). omit[char_] could also work, but would be a little silly.
Let's change char_ to lit:
Attribute of: "lit("record") >> lit('{') >> type >> delimiter >> length >> name >> lit('}')"
tuple<string,string,uint32_t,string>
Which is what we want.
Your original rule with lit:
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr("")) >> (length | attr(0)) >> name >> lit('}')"
tuple<string,variant<string,char const (&)[1]>,variant<uint32_t,int>,string>
Since the branches of | aren't identical, you get variants instead of the attribute you want. (In this simple case everything works as if there were no variants though)
Let's remove the variants (since they cause errors in more complex scenarios):
Attribute of: "lit("record") >> lit('{') >> type >> (delimiter | attr(string())) >> (length | attr(uint32_t())) >> name >> lit('}')"
tuple<string,string,uint32_t,string>
This works in the cases you want but also when both are missing.
sehe's approach:
Attribute of: "lit("record") >> lit('{') >> type >> ((delimiter >> attr(uint32_t())) | (attr(string()) >> length)) >> name >> lit('}')"
tuple<string,tuple<string,uint32_t>,string>
Looking at this synthesized attribute you can see the need to create the param_t helper struct to make your record attribute match.
See on Coliru a way to "calculate" the previous attributes.
The possible alternative is a custom directive using boost::fusion::flatten_view. Keep in mind that this directive has very little testing so I would recommend the approach shown by sehe, but it seems to work (at least in this case).
The example in this question with this directive on Wandbox
Several other examples where this directive can be useful
flatten_directive.hpp
#pragma once
#include <boost/spirit/home/qi/meta_compiler.hpp>
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parser.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/support/common_terminals.hpp>
#include <boost/spirit/home/qi/detail/attributes.hpp>
#include <boost/spirit/home/support/info.hpp>
#include <boost/spirit/home/support/handles_container.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/for_each.hpp>
#include <boost/fusion/include/zip_view.hpp>
// Declares the `flatten` terminal used as `custom::flatten[...]` in Qi expressions,
// together with the tag type `custom::tag::flatten` that the specializations in this
// header are keyed on.
namespace custom
{
BOOST_SPIRIT_TERMINAL(flatten);
}
namespace boost { namespace spirit {

    // Enabler: marks custom::tag::flatten as a valid directive in the Qi domain,
    // so that `custom::flatten[...]` is accepted inside Qi parser expressions.
    template <>
    struct use_directive<qi::domain, custom::tag::flatten>
      : mpl::true_
    {
    };

}}
namespace custom
{
    /// Qi directive parser behind `flatten[subject]`.
    ///
    /// Parses exactly like `subject`, but exposes the subject's (possibly nested)
    /// fusion attribute through a boost::fusion::flatten_view, and copies the
    /// flattened elements one by one into the attribute supplied by the caller.
    template <typename Subject>
    struct flatten_directive : boost::spirit::qi::unary_parser<flatten_directive<Subject> >
    {
        typedef Subject subject_type;

        flatten_directive(Subject const& subject)
          : subject(subject) {}

        /// The directive's attribute is a flatten_view over the subject's attribute.
        template <typename Context, typename Iterator>
        struct attribute
        {
            typedef boost::fusion::flatten_view<typename
                boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type>
            type;
        };

        /// Runs the subject parser into a local attribute of the subject's own type and,
        /// on success, assigns every element of the flattened result to the matching
        /// element of `attr`.
        ///
        /// @return true on a match; on failure `first` is restored to its value on
        ///         entry (undoing the pre-skip as well) and false is returned.
        template <typename Iterator, typename Context
          , typename Skipper, typename Attribute>
        bool parse(Iterator& first, Iterator const& last
          , Context& context, Skipper const& skipper
          , Attribute& attr) const
        {
            Iterator temp = first; // saved so a failed match can rewind, including the pre-skip
            boost::spirit::qi::skip_over(first, last, skipper);

            typename boost::spirit::traits::attribute_of<subject_type, Context, Iterator>::type original_attr;
            if (subject.parse(first, last, context, skipper, original_attr)) // parse normally
            {
                // View the nested result as one flat sequence.
                typename attribute<Context, Iterator>::type flattened_attr(original_attr);

                // Pair up (caller attribute element, flattened element) and assign each pair.
                typedef boost::fusion::vector<Attribute&, typename attribute<Context, Iterator>::type&> sequences;
                boost::fusion::for_each(
                    boost::fusion::zip_view<sequences>(
                        sequences(attr, flattened_attr)
                    )
                    ,
                    [](const auto& pair) // substitute with a functor with templated operator() to support c++98/03
                    {
                        boost::spirit::traits::assign_to(boost::fusion::at_c<1>(pair), boost::fusion::at_c<0>(pair));
                    }
                );
                return true;
            }
            first = temp;
            return false;
        }

        /// Diagnostic description: "flatten" wrapping the subject's own description.
        template <typename Context>
        boost::spirit::info what(Context& context) const
        {
            // `info` must be fully qualified: this struct lives in namespace `custom`,
            // and argument-dependent lookup never finds class names, so the unqualified
            // spelling fails to compile as soon as what() is instantiated.
            return boost::spirit::info("flatten", subject.what(context));
        }

        Subject subject;
    };
}//custom
///////////////////////////////////////////////////////////////////////////
// Parser generators: make_xxx function (objects)
///////////////////////////////////////////////////////////////////////////
namespace boost { namespace spirit { namespace qi {

    // Generator hook: tells Qi how to build the parser object for `flatten[subject]`.
    // Both unused_type arguments are ignored; only the compiled subject is wrapped.
    template <typename Subject, typename Modifiers>
    struct make_directive<custom::tag::flatten, Subject, Modifiers>
    {
        typedef custom::flatten_directive<Subject> result_type;

        result_type operator()(unused_type, Subject const& wrapped, unused_type) const
        {
            result_type directive(wrapped);
            return directive;
        }
    };

}}}
namespace boost { namespace spirit { namespace traits {

    // flatten[...] has a semantic action exactly when its subject has one.
    template <typename Subject>
    struct has_semantic_action<custom::flatten_directive<Subject> >
      : unary_has_semantic_action<Subject>
    {
    };

    // Container handling is likewise delegated to the subject parser.
    template <typename Subject, typename Attribute, typename Context, typename Iterator>
    struct handles_container<custom::flatten_directive<Subject>, Attribute, Context, Iterator>
      : unary_handles_container<Subject, Attribute, Context, Iterator>
    {
    };

}}}
main.cpp
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/flatten_view.hpp>
#include <boost/fusion/include/copy.hpp>
#include "flatten_directive.hpp"
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
    // Flat AST for one `record { ... }` declaration: the type word, the optional
    // quoted delimiter, the optional numeric length (0 unless set) and the field name.
    struct record {
        std::string type;
        std::string delimiter;
        uint32_t    length{0};
        std::string name;
    };
}
// Adapt client::record as a Fusion sequence, listing the members type, delimiter, length, name in that order.
BOOST_FUSION_ADAPT_STRUCT(client::record, type, delimiter, length, name)
namespace client {
    // Streams a record as a Fusion vector, using '[' / ']' as the tuple brackets
    // and ", " as the element separator.
    std::ostream& operator<<(std::ostream& os, record const& v)
    {
        os << boost::fusion::tuple_open('[');
        os << boost::fusion::tuple_close(']');
        os << boost::fusion::tuple_delimiter(", ");
        os << boost::fusion::as_vector(v);
        return os;
    }
}
namespace client
{
// Grammar for `record { <type> (<delimiter> | <length>) <name>; }` that fills a flat
// client::record. The start rule is wrapped in custom::flatten so the nested
// (delimiter, length) pair produced by the alternative is spliced into the record.
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
// type: one or more graphic characters, stopping before '('
type = +(graph - '(');
// delimiter text: non-empty, enclosed in double or single quotes
delimiter_double_quote = '"' >> +~char_('"') >> '"';
delimiter_single_quote = "'" >> +~char_("'") >> "'";
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// name: one or more graphic characters, stopping before ';'
name = +(graph - ';');
// length: unsigned integer in parentheses
length = '(' >> uint_ >> ')';
start =
custom::flatten[
lit("record")
>> '{'
>> type
>> (
// each branch supplies a default via attr() for the member it does not parse
delimiter >> attr(uint32_t())//the attributes of both branches must be exactly identical
| attr(std::string("")) >> length//const char[1]!=std::string int!=uint32_t
)
>> name
>> ';'
>> '}'
]
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;

    // Run every sample through a freshly constructed grammar and report the outcome.
    for (std::string const storage : {
            "record { string(5) Alex; }",
            "record { string(\"|\") Alex; }",
            "record { string Alex; }",
            "record { string (\"|\")(5) Alex; }"
        })
    {
        record_parser g; // Our grammar
        client::record rec;

        iterator_type iter = storage.begin();
        iterator_type const end = storage.end();
        bool const matched = phrase_parse(iter, end, g, qi::ascii::space, rec);

        if (!matched) {
            std::cout << "Parsing failed\n";
        }
        else {
            std::cout << "Parsing succeeded: " << rec << std::endl;
        }

        // Show whatever input the parser did not consume.
        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
        }
    }
}
Here's the more typical approach that parses into a variant<std::string, uint32_t> so the AST reflects that only one can be present:
With Nil-Param
With the same misunderstanding as in my first answer, allowing both params to be optional:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
    // Marker alternative for "no parameter present"; it streams as "(nil)".
    struct nil {
        friend std::ostream& operator<<(std::ostream& os, nil)
        {
            os << "(nil)";
            return os;
        }
    };

    // AST for one record: the parameter is nothing, a delimiter string, or a length.
    struct record {
        std::string type;
        boost::variant<nil, std::string, uint32_t> param;
        std::string name;
    };
}
// Adapt client::record as a Fusion sequence, listing the members type, param, name in that order.
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
// Grammar for `record { <type> [<delimiter> | <length>] <name>; }` where the parameter
// is optional: when neither form is present, attr(nil{}) supplies a nil value instead.
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
// type: one or more graphic characters, stopping before '('
type = +(graph - '(');
// delimiter text: non-empty, enclosed in double or single quotes
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// name: one or more graphic characters, stopping before ';'
name = +(graph - ';');
// length: unsigned integer in parentheses
length = '(' >> uint_ >> ')';
// `eps >` turns the rest of the rule into an expectation point
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length | attr(nil{}))
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;

    // Parse each sample with its own grammar instance and print the resulting AST.
    for (std::string const storage : {
            "record { string(5) Alex; }",
            "record { string(\"|\") Alex; }",
            "record { string Alex; }",
        })
    {
        record_parser g; // Our grammar
        client::record rec;

        iterator_type iter = storage.begin();
        iterator_type const end = storage.end();
        bool const matched = phrase_parse(iter, end, g, qi::ascii::space, rec);

        if (!matched) {
            std::cout << "Parsing failed\n";
        } else {
            std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
        }

        // Show whatever input the parser did not consume.
        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
        }
    }
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
Parsing succeeded: (string (nil) Alex)
Without Nil-Param
Requiring exactly one:
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <string>
namespace qi = boost::spirit::qi;
namespace client {
    // AST for one record: the parameter is either a delimiter string or a length.
    struct record {
        std::string                           type;
        boost::variant<std::string, uint32_t> param;
        std::string                           name;
    };
}
// Adapt client::record as a Fusion sequence, listing the members type, param, name in that order.
BOOST_FUSION_ADAPT_STRUCT(client::record, type, param, name)
namespace client
{
// Grammar for `record { <type> (<delimiter> | <length>) <name>; }` where exactly one
// of the two parameter forms must be present (there is no fallback alternative).
template <typename Iterator, typename Skipper = qi::ascii::space_type>
struct record_parser : qi::grammar<Iterator, record(), Skipper>
{
record_parser() : record_parser::base_type(start)
{
using namespace qi;
// type: one or more graphic characters, stopping before '('
type = +(graph - '(');
// delimiter text: non-empty, enclosed in double or single quotes
delimiter_double_quote = '"' >> +~char_('"') >> '"' ;
delimiter_single_quote = "'" >> +~char_("'") >> "'" ;
delimiter = '(' >> (delimiter_double_quote | delimiter_single_quote) >> ')';
// name: one or more graphic characters, stopping before ';'
name = +(graph - ';');
// length: unsigned integer in parentheses
length = '(' >> uint_ >> ')';
// `eps >` turns the rest of the rule into an expectation point, so a record that
// does not match raises qi::expectation_failure instead of just returning false
start = eps > lit("record") >> '{'
>> type
>> (delimiter | length)
>> name >> ';' >> '}'
;
}
private:
qi::rule<Iterator, record(), Skipper> start;
qi::rule<Iterator, uint32_t(), Skipper> length;
qi::rule<Iterator, std::string(), Skipper> delimiter;
// lexemes
qi::rule<Iterator, std::string()> type, delimiter_double_quote, delimiter_single_quote, name;
};
}
int main()
{
    typedef std::string::const_iterator iterator_type;
    typedef client::record_parser<iterator_type> record_parser;

    // Parse each sample with its own grammar instance and print the resulting AST.
    // The expectation_failure thrown by the grammar is deliberately not caught here.
    for (std::string const storage : {
            "record { string(5) Alex; }",
            "record { string(\"|\") Alex; }",
            "record { string Alex; }",
        })
    {
        record_parser g; // Our grammar
        client::record rec;

        iterator_type iter = storage.begin();
        iterator_type const end = storage.end();
        bool const matched = phrase_parse(iter, end, g, qi::ascii::space, rec);

        if (!matched) {
            std::cout << "Parsing failed\n";
        } else {
            std::cout << "Parsing succeeded: " << boost::fusion::as_vector(rec) << std::endl;
        }

        // Show whatever input the parser did not consume.
        if (iter != end) {
            std::cout << "Remaining: '" << std::string(iter, end) << "'...\n";
        }
    }
}
Prints
Parsing succeeded: (string 5 Alex)
Parsing succeeded: (string | Alex)
terminate called after throwing an instance of 'boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::spirit::qi::expectation_failure<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > >'
what(): boost::spirit::qi::expectation_failure
I am interested in Boost Spirit nowadays and trying to build something. Can we implement something like a const in C++ using Spirit? For instance, user will define an item like;
constant var PROG_LANG="Java";
"constant var" seems weird, I admit, but you get the idea. I searched the internet but couldn't find anything about it.
What the BigBoss said :)
Only I'd do without the semantic actions - making it far less... verbose (See also Boost Spirit: "Semantic actions are evil"?):
vdef =
("constant" >> attr(true) | attr(false)) >>
"var" >> identifier >> '=' >> identifier_value >> ';' ;
That's all. This uses qi::attr to account for the default (missing constant keyword).
Here's a full demo with output:
http://liveworkspace.org/code/c9e4bef100d2249eb4d4b88205f85c4b
Output:
parse success: 'var myvariable = "has some value";'
data: false;myvariable;has some value;
parse success: 'constant var myvariable = "has some value";'
data: true;myvariable;has some value;
Code:
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
// One parsed variable definition: whether it carried the `constant` keyword,
// plus its name and value. A default-constructed definition is non-constant.
struct var_definition {
    bool        is_constant;
    std::string name;
    std::string value;

    var_definition()
      : is_constant(false)
    {
    }
};
// Adapt var_definition as a Fusion sequence (is_constant, name, value), the same order
// in which the vdef rule produces its attributes.
BOOST_FUSION_ADAPT_STRUCT(var_definition, (bool, is_constant)(std::string, name)(std::string, value))
void doParse(const std::string& input)
{
typedef std::string::const_iterator It;
qi::rule<It, std::string()> identifier, identifier_value;
qi::rule<It, var_definition(), qi::space_type> vdef;
{
using namespace qi;
identifier_value = '"' >> lexeme [ +~char_('"') ] > '"';
identifier = lexeme [ +graph ];
vdef =
("constant" >> attr(true) | attr(false)) >>
"var" >> identifier >> '=' >> identifier_value >> ';' ;
}
var_definition data;
It f(std::begin(input)), l(std::end(input));
bool ok = qi::phrase_parse(f,l,vdef,qi::space,data);
if (ok)
{
std::cout << "parse success: '" << input << "'\n";
std::cout << "data: " << karma::format_delimited(karma::auto_, ';', data) << "\n";
}
}
int main()
{
    // One definition without and one with the optional `constant` keyword.
    const char* const samples[] = {
        "var myvariable = \"has some value\";",
        "constant var myvariable = \"has some value\";",
    };

    for (const char* sample : samples)
    {
        doParse(sample);
    }
}
I don't quite understand your question: Spirit is a parser, and it has nothing to do with the meaning of constant; it can only parse it. But if you mean parsing an optional keyword like constant, then it can be something like:
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef std::string::const_iterator it;
// Result of parsing one variable definition. A default-constructed definition
// is non-constant with empty name and value.
struct var_definition {
    bool        is_constant;
    std::string name;
    std::string value;

    var_definition()
      : is_constant(false)
    {
    }
};
// Rules declared without a skipper; each synthesizes a std::string.
qi::rule<it, std::string()> identifier;
qi::rule<it, std::string()> identifier_value;
// Top-level rule: synthesizes a var_definition and uses the ASCII space skipper.
qi::rule<it, var_definition(), boost::spirit::ascii::space_type> vdef;
// Semantic-action helper: flags the definition as constant.
void mark_var_as_constant(var_definition& vd)
{
    vd.is_constant = true;
}
// Semantic-action helper: stores the parsed identifier as the variable's name.
void set_var_name(var_definition& vd, std::string const& val)
{
    vd.name = val;
}
// Semantic-action helper: stores the parsed text as the variable's value.
void set_var_value(var_definition& vd, std::string const& val)
{
    vd.value = val;
}
// [constant] var <identifier> = <identifier_value> ;
// Every field of the var_definition is filled by a semantic action on _val, so the
// rule is assigned with plain `=` (no automatic attribute propagation), and the
// punctuation is matched with lit(), which exposes no attribute, rather than char_().
vdef =
-qi::lit("constant")[phx::bind(mark_var_as_constant, qi::_val)] >>
qi::lit("var") >>
identifier[phx::bind(set_var_name, qi::_val, qi::_1)] >>
qi::lit('=') >>
identifier_value[phx::bind(set_var_value, qi::_val, qi::_1)] >>
qi::lit(';');
Of course there are other ways, for example:
(qi::lit("constant")[phx::bind(mark_var_as_constant, qi::_val)] | qi::eps)
And the easiest is:
qi::hold[ qi::lit("constant")[phx::bind(mark_var_as_constant, qi::_val)] ]
I'm writing a DSL and using a Boost Spirit lexer to tokenize my input. In my grammar, I want a rule similar to this (where tok is the lexer):
header_block =
tok.name >> ':' >> tok.stringval > ';' >>
tok.description >> ':' >> tok.stringval > ';'
;
Rather than specifying reserved words for the language (e.g. "name", "description") and deal with synchronizing these between the lexer and grammar, I want to just tokenize everything that matches [a-zA-Z_]\w* as a single token type (e.g. tok.symbol), and let the grammar sort it out. If I weren't using a lexer, I might do something like this:
stringval = lexeme['"' >> *(char_ - '"') >> '"'];
header_block =
lit("name") >> ':' >> stringval > ';' >>
lit("description") >> ':' >> stringval > ';'
;
With a lexer in the mix, I can compile the following rule, but of course it matches more than I want — it doesn't care about the particular symbol values "name" and "description":
header_block =
tok.symbol >> ':' >> tok.stringval > ';' >>
tok.symbol >> ':' >> tok.stringval > ';'
;
What I'm looking for is something like this:
header_block =
specific_symbol_matcher("name") >> ':' >> tok.stringval > ';' >>
specific_symbol_matcher("description") >> ':' >> tok.stringval > ';'
;
Does Qi provide anything I can use instead of my specific_symbol_matcher hand-waving, there? I'd rather not write my own matcher if I can get close using stuff that's provided. If I must write my own matcher, can anyone suggest how to do that?
If the token exposes a std::string, you should just be able to do:
// Accept the generic keyword token only when its text equals the expected word;
// assigning false to _pass makes that alternative fail.
statement =
( tok.keyword [ qi::_pass = (_1 == "if") ] >> if_stmt )
| ( tok.keyword [ qi::_pass = (_1 == "while") ] >> while_stmt );
If I understood you right, this is, more or less, what you were asking.
While you are at it, do look at qi::symbols<> and an especially nifty application of that, known as the Nabialek Trick.
Bonus material
In case you're just struggling to make an existing grammar work with a lexer, here's what I just did with the calc_utree_ast.cpp example to make it work with a lexer.
It shows
how you can directly consume the exposed attributes
how you can still parse based on char-literals, as long as these char literals are registered as (anonymous) tokens
how the (simple) expression grammar was minimally changed
how the skipping behaviour was moved into the lexer
///////////////////////////////////////////////////////////////////////////////
//
// Plain calculator example demonstrating the grammar. The parser is a
// syntax checker only and does not do any semantic evaluation.
//
// [ JDG May 10, 2002 ] spirit1
// [ JDG March 4, 2007 ] spirit2
// [ HK November 30, 2010 ] spirit2/utree
// [ SH July 17, 2012 ] use a lexer
//
///////////////////////////////////////////////////////////////////////////////
#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/support_utree.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_function.hpp>
#include <iostream>
#include <string>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace spirit = boost::spirit;
namespace phx = boost::phoenix;
// base iterator type
typedef std::string::const_iterator BaseIteratorT;
// token type
typedef lex::lexertl::token<BaseIteratorT, boost::mpl::vector<char, uint32_t> > TokenT;
// lexer type
typedef lex::lexertl::actor_lexer<TokenT> LexerT;
/// Lexer definition for the calculator: unsigned integer tokens, the single-character
/// operator and parenthesis tokens, and whitespace, which is matched but discarded.
template <typename LexerT_>
struct Tokens: public lex::lexer<LexerT_> {
    Tokens() {
        // Token patterns are regular expressions.
        uint_ = "[0-9]+";
        // A character class with '+' is required here: the bare string " \t\r\n" would
        // be a regex matching only that exact four-character sequence, so ordinary
        // blanks between tokens would never be skipped.
        space = "[ \\t\\r\\n]+";

        // Register the tokens; the char literals become anonymous tokens that the
        // grammar can match by the same literal.
        this->self += uint_;
        this->self += '+';
        this->self += '-';
        this->self += '*';
        this->self += '/';
        this->self += '(';
        this->self += ')';

        using lex::_pass;
        using lex::pass_flags;
        // Whitespace is recognized but never handed to the parser.
        this->self += space [ _pass = pass_flags::pass_ignore ];
    }

    lex::token_def<uint32_t> uint_;
    lex::token_def<lex::omit> space;
};
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace spirit = boost::spirit;
struct expr
{
template <typename T1, typename T2 = void>
struct result { typedef void type; };
expr(char op) : op(op) {}
void operator()(spirit::utree& expr, spirit::utree const& rhs) const
{
spirit::utree lhs;
lhs.swap(expr);
expr.push_back(spirit::utf8_symbol_range_type(&op, &op+1));
expr.push_back(lhs);
expr.push_back(rhs);
}
char const op;
};
// Lazy (Phoenix) wrappers around expr, one per binary operator, for use in semantic actions.
boost::phoenix::function<expr> const plus(expr('+'));
boost::phoenix::function<expr> const minus(expr('-'));
boost::phoenix::function<expr> const times(expr('*'));
boost::phoenix::function<expr> const divide(expr('/'));
struct negate_expr
{
template <typename T1, typename T2 = void>
struct result { typedef void type; };
void operator()(spirit::utree& expr, spirit::utree const& rhs) const
{
char const op = '-';
expr.clear();
expr.push_back(spirit::utf8_symbol_range_type(&op, &op+1));
expr.push_back(rhs);
}
};
boost::phoenix::function<negate_expr> neg;
///////////////////////////////////////////////////////////////////////////////
// Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
// Expression grammar over lexer tokens that builds a utree in prefix form.
// `Iterator` is the token iterator type; the constructor takes the token
// definitions so the grammar can refer to toks.uint_.
template <typename Iterator>
struct calculator : qi::grammar<Iterator, spirit::utree()>
{
template <typename Tokens>
calculator(Tokens const& toks) : calculator::base_type(expression)
{
using qi::_val;
using qi::_1;
// expression: term followed by any number of +term / -term; each step folds the
// tree built so far and the new term into (op lhs rhs)
expression =
term [_val = _1]
>> *( ('+' >> term [plus(_val, _1)])
| ('-' >> term [minus(_val, _1)])
)
;
// term: same folding scheme for * and /
term =
factor [_val = _1]
>> *( ('*' >> factor [times(_val, _1)])
| ('/' >> factor [divide(_val, _1)])
)
;
// factor: unsigned integer token, parenthesized expression, or a signed factor
factor =
toks.uint_ [_val = _1]
| '(' >> expression [_val = _1] >> ')'
| ('-' >> factor [neg(_val, _1)])
| ('+' >> factor [_val = _1])
;
BOOST_SPIRIT_DEBUG_NODE(expression);
BOOST_SPIRIT_DEBUG_NODE(term);
BOOST_SPIRIT_DEBUG_NODE(factor);
}
qi::rule<Iterator, spirit::utree()> expression, term, factor;
};
}
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    using boost::spirit::utree;
    typedef std::string::const_iterator iterator_type;
    typedef Tokens<LexerT>::iterator_type IteratorT;
    typedef client::calculator<IteratorT> calculator;

    Tokens<LexerT> l;
    calculator calc(l); // Our grammar

    // Read one expression per line until EOF, an empty line, or a line starting with q/Q.
    std::string str;
    while (std::getline(std::cin, str))
    {
        bool const quit = str.empty() || str[0] == 'q' || str[0] == 'Q';
        if (quit)
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        utree ut;

        // Tokenize and parse in one pass; success also requires the whole line to be consumed.
        bool const r = lex::tokenize_and_parse(iter, end, l, calc, ut);
        if (!(r && iter == end))
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
            continue;
        }

        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded: " << ut << "\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
For the input
8*12312*(4+5)
It prints (without debug info)
Parsing succeeded: ( * ( * 8 12312 ) ( + 4 5 ) )