I want to use boost spirit to parse an expression like
function1(arg1, arg2, function2(arg1, arg2, arg3),
function3(arg1,arg2))
and call corresponding c++ functions. What should be the grammar to parse above expression and call the corresponding c++ function by phoneix::bind()?
I have 2 types of functions to call
1) string functions;
wstring GetSubString(wstring stringToCut, int position, int length);
wstring GetStringToken(wstring stringToTokenize, wstring seperators,
int tokenNumber );
2) Functions that return integer;
int GetCount();
int GetId(wstring srcId, wstring srcType);
Second Answer (more pragmatic)
Here's a second take, for comparison:
Just in case you really didn't want to parse into an abstract syntax tree representation, but rather evaluate the functions on-the-fly during parsing, you can simplify the grammar.
It comes in at 92 lines as opposed to 209 lines in the first answer. It really depends on what you're implementing which approach is more suitable.
This shorter approach has some downsides:
less flexible (not reusable)
less robust (if functions have side effects, they will happen even if parsing fails halfway)
less extensible (the supported functions are hardwired into the grammar1)
Full code:
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/phoenix/function.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
typedef boost::variant<int, std::string> value;
//////////////////////////////////////////////////
// Demo functions:
value AnswerToLTUAE() {
return 42;
}
value ReverseString(value const& input) {
auto& as_string = boost::get<std::string>(input);
return std::string(as_string.rbegin(), as_string.rend());
}
value Concatenate(value const& a, value const& b) {
std::ostringstream oss;
oss << a << b;
return oss.str();
}
BOOST_PHOENIX_ADAPT_FUNCTION_NULLARY(value, AnswerToLTUAE_, AnswerToLTUAE)
BOOST_PHOENIX_ADAPT_FUNCTION(value, ReverseString_, ReverseString, 1)
BOOST_PHOENIX_ADAPT_FUNCTION(value, Concatenate_, Concatenate, 2)
//////////////////////////////////////////////////
// Parser grammar
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, value(), Skipper>
{
parser() : parser::base_type(expr_)
{
using namespace qi;
function_call_ =
(lit("AnswerToLTUAE") > '(' > ')')
[ _val = AnswerToLTUAE_() ]
| (lit("ReverseString") > '(' > expr_ > ')')
[ _val = ReverseString_(_1) ]
| (lit("Concatenate") > '(' > expr_ > ',' > expr_ > ')')
[ _val = Concatenate_(_1, _2) ]
;
string_ = as_string [
lexeme [ "'" >> *~char_("'") >> "'" ]
];
value_ = int_ | string_;
expr_ = function_call_ | value_;
on_error<fail> ( expr_, std::cout
<< phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
<< phx::construct<std::string>(_3, _2) << phx::val("\"\n"));
BOOST_SPIRIT_DEBUG_NODES((expr_)(function_call_)(value_)(string_))
}
private:
qi::rule<It, value(), Skipper> value_, function_call_, expr_, string_;
};
int main()
{
for (const std::string input: std::vector<std::string> {
"-99",
"'string'",
"AnswerToLTUAE()",
"ReverseString('string')",
"Concatenate('string', 987)",
"Concatenate('The Answer Is ', AnswerToLTUAE())",
})
{
auto f(std::begin(input)), l(std::end(input));
const static parser<decltype(f)> p;
value direct_eval;
bool ok = qi::phrase_parse(f,l,p,qi::space,direct_eval);
if (!ok)
std::cout << "invalid input\n";
else
{
std::cout << "input:\t" << input << "\n";
std::cout << "eval:\t" << direct_eval << "\n\n";
}
if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
}
}
Note how, instead of using BOOST_PHOENIX_ADAPT_FUNCTION* we could have directly used boost::phoenix::bind.
The output is still the same:
input: -99
eval: -99
input: 'string'
eval: string
input: AnswerToLTUAE()
eval: 42
input: ReverseString('string')
eval: gnirts
input: Concatenate('string', 987)
eval: string987
input: Concatenate('The Answer Is ', AnswerToLTUAE())
eval: The Answer Is 42
1 This last downside is easily remedied by using the 'Nabialek Trick'
First Answer (complete)
I've gone and implemented a simple recursive expression grammar for functions having up-to-three parameters:
for (const std::string input: std::vector<std::string> {
"-99",
"'string'",
"AnswerToLTUAE()",
"ReverseString('string')",
"Concatenate('string', 987)",
"Concatenate('The Answer Is ', AnswerToLTUAE())",
})
{
auto f(std::begin(input)), l(std::end(input));
const static parser<decltype(f)> p;
expr parsed_script;
bool ok = qi::phrase_parse(f,l,p,qi::space,parsed_script);
if (!ok)
std::cout << "invalid input\n";
else
{
const static generator<boost::spirit::ostream_iterator> g;
std::cout << "input:\t" << input << "\n";
std::cout << "tree:\t" << karma::format(g, parsed_script) << "\n";
std::cout << "eval:\t" << evaluate(parsed_script) << "\n";
}
if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
}
Which prints:
input: -99
tree: -99
eval: -99
input: 'string'
tree: 'string'
eval: string
input: AnswerToLTUAE()
tree: nullary_function_call()
eval: 42
input: ReverseString('string')
tree: unary_function_call('string')
eval: gnirts
input: Concatenate('string', 987)
tree: binary_function_call('string',987)
eval: string987
input: Concatenate('The Answer Is ', AnswerToLTUAE())
tree: binary_function_call('The Answer Is ',nullary_function_call())
eval: The Answer Is 42
Some notes:
I separated parsing from execution (which is always a good idea IMO)
I implemented function evaluation for zero, one or two parameters (this should be easy to extend)
Values are assumed to be integers or strings (should be easy to extend)
I added a karma generator to display the parsed expression (with a TODO marked in the comment)
I hope this helps:
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/variant/recursive_wrapper.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
typedef boost::variant<int, std::string> value;
typedef boost::variant<value, boost::recursive_wrapper<struct function_call> > expr;
typedef std::function<value() > nullary_function_impl;
typedef std::function<value(value const&) > unary_function_impl;
typedef std::function<value(value const&, value const&)> binary_function_impl;
typedef boost::variant<nullary_function_impl, unary_function_impl, binary_function_impl> function_impl;
typedef qi::symbols<char, function_impl> function_table;
struct function_call
{
typedef std::vector<expr> arguments_t;
function_call() = default;
function_call(function_impl f, arguments_t const& arguments)
: f(f), arguments(arguments) { }
function_impl f;
arguments_t arguments;
};
BOOST_FUSION_ADAPT_STRUCT(function_call, (function_impl, f)(function_call::arguments_t, arguments))
#ifdef BOOST_SPIRIT_DEBUG
namespace std
{
static inline std::ostream& operator<<(std::ostream& os, nullary_function_impl const& f) { return os << "<nullary_function_impl>"; }
static inline std::ostream& operator<<(std::ostream& os, unary_function_impl const& f) { return os << "<unary_function_impl>"; }
static inline std::ostream& operator<<(std::ostream& os, binary_function_impl const& f) { return os << "<binary_function_impl>"; }
}
static inline std::ostream& operator<<(std::ostream& os, function_call const& call) { return os << call.f << "(" << call.arguments.size() << ")"; }
#endif
//////////////////////////////////////////////////
// Evaluation
value evaluate(const expr& e);
struct eval : boost::static_visitor<value>
{
eval() {}
value operator()(const value& v) const
{
return v;
}
value operator()(const function_call& call) const
{
return boost::apply_visitor(invoke(call.arguments), call.f);
}
private:
struct invoke : boost::static_visitor<value>
{
function_call::arguments_t const& _args;
invoke(function_call::arguments_t const& args) : _args(args) {}
value operator()(nullary_function_impl const& f) const {
return f();
}
value operator()(unary_function_impl const& f) const {
auto a = evaluate(_args.at(0));
return f(a);
}
value operator()(binary_function_impl const& f) const {
auto a = evaluate(_args.at(0));
auto b = evaluate(_args.at(1));
return f(a, b);
}
};
};
value evaluate(const expr& e)
{
return boost::apply_visitor(eval(), e);
}
//////////////////////////////////////////////////
// Demo functions:
value AnswerToLTUAE() {
return 42;
}
value ReverseString(value const& input) {
auto& as_string = boost::get<std::string>(input);
return std::string(as_string.rbegin(), as_string.rend());
}
value Concatenate(value const& a, value const& b) {
std::ostringstream oss;
oss << a << b;
return oss.str();
}
//////////////////////////////////////////////////
// Parser grammar
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, expr(), Skipper>
{
parser() : parser::base_type(expr_)
{
using namespace qi;
n_ary_ops.add
("AnswerToLTUAE", nullary_function_impl{ &::AnswerToLTUAE })
("ReverseString", unary_function_impl { &::ReverseString })
("Concatenate" , binary_function_impl { &::Concatenate });
function_call_ = n_ary_ops > '(' > expr_list > ')';
string_ = qi::lexeme [ "'" >> *~qi::char_("'") >> "'" ];
value_ = qi::int_ | string_;
expr_list = -expr_ % ',';
expr_ = function_call_ | value_;
on_error<fail> ( expr_, std::cout
<< phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
<< phx::construct<std::string>(_3, _2) << phx::val("\"\n"));
BOOST_SPIRIT_DEBUG_NODES((expr_)(expr_list)(function_call_)(value_)(string_))
}
private:
function_table n_ary_ops;
template <typename Attr> using Rule = qi::rule<It, Attr(), Skipper>;
Rule<std::string> string_;
Rule<value> value_;
Rule<function_call> function_call_;
Rule<std::vector<expr>> expr_list;
Rule<expr> expr_;
};
//////////////////////////////////////////////////
// Output generator
template <typename It>
struct generator : karma::grammar<It, expr()>
{
generator() : generator::base_type(expr_)
{
using namespace karma;
nullary_ = eps << "nullary_function_call"; // TODO reverse lookup :)
unary_ = eps << "unary_function_call";
binary_ = eps << "binary_function_call";
function_ = nullary_ | unary_ | binary_;
function_call_ = function_ << expr_list;
expr_list = '(' << -(expr_ % ',') << ')';
value_ = karma::int_ | ("'" << karma::string << "'");
expr_ = function_call_ | value_;
}
private:
template <typename Attr> using Rule = karma::rule<It, Attr()>;
Rule<nullary_function_impl> nullary_;
Rule<unary_function_impl> unary_;
Rule<binary_function_impl> binary_;
Rule<function_impl> function_;
Rule<function_call> function_call_;
Rule<value> value_;
Rule<std::vector<expr>> expr_list;
Rule<expr> expr_;
};
int main()
{
for (const std::string input: std::vector<std::string> {
"-99",
"'string'",
"AnswerToLTUAE()",
"ReverseString('string')",
"Concatenate('string', 987)",
"Concatenate('The Answer Is ', AnswerToLTUAE())",
})
{
auto f(std::begin(input)), l(std::end(input));
const static parser<decltype(f)> p;
expr parsed_script;
bool ok = qi::phrase_parse(f,l,p,qi::space,parsed_script);
if (!ok)
std::cout << "invalid input\n";
else
{
const static generator<boost::spirit::ostream_iterator> g;
std::cout << "input:\t" << input << "\n";
std::cout << "tree:\t" << karma::format(g, parsed_script) << "\n";
std::cout << "eval:\t" << evaluate(parsed_script) << "\n\n";
}
if (f!=l) std::cout << "unparsed: '" << std::string(f,l) << "'\n";
}
}
Related
I am trying to model a parser for a subset of the C language, for a school project. However, I seem stuck in the process of generating recursive parsing rules for Boost.Spirit, as my rules either overflow the stack or simply do not pick up anything.
For example, I want to model the following syntax:
a ::= ... | A[a] | a1 op a2 | ...
There are some other subsets of syntax for this expression rule, but those are working without problems. For example, if I were to parse A[3*4], it should be read as a recursive parsing where A[...] (A[a] in the syntax) is the array accessor and 3*4 (a1 op a2 in the syntax) is the index.
I've tried defining the following rule objects in the grammar struct:
qi::rule<Iterator, Type(), Skipper> expr_arr;
qi::rule<Iterator, Type(), Skipper> expr_binary_arith;
qi::rule<Iterator, Type(), Skipper> expr_a;
And giving them the following grammar:
expr_arr %= qi::lexeme[identifier >> qi::omit['[']] >> expr_a >> qi::lexeme[qi::omit[']']];
expr_binary_arith %= expr_a >> op_binary_arith >> expr_a;
expr_a %= (expr_binary_arith | expr_arr);
where "op_binary_arith" is a qi::symbol<> object with the allowed operator symbols.
This compiles fine, but upon execution enters a supposedly endless loop, and the stack overflows. I've tried looking at the answer by Sehe in the following question: How to set max recursion in boost spirit.
However, I have been unsuccessful in setting a max recursion depth. Firstly, I failed to make it compile without errors for almost any of my attempts, but on the last attempt it built successfully, albeit with very unexpected results.
Can someone guide me in the right direction, as to how I should go about implementing this grammar correctly?
PEG grammars do not handle left-recursion well. In general you have to split out helper rules to write without left-recursion.
In your particular case, the goal production
a ::= ... | A[a] | a1 op a2 | ...
Seems a little off. This would allows foo[bar] or foo + bar but not foo + bar[qux].
Usually, the choice between array element reference or plain identifier is at a lower level of precedence (often "simple expression").
Here's a tiny elaboration:
literal = number_literal | string_literal; // TODO exapnd?
expr_arr = identifier >> '[' >> (expr_a % ',') >> ']';
simple_expression = literal | expr_arr | identifier;
expr_binary_arith = simple_expression >> op_binary_arith >> expr_a;
expr_a = expr_binary_arith | simple_expression;
Now you can parse e.g.:
for (std::string const& input : {
"A[3*4]",
"A[F[3]]",
"A[8 + F[0x31]]",
"3 * \"foo\"",
})
{
std::cout << std::quoted(input) << " -> ";
It f=begin(input), l=end(input);
AST::Expr e;
if (parse(f,l,g,e)) {
std::cout << "Parsed: " << e << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining: " << std::quoted(std::string(f,l)) << "\b";
}
}
Which prints Live On Coliru
"A[3*4]" -> Parsed: A[3*4]
"A[F[3]]" -> Parsed: A[F[3]]
"A[8 + F[0x31]]" -> Parsed: A[8+F[49]]
"3 * \"foo\"" -> Parsed: 3*"foo"
NOTE I deliberately left efficiency and operator precedence out of the picture for now.
These are talked about in detail in other answers:
Boost::Spirit Expression Parser
Implementing operator precedence with boost spirit
Boost::Spirit : Optimizing an expression parser
And many more
Full Demo Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <experimental/iterator>
namespace qi = boost::spirit::qi;
namespace AST {
using Var = std::string;
struct String : std::string {
using std::string::string;
};
using Literal = boost::variant<String, intmax_t, double>;
enum class ArithOp {
addition, subtraction, division, multplication
};
struct IndexExpr;
struct BinOpExpr;
using Expr = boost::variant<
Literal,
Var,
boost::recursive_wrapper<IndexExpr>,
boost::recursive_wrapper<BinOpExpr>
>;
struct IndexExpr {
Expr expr;
std::vector<Expr> indices;
};
struct BinOpExpr {
Expr lhs, rhs;
ArithOp op;
};
std::ostream& operator<<(std::ostream& os, Literal const& lit) {
struct {
std::ostream& os;
void operator()(String const& s) const { os << std::quoted(s); }
void operator()(double d) const { os << d; }
void operator()(intmax_t i) const { os << i; }
} vis {os};
boost::apply_visitor(vis, lit);
return os;
}
std::ostream& operator<<(std::ostream& os, ArithOp const& op) {
switch(op) {
case ArithOp::addition: return os << '+';
case ArithOp::subtraction: return os << '-';
case ArithOp::division: return os << '/';
case ArithOp::multplication: return os << '*';
}
return os << '?';
}
std::ostream& operator<<(std::ostream& os, BinOpExpr const& e) {
return os << e.lhs << e.op << e.rhs;
}
std::ostream& operator<<(std::ostream& os, IndexExpr const& e) {
std::copy(
begin(e.indices),
end(e.indices),
std::experimental::make_ostream_joiner(os << e.expr << '[', ","));
return os << ']';
}
}
BOOST_FUSION_ADAPT_STRUCT(AST::IndexExpr, expr, indices)
BOOST_FUSION_ADAPT_STRUCT(AST::BinOpExpr, lhs, op, rhs)
template <typename Iterator, typename Skipper = qi::space_type>
struct G : qi::grammar<Iterator, AST::Expr()> {
G() : G::base_type(start) {
using namespace qi;
identifier = alpha >> *alnum;
number_literal =
qi::real_parser<double, qi::strict_real_policies<double> >{}
| "0x" >> qi::uint_parser<intmax_t, 16> {}
| qi::int_parser<intmax_t, 10> {}
;
string_literal = '"' >> *('\\' >> char_escape | ~char_('"')) >> '"';
literal = number_literal | string_literal; // TODO exapnd?
expr_arr = identifier >> '[' >> (expr_a % ',') >> ']';
simple_expression = literal | expr_arr | identifier;
expr_binary_arith = simple_expression >> op_binary_arith >> expr_a;
expr_a = expr_binary_arith | simple_expression;
start = skip(space) [expr_a];
BOOST_SPIRIT_DEBUG_NODES(
(start)
(expr_a)(expr_binary_arith)(simple_expression)(expr_a)
(literal)(number_literal)(string_literal)
(identifier))
}
private:
struct escape_sym : qi::symbols<char, char> {
escape_sym() {
this->add
("b", '\b')
("f", '\f')
("r", '\r')
("n", '\n')
("t", '\t')
("\\", '\\')
;
}
} char_escape;
struct op_binary_arith_sym : qi::symbols<char, AST::ArithOp> {
op_binary_arith_sym() {
this->add
("+", AST::ArithOp::addition)
("-", AST::ArithOp::subtraction)
("/", AST::ArithOp::division)
("*", AST::ArithOp::multplication)
;
}
} op_binary_arith;
qi::rule<Iterator, AST::Expr()> start;
qi::rule<Iterator, AST::IndexExpr(), Skipper> expr_arr;
qi::rule<Iterator, AST::BinOpExpr(), Skipper> expr_binary_arith;
qi::rule<Iterator, AST::Expr(), Skipper> simple_expression, expr_a;
// implicit lexemes
qi::rule<Iterator, AST::Literal()> literal, string_literal, number_literal;
qi::rule<Iterator, AST::Var()> identifier;
};
int main() {
using It = std::string::const_iterator;
G<It> const g;
for (std::string const& input : {
"A[3*4]",
"A[F[3]]",
"A[8 + F[0x31]]",
"3 * \"foo\"",
})
{
std::cout << std::quoted(input) << " -> ";
It f=begin(input), l=end(input);
AST::Expr e;
if (parse(f,l,g,e)) {
std::cout << "Parsed: " << e << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining: " << std::quoted(std::string(f,l)) << "\b";
}
}
}
I am playing with boost.spirit library and I cannot manage to report a simple error message from my semantic action.
// supported parameter types (int or quoted strings)
parameter = bsqi::int_ | bsqi::lexeme[L'"' > *(bsqi_coding::char_ - L'"') > L'"'];
parameter.name("parameter");
// comma separator list of parameters (or no parameters)
parameters = -(parameter % L',');
parameters.name("parameters");
// action with parameters
action = (Actions > L'(' > parameters > L')')[bsqi::_pass = boost::phoenix::bind(&ValidateAction, bsqi::_1, bsqi::_2)];
action.name("action");
The Actions is just a symbol table (boost::spirit::qi::symbols). The attribute of parameters is std::vector of boost::variant which describes the parameters types. I would like to produces a meaningful error message within semantic action ValidateAction with also indicating position within input what is wrong. If I just assign _pass to false, parsing ends but the error message is something like 'expecting ' and not that e.g. 2nd parameter has wrong type (expected int instead of string).
Somewhere I read that I can throw an exception from my semantic action, but the problem is that I didn't find whether and how I can access iterators from parsed values. For example I wanted to use expectation_failure exception so my error handler automatically is called, but I need to pass iterators to the exception which seems impossible.
Is there any nice way how to report semantic failures with more detailed information except returning just false?
I'd use filepos_iterator and just throw an exception, so you have complete control over the reporting.
Let me see what I can come up with in the remaining 15 minutes I have
Ok, took a little bit more time but think it's an instructive demo:
Live On Coliru
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/support_line_pos_iterator.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
#include <boost/lexical_cast.hpp>
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
namespace px = boost::phoenix;
namespace qi_coding = boost::spirit::ascii;
using It = boost::spirit::line_pos_iterator<std::string::const_iterator>;
namespace ast {
enum actionid { f_unary, f_binary };
enum param_type { int_param, string_param };
static inline std::ostream& operator<<(std::ostream& os, actionid id) {
switch(id) {
case f_unary: return os << "f_unary";
case f_binary: return os << "f_binary";
default: return os << "(unknown)";
} }
static inline std::ostream& operator<<(std::ostream& os, param_type t) {
switch(t) {
case int_param: return os << "integer";
case string_param: return os << "string";
default: return os << "(unknown)";
} }
using param_value = boost::variant<int, std::string>;
struct parameter {
It position;
param_value value;
friend std::ostream& operator<<(std::ostream& os, parameter const& p) { return os << p.value; }
};
using parameters = std::vector<parameter>;
struct action {
/*
*action() = default;
*template <typename Sequence> action(Sequence const& seq) { boost::fusion::copy(seq, *this); }
*/
actionid id;
parameters params;
};
}
namespace std {
static inline std::ostream& operator<<(std::ostream& os, ast::parameters const& v) {
std::copy(v.begin(), v.end(), std::ostream_iterator<ast::parameter>(os, " "));
return os;
}
}
BOOST_FUSION_ADAPT_STRUCT(ast::action, id, params)
BOOST_FUSION_ADAPT_STRUCT(ast::parameter, position, value)
struct BadAction : std::exception {
It _where;
std::string _what;
BadAction(It it, std::string msg) : _where(it), _what(std::move(msg)) {}
It where() const { return _where; }
char const* what() const noexcept { return _what.c_str(); }
};
struct ValidateAction {
std::map<ast::actionid, std::vector<ast::param_type> > const specs {
{ ast::f_unary, { ast::int_param } },
{ ast::f_binary, { ast::int_param, ast::string_param } },
};
ast::action operator()(It source, ast::action parsed) const {
auto check = [](ast::parameter const& p, ast::param_type expected_type) {
if (p.value.which() != expected_type) {
auto name = boost::lexical_cast<std::string>(expected_type);
throw BadAction(p.position, "Type mismatch (expecting " + name + ")");
}
};
int i;
try {
auto& formals = specs.at(parsed.id);
auto& actuals = parsed.params;
auto arity = formals.size();
for (i=0; i<arity; ++i)
check(actuals.at(i), formals.at(i));
if (actuals.size() > arity)
throw BadAction(actuals.at(arity).position, "Excess parameters");
} catch(std::out_of_range const&) {
throw BadAction(source, "Missing parameter #" + std::to_string(i+1));
}
return parsed;
}
};
template <typename It, typename Skipper = qi::space_type>
struct Parser : qi::grammar<It, ast::action(), Skipper> {
Parser() : Parser::base_type(start) {
using namespace qi;
parameter = qr::iter_pos >> (int_ | lexeme['"' >> *~qi_coding::char_('"') >> '"']);
parameters = -(parameter % ',');
action = actions_ >> '(' >> parameters >> ')';
start = (qr::iter_pos >> action) [ _val = validate_(_1, _2) ];
BOOST_SPIRIT_DEBUG_NODES((parameter)(parameters)(action))
}
private:
qi::rule<It, ast::action(), Skipper> start, action;
qi::rule<It, ast::parameters(), Skipper> parameters;
qi::rule<It, ast::parameter(), Skipper> parameter;
px::function<ValidateAction> validate_;
struct Actions : qi::symbols<char, ast::actionid> {
Actions() { this->add("f_unary", ast::f_unary)("f_binary", ast::f_binary); }
} actions_;
};
int main() {
for (std::string const input : {
// good
"f_unary( 0 )",
"f_binary ( 47, \"hello\")",
// errors
"f_binary ( 47, \"hello\") bogus",
"f_unary ( 47, \"hello\") ",
"f_binary ( 47, \r\n 7) ",
})
{
std::cout << "-----------------------\n";
Parser<It> p;
It f(input.begin()), l(input.end());
auto printErrorContext = [f,l](std::ostream& os, It where) {
auto line = get_current_line(f, where, l);
os << " line:" << get_line(where)
<< ", col:" << get_column(line.begin(), where) << "\n";
while (!line.empty() && std::strchr("\r\n", *line.begin()))
line.advance_begin(1);
std::cerr << line << "\n";
std::cerr << std::string(std::distance(line.begin(), where), ' ') << "^ --- here\n";
};
ast::action data;
try {
if (qi::phrase_parse(f, l, p > qi::eoi, qi::space, data)) {
std::cout << "Parsed: " << boost::fusion::as_vector(data) << "\n";
}
} catch(qi::expectation_failure<It> const& e) {
printErrorContext(std::cerr << "Expectation failed: " << e.what_, e.first);
} catch(BadAction const& ba) {
printErrorContext(std::cerr << "BadAction: " << ba.what(), ba.where());
}
if (f!=l) {
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}
}
Printing:
-----------------------
Parsed: (f_unary 0 )
-----------------------
Parsed: (f_binary 47 hello )
-----------------------
Expectation failed: <eoi> line:1, col:25
f_binary ( 47, "hello") bogus
^ --- here
Remaining unparsed: 'f_binary ( 47, "hello") bogus'
-----------------------
BadAction: Excess parameters line:1, col:15
f_unary ( 47, "hello")
^ --- here
Remaining unparsed: 'f_unary ( 47, "hello") '
-----------------------
BadAction: Type mismatch (expecting string) line:2, col:8
7)
^ --- here
Remaining unparsed: 'f_binary ( 47,
7) '
Lets say I have code like this (line numbers for reference):
1:
2:function FuncName_1 {
3: var Var_1 = 3;
4: var Var_2 = 4;
5: ...
I want to write a grammar that parses such text, puts all indentifiers (function and variable names) infos into a tree (utree?).
Each node should preserve: line_num, column_num and symbol value. example:
root: FuncName_1 (line:2,col:10)
children[0]: Var_1 (line:3, col:8)
children[1]: Var_1 (line:4, col:9)
I want to put it into the tree because I plan to traverse through that tree and for each node I must know the 'context': (all parent nodes of current nodes).
E.g, while processing node with Var_1, I must know that this is a name for local variable for function FuncName_1 (that is currently being processed as node, but one level earlier)
I cannot figure out few things
Can this be done in Spirit with semantic actions and utree's ? Or should I use variant<> trees ?
How to pass to the node those three informations (column,line,symbol_name) at the same time ? I know I must use pos_iterator as iterator type for grammar but how to access those information in sematic action ?
I'm a newbie in Boost so I read the Spirit documentaiton over and over, I try to google my problems but I somehow cannot put all the pieces together ot find the solution. Seems like there was no one me with such use case like mine before (or I'm just not able to find it)
Looks like the only solutions with position iterator are the ones with parsing error handling, but this is not the case I'm interested in.
The code that only parses the code I was taking about is below but I dont know how to move forward with it.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_line_pos_iterator.hpp>
namespace qi = boost::spirit::qi;
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> pos_iterator_t;
template<typename Iterator=pos_iterator_t, typename Skipper=qi::space_type>
struct ParseGrammar: public qi::grammar<Iterator, Skipper>
{
ParseGrammar():ParseGrammar::base_type(SourceCode)
{
using namespace qi;
KeywordFunction = lit("function");
KeywordVar = lit("var");
SemiColon = lit(';');
Identifier = lexeme [alpha >> *(alnum | '_')];
VarAssignemnt = KeywordVar >> Identifier >> char_('=') >> int_ >> SemiColon;
SourceCode = KeywordFunction >> Identifier >> '{' >> *VarAssignemnt >> '}';
}
qi::rule<Iterator, Skipper> SourceCode;
qi::rule<Iterator > KeywordFunction;
qi::rule<Iterator, Skipper> VarAssignemnt;
qi::rule<Iterator> KeywordVar;
qi::rule<Iterator> SemiColon;
qi::rule<Iterator > Identifier;
};
int main()
{
std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var Var_2 = 4; }";
pos_iterator_t first(content.begin()), iter = first, last(content.end());
ParseGrammar<pos_iterator_t> resolver; // Our parser
bool ok = phrase_parse(iter,
last,
resolver,
qi::space);
std::cout << std::boolalpha;
std::cout << "\nok : " << ok << std::endl;
std::cout << "full : " << (iter == last) << std::endl;
if(ok && iter == last)
{
std::cout << "OK: Parsing fully succeeded\n\n";
}
else
{
int line = get_line(iter);
int column = get_column(first, iter);
std::cout << "-------------------------\n";
std::cout << "ERROR: Parsing failed or not complete\n";
std::cout << "stopped at: " << line << ":" << column << "\n";
std::cout << "remaining: '" << std::string(iter, last) << "'\n";
std::cout << "-------------------------\n";
}
return 0;
}
This has been a fun exercise, where I finally put together a working demo of on_success[1] to annotate AST nodes.
Let's assume we want an AST like:
namespace ast
{
struct LocationInfo {
unsigned line, column, length;
};
struct Identifier : LocationInfo {
std::string name;
};
struct VarAssignment : LocationInfo {
Identifier id;
int value;
};
struct SourceCode : LocationInfo {
Identifier function;
std::vector<VarAssignment> assignments;
};
}
I know, 'location information' is probably overkill for the SourceCode node, but you know... Anyways, to make it easy to assign attributes to these nodes without requiring semantic actions or lots of specifically crafted constructors:
#include <boost/fusion/adapted/struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(ast::Identifier, (std::string, name))
BOOST_FUSION_ADAPT_STRUCT(ast::VarAssignment, (ast::Identifier, id)(int, value))
BOOST_FUSION_ADAPT_STRUCT(ast::SourceCode, (ast::Identifier, function)(std::vector<ast::VarAssignment>, assignments))
There. Now we can declare the rules to expose these attributes:
qi::rule<Iterator, ast::SourceCode(), Skipper> SourceCode;
qi::rule<Iterator, ast::VarAssignment(), Skipper> VarAssignment;
qi::rule<Iterator, ast::Identifier()> Identifier;
// no skipper, no attributes:
qi::rule<Iterator> KeywordFunction, KeywordVar, SemiColon;
We don't (essentially) modify the grammar, at all: attribute propagation is "just automatic"[2] :
KeywordFunction = lit("function");
KeywordVar = lit("var");
SemiColon = lit(';');
Identifier = as_string [ alpha >> *(alnum | char_("_")) ];
VarAssignment = KeywordVar >> Identifier >> '=' >> int_ >> SemiColon;
SourceCode = KeywordFunction >> Identifier >> '{' >> *VarAssignment >> '}';
The magic
How do we get the source location information attached to our nodes?
auto set_location_info = annotate(_val, _1, _3);
on_success(Identifier, set_location_info);
on_success(VarAssignment, set_location_info);
on_success(SourceCode, set_location_info);
Now, annotate is just a lazy version of a calleable that is defined as:
template<typename It>
struct annotation_f {
typedef void result_type;
annotation_f(It first) : first(first) {}
It const first;
template<typename Val, typename First, typename Last>
void operator()(Val& v, First f, Last l) const {
do_annotate(v, f, l, first);
}
private:
void static do_annotate(ast::LocationInfo& li, It f, It l, It first) {
using std::distance;
li.line = get_line(f);
li.column = get_column(first, f);
li.length = distance(f, l);
}
static void do_annotate(...) { }
};
Due to way in which get_column works, the functor is stateful (as it remembers the start iterator)[3]. As you can see do_annotate just accepts anything that derives from LocationInfo.
Now, the proof of the pudding:
std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var Var_2 = 4; }";
pos_iterator_t first(content.begin()), iter = first, last(content.end());
ParseGrammar<pos_iterator_t> resolver(first); // Our parser
ast::SourceCode program;
bool ok = phrase_parse(iter,
last,
resolver,
qi::space,
program);
std::cout << std::boolalpha;
std::cout << "ok : " << ok << std::endl;
std::cout << "full: " << (iter == last) << std::endl;
if(ok && iter == last)
{
std::cout << "OK: Parsing fully succeeded\n\n";
std::cout << "Function name: " << program.function.name << " (see L" << program.printLoc() << ")\n";
for (auto const& va : program.assignments)
std::cout << "variable " << va.id.name << " assigned value " << va.value << " at L" << va.printLoc() << "\n";
}
else
{
int line = get_line(iter);
int column = get_column(first, iter);
std::cout << "-------------------------\n";
std::cout << "ERROR: Parsing failed or not complete\n";
std::cout << "stopped at: " << line << ":" << column << "\n";
std::cout << "remaining: '" << std::string(iter, last) << "'\n";
std::cout << "-------------------------\n";
}
This prints:
ok : true
full: true
OK: Parsing fully succeeded
Function name: FuncName_1 (see L1:1:56)
variable Var_1 assigned value 3 at L2:3:14
variable Var_2 assigned value 4 at L3:3:15
Full Demo Program
See it Live On Coliru
Also showing:
error handling, e.g.:
Error: expecting "=" in line 3:
var Var_2 - 4; }
^---- here
ok : false
full: false
-------------------------
ERROR: Parsing failed or not complete
stopped at: 1:1
remaining: 'function FuncName_1 {
var Var_1 = 3;
var Var_2 - 4; }'
-------------------------
BOOST_SPIRIT_DEBUG macros
A bit of a hacky way to conveniently stream the LocationInfo part of any AST node, sorry :)
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/support_line_pos_iterator.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> pos_iterator_t;
namespace ast
{
namespace manip { struct LocationInfoPrinter; }
struct LocationInfo {
unsigned line, column, length;
manip::LocationInfoPrinter printLoc() const;
};
struct Identifier : LocationInfo {
std::string name;
};
struct VarAssignment : LocationInfo {
Identifier id;
int value;
};
struct SourceCode : LocationInfo {
Identifier function;
std::vector<VarAssignment> assignments;
};
///////////////////////////////////////////////////////////////////////////
// Completely unnecessary tweak to get a "poor man's" io manipulator going
// so we can do `std::cout << x.printLoc()` on types of `x` deriving from
// LocationInfo
namespace manip {
struct LocationInfoPrinter {
LocationInfoPrinter(LocationInfo const& ref) : ref(ref) {}
LocationInfo const& ref;
friend std::ostream& operator<<(std::ostream& os, LocationInfoPrinter const& lip) {
return os << lip.ref.line << ':' << lip.ref.column << ':' << lip.ref.length;
}
};
}
manip::LocationInfoPrinter LocationInfo::printLoc() const { return { *this }; }
// feel free to disregard this hack
///////////////////////////////////////////////////////////////////////////
}
BOOST_FUSION_ADAPT_STRUCT(ast::Identifier, (std::string, name))
BOOST_FUSION_ADAPT_STRUCT(ast::VarAssignment, (ast::Identifier, id)(int, value))
BOOST_FUSION_ADAPT_STRUCT(ast::SourceCode, (ast::Identifier, function)(std::vector<ast::VarAssignment>, assignments))
struct error_handler_f {
typedef qi::error_handler_result result_type;
template<typename T1, typename T2, typename T3, typename T4>
qi::error_handler_result operator()(T1 b, T2 e, T3 where, T4 const& what) const {
std::cerr << "Error: expecting " << what << " in line " << get_line(where) << ": \n"
<< std::string(b,e) << "\n"
<< std::setw(std::distance(b, where)) << '^' << "---- here\n";
return qi::fail;
}
};
template<typename It>
struct annotation_f {
typedef void result_type;
annotation_f(It first) : first(first) {}
It const first;
template<typename Val, typename First, typename Last>
void operator()(Val& v, First f, Last l) const {
do_annotate(v, f, l, first);
}
private:
void static do_annotate(ast::LocationInfo& li, It f, It l, It first) {
using std::distance;
li.line = get_line(f);
li.column = get_column(first, f);
li.length = distance(f, l);
}
static void do_annotate(...) {}
};
template<typename Iterator=pos_iterator_t, typename Skipper=qi::space_type>
struct ParseGrammar: public qi::grammar<Iterator, ast::SourceCode(), Skipper>
{
ParseGrammar(Iterator first) :
ParseGrammar::base_type(SourceCode),
annotate(first)
{
using namespace qi;
KeywordFunction = lit("function");
KeywordVar = lit("var");
SemiColon = lit(';');
Identifier = as_string [ alpha >> *(alnum | char_("_")) ];
VarAssignment = KeywordVar > Identifier > '=' > int_ > SemiColon; // note: expectation points
SourceCode = KeywordFunction >> Identifier >> '{' >> *VarAssignment >> '}';
on_error<fail>(VarAssignment, handler(_1, _2, _3, _4));
on_error<fail>(SourceCode, handler(_1, _2, _3, _4));
auto set_location_info = annotate(_val, _1, _3);
on_success(Identifier, set_location_info);
on_success(VarAssignment, set_location_info);
on_success(SourceCode, set_location_info);
BOOST_SPIRIT_DEBUG_NODES((KeywordFunction)(KeywordVar)(SemiColon)(Identifier)(VarAssignment)(SourceCode))
}
phx::function<error_handler_f> handler;
phx::function<annotation_f<Iterator>> annotate;
qi::rule<Iterator, ast::SourceCode(), Skipper> SourceCode;
qi::rule<Iterator, ast::VarAssignment(), Skipper> VarAssignment;
qi::rule<Iterator, ast::Identifier()> Identifier;
// no skipper, no attributes:
qi::rule<Iterator> KeywordFunction, KeywordVar, SemiColon;
};
int main()
{
std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var Var_2 - 4; }";
pos_iterator_t first(content.begin()), iter = first, last(content.end());
ParseGrammar<pos_iterator_t> resolver(first); // Our parser
ast::SourceCode program;
bool ok = phrase_parse(iter,
last,
resolver,
qi::space,
program);
std::cout << std::boolalpha;
std::cout << "ok : " << ok << std::endl;
std::cout << "full: " << (iter == last) << std::endl;
if(ok && iter == last)
{
std::cout << "OK: Parsing fully succeeded\n\n";
std::cout << "Function name: " << program.function.name << " (see L" << program.printLoc() << ")\n";
for (auto const& va : program.assignments)
std::cout << "variable " << va.id.name << " assigned value " << va.value << " at L" << va.printLoc() << "\n";
}
else
{
int line = get_line(iter);
int column = get_column(first, iter);
std::cout << "-------------------------\n";
std::cout << "ERROR: Parsing failed or not complete\n";
std::cout << "stopped at: " << line << ":" << column << "\n";
std::cout << "remaining: '" << std::string(iter, last) << "'\n";
std::cout << "-------------------------\n";
}
return 0;
}
[1] sadly un(der)documented, except for the conjure sample(s)
[2] well, I used as_string to get proper assignment to Identifier without too much work
[3] There could be smarter ways about this in terms of performance, but for now, let's keep it simple
Trying to tweak the boost spirit x3 calc example to parse functions that can take functions as arguments. However it does not compile.
namespace client{ namespace ast{
struct ts;
struct fnc;
typedef boost::variant<
ts,
boost::recursive_wrapper<fnc>
> node;
struct ts{
unsigned int id;
};
struct fnc{
std::vector<char> id;
std::vector<node> args;
};
}}
BOOST_FUSION_ADAPT_STRUCT(
client::ast::ts,
(unsigned int, id)
)
BOOST_FUSION_ADAPT_STRUCT(
client::ast::fnc,
(std::vector<char>, id)
(std::vector<client::ast::node>, args)
)
namespace client{
namespace x3 = boost::spirit::x3;
namespace calc_grammar{
using x3::uint_;
using x3::alpha;
using x3::alnum;
using x3::lit;
using x3::char_;
x3::rule<class funct, ast::fnc> const funct("function");
x3::rule<class ts, ast::ts> const ts("timeseries");
x3::rule<class funct_name, std::vector<char>> const funct_name("function_name");
auto const funct_def = funct_name >> lit('(') >> -((ts|funct)%lit(',')) >> lit(')');
auto const ts_def = lit('#') >> uint_ >> lit('#');
auto const funct_name_def = lit('#') >> alpha >> *(alnum|char_('_'));
auto const calc = x3::grammar(
"calc",
funct = funct_def,
ts = ts_def,
funct_name = funct_name_def
);
}
using calc_grammar::calc;
}
error C2665: 'boost::detail::variant::make_initializer_node::apply::initializer_node::initialize': none of the 5 overloads could convert all the argument types
There's also a note to user in variant.hpp
// NOTE TO USER :
// Compile error here indicates that the given type is not
// unambiguously convertible to one of the variant's types
// (or that no conversion exists).
Yet I am none the wiser...
I spotted this old question. X3 has evolved a bit in the mean time and I though I'd answer it now anyways.
I suspected that the main issue might have been with (missing) (implicit) constructors on the variant members.
Anyhow, here's a live demo with a more lightweight grammar:
namespace grammar_def {
using namespace x3;
rule<class funct, ast::fnc> const funct("function");
auto const ts = lexeme [ '#' >> uint_ >> '#' ];
auto const fname = lexeme [ '#' >> raw [ alpha >> *(alnum | '_') ] ];
auto const expr = ts|funct;
auto const funct_def = fname >> '(' >> -expr % ',' >> ')';
BOOST_SPIRIT_DEFINE(funct)
}
I also added some output streaming helpers. Note how I changed the id type to std::string for simplicity (it's hard/impossible to overload operator<< for vector<char> without invading namespace std):
namespace client { namespace ast {
static std::ostream& operator<<(std::ostream& os, ts const& v) {
using namespace boost::fusion;
return os << tuple_open("") << tuple_close("") << tuple_delimiter("") << as_vector(v);
}
static std::ostream& operator<<(std::ostream& os, fnc const& v) {
using namespace boost::fusion;
return os << tuple_open("") << tuple_close("") << tuple_delimiter("") << as_vector(v);
}
template<typename T>
static std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
os << "("; for (auto& el : v) os << (&el==&v[0]?"":", ") << el; return os << ")";
}
} }
Demo
This has more (optional) plumbing to allow for richer debug information:
Live On Coliru
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/as_vector.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
namespace client { namespace ast {
struct ts;
struct fnc;
//using string = std::vector<char>;
using string = std::string; // for easier printing/debugging
struct ts {
unsigned int id;
ts(unsigned id=0):id(id) {}
};
typedef boost::variant<ts, boost::recursive_wrapper<fnc> > node;
struct fnc {
string id;
std::vector<node> args;
};
} }
BOOST_FUSION_ADAPT_STRUCT(client::ast::ts, id)
BOOST_FUSION_ADAPT_STRUCT(client::ast::fnc, id, args)
//namespace std { static ostream& operator<<(ostream&os, vector<char> const& v) { return os.write(&v[0], v.size()); } }
namespace client { namespace ast {
static std::ostream& operator<<(std::ostream& os, ts const& v) {
using namespace boost::fusion;
return os << tuple_open("") << tuple_close("") << tuple_delimiter("") << as_vector(v);
}
static std::ostream& operator<<(std::ostream& os, fnc const& v) {
using namespace boost::fusion;
return os << tuple_open("") << tuple_close("") << tuple_delimiter("") << as_vector(v);
}
template<typename T>
static std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
os << "("; for (auto& el : v) os << (&el==&v[0]?"":", ") << el; return os << ")";
}
} }
namespace client {
namespace x3 = boost::spirit::x3;
namespace grammar_def {
using namespace x3;
x3::rule<class funct, ast::fnc> const funct("function");
auto const ts // = x3::rule<class ts, ast::ts> {"timeseries"}
= lexeme [ '#' >> uint_ >> '#' ];
auto const fname // = x3::rule<class fname, ast::string> {"function_name"}
= lexeme [ '#' >> raw [ alpha >> *(alnum | '_') ] ];
auto const expr // = rule<struct expr_, ast::node > {"expr"}
= ts|funct;
auto const funct_def = fname >> '(' >> -expr % ',' >> ')';
BOOST_SPIRIT_DEFINE(funct)
}
auto const& grammar = x3::skip(x3::space) [grammar_def::funct];
}
#include <iostream>
int main() {
std::string const s {
"#pow( #1#, \n"
" #trunc(\n"
" #pi ()\n"
" ) )"};
std::cout << "Parsing '" << s << "'\n";
auto f = s.begin();
client::ast::fnc parsed;
if (parse(f, s.end(), client::grammar, parsed)) {
std::cout << "Parse succeeded: " << parsed << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f != s.end())
std::cout << "Remaining unparsed input: '" << std::string(f, s.end()) << "'\n";
}
Prints:
Parsing '#pow( #1#,
#trunc(
#pi ()
) )'
Parse succeeded: pow(1, trunc(pi()))
Looking for some C++ library (like boost::program_options) that is able to return line number of an INI file, where the given option or section was found.
Use cases:
I ask that library to find value "vvv" in a section "[SSS]". Library returns line number where "vvv" in section "[SSS]" is found, or -1. It gives me an ability to say "line 55: vvv must be < 256".
I iterate INI file for sections and validate their names. When some wild secsion is found, i tell: "line 55: section [Hahaha] is unknown".
update: i know about "INI is older than mammoth", but currently i have to port large windows project to cross-platform and cannot get rid of .ini files soon.
Once again, took the opportunity to play with Boost Spirit. This time I got to play with line_pos_iterator.
Here is the fruit of my labour: https://gist.github.com/1425972
When POSITIONINFO == 0
input is streaming
output is raw strings (well, map<string, map<string, string> > for the sections)
When POSITIONINFO == 1
input is buffered
output is textnode_t:
struct textnode_t {
int sline, eline, scol, ecol;
string_t text;
};
This means that the resulting map<textnode_t, map<textnode_t, textnode_t> > is able to report exactly what (line,col) start and end points mark the individual text nodes. See test output for a demo
Comments (#, /* ... */ style) have been implemented
Whitespace is 'tolerated'
name = value # use a comment to force inclusion of trailing whitespace
alternative = escape\ with slash\
De-escaping of the slashes is left as an exercise
Errors are also reported with full position info if enabled
NOTE C++11 support is NOT required, but I used it to dump the result of the parse. I'm too lazy to write it with C++03 verbose iterator style. :)
All code, makefile, example.ini can be found here: https://gist.github.com/1425972
Code
/* inireader.h */
#pragma once
#define POSITIONINFO 0
#include <map>
#include <string>
#include <iterator>
#include <boost/tuple/tuple_comparison.hpp>
template <typename S=std::string, typename Cmp=std::less<S> >
class IniFile
{
public:
IniFile(Cmp cmp=Cmp()) : _cmp(cmp)
{}
IniFile(const std::string& filename, Cmp cmp=Cmp()) : _cmp(cmp)
{ open(filename); }
void open(const std::string& filename);
typedef S string_t;
#if POSITIONINFO
struct textnode_t
{
int sline, eline,
scol, ecol;
string_t text;
operator const string_t&() const { return text; }
friend std::ostream& operator<<(std::ostream& os, const textnode_t& t)
{
os << "[L:" << t.sline << ",C" << t.scol << " .. L" << t.eline << ",C" << t.ecol << ":";
for (typename string_t::const_iterator it=t.text.begin(); it!=t.text.end(); ++it)
switch (*it)
{
case '\r' : os << "\\r"; break;
case '\n' : os << "\\n"; break;
case '\t' : os << "\\t"; break;
case '\0' : os << "\\0"; break;
default: os << *it ; break;
}
return os << "]";
}
bool operator<(const textnode_t& o) const
{ return boost::tie(text/*, sline, eline, scol, ecol*/) <
boost::tie(o.text/*, o.sline, o.eline, o.scol, o.ecol*/); }
textnode_t() : sline(0), eline(0), scol(0), ecol(0) { }
};
#else
typedef string_t textnode_t;
#endif
typedef std::pair<textnode_t, textnode_t> keyvalue_t;
typedef std::map<textnode_t, textnode_t> section_t;
typedef std::map<textnode_t, section_t> sections_t;
private:
Cmp _cmp;
};
///////////////////////////////////////
// template implementation
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/spirit/include/support_line_pos_iterator.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <fstream>
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
namespace inireader
{
struct printer
{
printer(std::ostream& os) : _os(os) {}
std::ostream& _os;
typedef boost::spirit::utf8_string string;
void element(string const& tag, string const& value, int depth) const
{
for (int i = 0; i < (depth*4); ++i) // indent to depth
_os << ' ';
_os << "tag: " << tag;
if (value != "")
_os << ", value: " << value;
_os << std::endl;
}
};
void print_info(std::ostream& os, boost::spirit::info const& what)
{
using boost::spirit::basic_info_walker;
printer pr(os);
basic_info_walker<printer> walker(pr, what.tag, 0);
boost::apply_visitor(walker, what.value);
}
template <typename It, typename Skipper, typename Ini>
struct Grammar : qi::grammar<It, typename Ini::sections_t(), Skipper>
{
typedef typename Ini::string_t string_t;
typedef typename Ini::textnode_t textnode_t;
struct textbuilder
{
template <typename> struct result { typedef textnode_t type; };
textbuilder(It begin) : _begin(begin) { }
textnode_t operator()(const boost::iterator_range<It>& iters) const
{
#if !POSITIONINFO
return textnode_t(std::begin(iters), std::end(iters));
#else
using boost::spirit::get_line;
using boost::spirit::get_line_start;
using boost::spirit::get_column;
textnode_t element;
element.text = string_t (std::begin(iters) , std::end(iters));
element.sline = get_line (std::begin(iters));
element.eline = get_line (std::end(iters));
It sol = get_line_start (_begin , std::begin(iters));
element.scol = get_column (sol , std::begin(iters));
element.ecol = get_column (sol , std::end(iters));
return element;
#endif
}
private:
const It _begin;
} makenode;
Grammar(It begin) : Grammar::base_type(inifile), makenode(begin)
{
using namespace qi;
txt_ch = (lit('\\') > char_) | (char_ - (eol | '#' | "/*"));
key = raw [ lexeme [ +(txt_ch - char_("=")) ] ] [ _val = phx::bind(makenode, _1) ];
value = raw [ lexeme [ +txt_ch ] ] [ _val = phx::bind(makenode, _1) ];
pair %= key > '=' > value;
heading = ('[' > raw [ +~char_(']') ] > ']') [ _val = phx::bind(makenode, _1) ];
section %= heading >> +eol >> -((pair-heading) % +eol);
inifile %= -(section % +eol) >> *eol > eoi;
comment =
('#' >> *(char_ - eol))
| ("/*" > *(char_ - "*/") > "*/");
//BOOST_SPIRIT_DEBUG_NODE(comment);
//BOOST_SPIRIT_DEBUG_NODE(txt_ch);
BOOST_SPIRIT_DEBUG_NODE(heading);
BOOST_SPIRIT_DEBUG_NODE(section);
BOOST_SPIRIT_DEBUG_NODE(key);
BOOST_SPIRIT_DEBUG_NODE(value);
BOOST_SPIRIT_DEBUG_NODE(pair);
BOOST_SPIRIT_DEBUG_NODE(inifile);
}
typedef typename Ini::keyvalue_t keyvalue_t;
typedef typename Ini::section_t section_t;
typedef typename Ini::sections_t sections_t;
typedef typename string_t::value_type Char;
qi::rule<It> comment;
qi::rule<It, Char()> txt_ch;
qi::rule<It, textnode_t(), Skipper> key, value, heading;
qi::rule<It, keyvalue_t(), Skipper> pair;
qi::rule<It, std::pair<textnode_t, section_t>(), Skipper> section;
qi::rule<It, sections_t(), Skipper> inifile;
};
template <typename It, typename Builder>
typename Builder::template result<void>::type
fragment(const It& first, const It& last, const Builder& builder)
{
size_t len = std::distance(first, last);
It frag_end = first;
std::advance(frag_end, std::min(10ul, len));
return builder(boost::iterator_range<It>(first, frag_end));
}
}
template <typename S, typename Cmp>
void IniFile<S, Cmp>::open(const std::string& filename)
{
using namespace qi;
std::ifstream ifs(filename.c_str());
ifs.unsetf(std::ios::skipws);
#if POSITIONINFO
typedef std::string::const_iterator RawIt;
typedef boost::spirit::line_pos_iterator<RawIt> It;
typedef rule<It> Skipper;
std::string buffer(std::istreambuf_iterator<char>(ifs), (std::istreambuf_iterator<char>()));
It f(buffer.begin()), l(buffer.end());
#else
typedef boost::spirit::istream_iterator It;
typedef rule<It> Skipper;
It f(ifs), l;
#endif
inireader::Grammar<It, Skipper, IniFile<S, Cmp> > grammar(f);
Skipper skip = char_(" \t") | grammar.comment;
try
{
sections_t data;
bool ok = phrase_parse(f, l, grammar, skip, data);
if (ok)
{
std::cout << "Parse success!" << std::endl;
///////// C++11 specific features for quick display //////////
for (auto& section : data)
{
std::cout << "[" << section.first << "]" << std::endl;
for (auto& pair : section.second)
std::cout << pair.first << " = " << pair.second << std::endl;
///////// End C++11 specific /////////////////////////////////
}
} else
{
std::cerr << "Parse failed" << std::endl;
}
} catch (const qi::expectation_failure<It>& e)
{
std::cerr << "Exception: " << e.what() <<
" " << inireader::fragment(e.first, e.last, grammar.makenode) << "... ";
inireader::print_info(std::cerr, e.what_);
}
if (f!=l)
{
std::cerr << "Stopped at: '" << inireader::fragment(f, l, grammar.makenode) << "'" << std::endl;
}
}
Demo Input
[Cat1]
name1=100 #skipped
name2=200 \#not \\skipped
name3= dhfj dhjgfd/* skipped
*/
[Cat_2]
UsagePage=9
Usage=19
Offset=0x1204
/*
[Cat_2_bak]
UsagePage=9
Usage=19
Offset=0x1204
*/
[Cat_3]
UsagePage=12
Usage=39
#Usage4=39
Offset=0x12304
Demo Output (POSITIONINFO == 0)
Parse success!
[Cat1]
name1 = 100
name2 = 200 \#not \\skipped
name3 = dhfj dhjgfd
[Cat_2]
Offset = 0x1204
Usage = 19
UsagePage = 9
[Cat_3]
Offset = 0x12304
Usage = 39
UsagePage = 12
Demo Output (POSITIONINFO == 1)
Parse success!
[[L:1,C2 .. L1,C6:Cat1]]
[L:2,C2 .. L2,C7:name1] = [L:2,C8 .. L2,C12:100 ]
[L:6,C2 .. L6,C7:name2] = [L:6,C8 .. L6,C27:200 \#not \\skipped]
[L:7,C2 .. L7,C7:name3] = [L:7,C11 .. L7,C22:dhfj dhjgfd]
[[L:13,C3 .. L13,C8:Cat_2]]
[L:16,C2 .. L16,C8:Offset] = [L:16,C9 .. L16,C15:0x1204]
[L:15,C2 .. L15,C7:Usage] = [L:15,C8 .. L15,C10:19]
[L:14,C2 .. L14,C11:UsagePage] = [L:14,C12 .. L14,C13:9]
[[L:25,C3 .. L25,C8:Cat_3]]
[L:29,C2 .. L29,C8:Offset] = [L:29,C9 .. L29,C16:0x12304]
[L:27,C2 .. L27,C7:Usage] = [L:27,C8 .. L27,C10:39]
[L:26,C2 .. L26,C11:UsagePage] = [L:26,C12 .. L26,C14:12]