Simple expression parser example using Boost::Spirit? - c++

Is anyone aware of an online resource where I can find out how to write a simple expression parser using Boost::Spirit?.
I do not necessarily need to evaluate the expression, but I need to parse it and be able to return a boolean to indicate whether the expression is parsable or not (e.g. brackets not matching etc).
I need the parser to be able recognise function names (e.g. foo and foobar), so this would also be a useful example to help me learn writing BNF notation.
The expressions will be normal arithmetic equations, i.e. comprising of the following symbols:
opening/closing brackets
arithmetic operators
recognized function names, and check for their required arguments

Here's some old Spirit prototype code I had laying around:
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <exception>
#include <iterator>
#include <sstream>
#include <list>
#include <boost/spirit.hpp>
#include <boost/shared_ptr.hpp>
using namespace std;
using namespace boost::spirit;
using namespace boost;
void g(unsigned int i)
{
cout << "row: " << i << endl;
}
struct u
{
u(const char* c): s(c) {}
void operator()(const char* first, const char* last) const
{
cout << s << ": " << string(first, last) << endl;
}
private:
string s;
};
struct Exp
{
};
struct Range: public Exp
{
};
struct Index: public Exp
{
};
struct String: public Exp
{
};
struct Op
{
virtual ~Op() = 0;
virtual string name() = 0;
};
Op::~Op() {}
struct CountIf: public Op
{
string name() { return "CountIf"; }
};
struct Sum: public Op
{
string name() { return "Sum"; }
};
struct Statement
{
virtual ~Statement() = 0;
virtual void print() = 0;
};
Statement::~Statement() {}
struct Formula: public Statement
{
Formula(const char* first, const char* last): s(first, last), op(new CountIf)
{
typedef rule<phrase_scanner_t> r_t;
r_t r_index = (+alpha_p)[u("col")] >> uint_p[&g];
r_t r_range = r_index >> ':' >> r_index;
r_t r_string = ch_p('\"') >> *alnum_p >> '\"';
r_t r_exp = r_range | r_index | r_string; // will invoke actions for index twice due to range
r_t r_list = !(r_exp[u("arg")] % ',');
r_t r_op = as_lower_d["countif"] | as_lower_d["sum"];
r_t r_formula = r_op >> '(' >> r_list >> ')';
cout << s << ": matched: " << boolalpha << parse(s.c_str(), r_formula, space_p).full << endl;
}
void print() { cout << "Formula: " << s << " / " << op->name() << endl; }
private:
string s;
shared_ptr<Op> op;
list<shared_ptr<Exp> > exp_list;
};
struct Comment: public Statement
{
Comment(const char* first, const char* last): comment(first, last) {}
void print() {cout << "Comment: " << comment << endl; }
private:
string comment;
};
struct MakeFormula
{
MakeFormula(list<shared_ptr<Statement> >& list_): list(list_) {}
void operator()(const char* first, const char* last) const
{
cout << "MakeFormula: " << string(first, last) << endl;
list.push_back(shared_ptr<Statement>(new Formula(first, last)));
}
private:
list<shared_ptr<Statement> >& list;
};
struct MakeComment
{
MakeComment(list<shared_ptr<Statement> >& list_): list(list_) {}
void operator()(const char* first, const char* last) const
{
cout << "MakeComment: " << string(first, last) << endl;
list.push_back(shared_ptr<Statement>(new Comment(first, last)));
}
private:
list<shared_ptr<Statement> >& list;
};
int main(int argc, char* argv[])
try
{
//typedef vector<string> v_t;
//v_t v(argv + 1, argv + argc);
// copy(v.begin(), v.end(), ostream_iterator<v_t::value_type>(cout, "\n"));
string s;
getline(cin, s);
// =COUNTIF(J2:J36, "Abc")
typedef list<shared_ptr<Statement> > list_t;
list_t list;
typedef rule<phrase_scanner_t> r_t;
r_t r_index = (+alpha_p)[u("col")] >> uint_p[&g];
r_t r_range = r_index >> ':' >> r_index;
r_t r_string = ch_p('\"') >> *alnum_p >> '\"';
r_t r_exp = r_range | r_index | r_string; // will invoke actions for index twice due to range
r_t r_list = !(r_exp[u("arg")] % ',');
r_t r_op = as_lower_d["countif"] | as_lower_d["sum"];
r_t r_formula = r_op >> '(' >> r_list >> ')';
r_t r_statement = (ch_p('=') >> r_formula [MakeFormula(list)])
| (ch_p('\'') >> (*anychar_p)[MakeComment(list)])
;
cout << s << ": matched: " << boolalpha << parse(s.c_str(), r_statement, space_p).full << endl;
for (list_t::const_iterator it = list.begin(); it != list.end(); ++it)
{
(*it)->print();
}
}
catch(const exception& ex)
{
cerr << "Error: " << ex.what() << endl;
}
Try running it and entering a line like:
=COUNTIF(J2:J36, "Abc")

The current version of Spirit (V2.x) contains a whole series of calculator examples from the very simple to a full fledged mini-c interpreter. You should have a look there as those are a perfect starting point for writing your own expression parser.

I'm not sure if this qualifies as simple either, but I've used this uri-grammar available at http://code.google.com/p/uri-grammar/source/browse/trunk/src/uri/grammar.hpp. It may not be trivial, but at least its parsing something that you probably understand already (URIs). When reading these grammars, its best to read from the bottom up, since that's where the most generic tokens tend to be defined.

Related

Spirit X3 composed attributes

I am trying to compose spirit rules but I cannot figure out what the attribute of this new rule would be.
The following code is working as I would expect it.
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/tuple.hpp>
namespace ast{
struct Record{
int id;
std::string name;
};
struct Document{
Record rec;
Record rec2;
//std::vector<Record> rec;
std::string name;
};
using boost::fusion::operator<<;
}
BOOST_FUSION_ADAPT_STRUCT(ast::Record,
name, id
)
BOOST_FUSION_ADAPT_STRUCT(ast::Document,
rec, rec2,
//rec,
name
)
namespace parser{
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
using x3::lit;
using x3::int_;
using ascii::char_;
const auto identifier = +char_("a-z");
const x3::rule<class record, ast::Record> record = "record";
const auto record_def = lit("record") >> identifier >> lit("{") >> int_ >> lit("}");
const x3::rule<class document, ast::Document> document = "document";
const auto document_def =
record >> record
//+record // This should generate a sequence
>> identifier
;
BOOST_SPIRIT_DEFINE(document, record);
}
namespace{
constexpr char g_input[] = R"input(
record foo{42}
record bar{73}
foobar
)input";
}
int main(){
using boost::spirit::x3::ascii::space;
std::string str = g_input;
ast::Document unit;
bool r = phrase_parse(str.begin(), str.end(), parser::document, space, unit);
std::cout << "Got: " << unit << "\n";
return 0;
}
But when I change the rule to parse multiple records(instead of exactly 2) I would expect it to have a std::vector<Record> as an attribute. But all I get is a long compiler error that does not help me very much.
Can someone point me to what I am doing wrong in order to compose the attributes correctly?
I think the whole reason it didn't compile is because you tried to print the result... and std::vector<Record> doesn't know how to be streamed:
namespace ast {
using boost::fusion::operator<<;
static inline std::ostream& operator<<(std::ostream& os, std::vector<Record> const& rs) {
os << "{ ";
for (auto& r : rs) os << r << " ";
return os << "}";
}
}
Some more notes:
adding lexemes where absolutely required (!)
simplifying (no need to BOOST_SPIRIT_DEFINE unless recursive rules/separate TUs)
dropping redundant lit
I arrived at
Live On Coliru
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
namespace ast {
struct Record{
int id;
std::string name;
};
struct Document{
std::vector<Record> rec;
std::string name;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::Record, name, id)
BOOST_FUSION_ADAPT_STRUCT(ast::Document, rec, name)
namespace ast {
using boost::fusion::operator<<;
static inline std::ostream& operator<<(std::ostream& os, std::vector<Record> const& rs) {
os << "{ ";
for (auto& r : rs) os << r << " ";
return os << "}";
}
}
namespace parser {
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
const auto identifier = x3::lexeme[+x3::char_("a-z")];
const auto record = x3::rule<class record, ast::Record> {"record"}
= x3::lexeme["record"] >> identifier >> "{" >> x3::int_ >> "}";
const auto document = x3::rule<class document, ast::Document> {"document"}
= +record
>> identifier
;
}
int main(){
std::string const str = "record foo{42} record bar{73} foobar";
auto f = str.begin(), l = str.end();
ast::Document unit;
if (phrase_parse(f, l, parser::document, parser::ascii::space, unit)) {
std::cout << "Got: " << unit << "\n";
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
}
}
Prints
Got: ({ (foo 42) (bar 73) } foobar)

Putting a value to ostream

I have this code below which parses a for statement, but I am not sure how to put any value into the ostream when calling the method write(...). What can I do? (e.g write("for (........."))
#include <ostream>
#include <iostream>
using namespace std;
//I cut out the declaration bit here
typedef const string type;
private:
type *initializer;
type *condition;
type *increment;
type *body;
public:
void write(ostream& stream) const {
stream
<< "for ("
<< *initializer << "; "
<< *condition << "; "
<< *increment << ")\n{\n"
<< *body
<< "}";
}
I guess you try to learn using ostream as an input in a function. But it seems that you mixing things that how to use classs and methods.
Maybe this is no avail but i can give you a little snippet to give you some opinion.
#include <iostream>
#include <string>
using namespace std;
typedef const string type;
type *init;
type *cond;
type *incr;
type *body;
void write(ostream& stream) {
stream
<< "for ("
<< *init << "; "
<< *cond << "; "
<< *incr << ")\n{\n"
<< *body
<< "\n}";
}
int main(int argc, char* argv[])
{
const string ini = "int i = 0";
const string con = "i < 10";
const string inc = "i++";
const string bod = "cout << i << endl;";
init = &ini;
cond = &con;
incr = &inc;
body = &bod;
write(cout);
return 0;
}
Try this code, examine and read more for more details.

boost::spirit access position iterator from semantic actions

Lets say I have code like this (line numbers for reference):
1:
2:function FuncName_1 {
3: var Var_1 = 3;
4: var Var_2 = 4;
5: ...
I want to write a grammar that parses such text, puts all indentifiers (function and variable names) infos into a tree (utree?).
Each node should preserve: line_num, column_num and symbol value. example:
root: FuncName_1 (line:2,col:10)
children[0]: Var_1 (line:3, col:8)
children[1]: Var_1 (line:4, col:9)
I want to put it into the tree because I plan to traverse through that tree and for each node I must know the 'context': (all parent nodes of current nodes).
E.g, while processing node with Var_1, I must know that this is a name for local variable for function FuncName_1 (that is currently being processed as node, but one level earlier)
I cannot figure out few things
Can this be done in Spirit with semantic actions and utree's ? Or should I use variant<> trees ?
How to pass to the node those three informations (column,line,symbol_name) at the same time ? I know I must use pos_iterator as iterator type for grammar but how to access those information in sematic action ?
I'm a newbie in Boost so I read the Spirit documentaiton over and over, I try to google my problems but I somehow cannot put all the pieces together ot find the solution. Seems like there was no one me with such use case like mine before (or I'm just not able to find it)
Looks like the only solutions with position iterator are the ones with parsing error handling, but this is not the case I'm interested in.
The code that only parses the code I was taking about is below but I dont know how to move forward with it.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_line_pos_iterator.hpp>
namespace qi = boost::spirit::qi;
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> pos_iterator_t;
template<typename Iterator=pos_iterator_t, typename Skipper=qi::space_type>
struct ParseGrammar: public qi::grammar<Iterator, Skipper>
{
ParseGrammar():ParseGrammar::base_type(SourceCode)
{
using namespace qi;
KeywordFunction = lit("function");
KeywordVar = lit("var");
SemiColon = lit(';');
Identifier = lexeme [alpha >> *(alnum | '_')];
VarAssignemnt = KeywordVar >> Identifier >> char_('=') >> int_ >> SemiColon;
SourceCode = KeywordFunction >> Identifier >> '{' >> *VarAssignemnt >> '}';
}
qi::rule<Iterator, Skipper> SourceCode;
qi::rule<Iterator > KeywordFunction;
qi::rule<Iterator, Skipper> VarAssignemnt;
qi::rule<Iterator> KeywordVar;
qi::rule<Iterator> SemiColon;
qi::rule<Iterator > Identifier;
};
int main()
{
std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var Var_2 = 4; }";
pos_iterator_t first(content.begin()), iter = first, last(content.end());
ParseGrammar<pos_iterator_t> resolver; // Our parser
bool ok = phrase_parse(iter,
last,
resolver,
qi::space);
std::cout << std::boolalpha;
std::cout << "\nok : " << ok << std::endl;
std::cout << "full : " << (iter == last) << std::endl;
if(ok && iter == last)
{
std::cout << "OK: Parsing fully succeeded\n\n";
}
else
{
int line = get_line(iter);
int column = get_column(first, iter);
std::cout << "-------------------------\n";
std::cout << "ERROR: Parsing failed or not complete\n";
std::cout << "stopped at: " << line << ":" << column << "\n";
std::cout << "remaining: '" << std::string(iter, last) << "'\n";
std::cout << "-------------------------\n";
}
return 0;
}
This has been a fun exercise, where I finally put together a working demo of on_success[1] to annotate AST nodes.
Let's assume we want an AST like:
namespace ast
{
struct LocationInfo {
unsigned line, column, length;
};
struct Identifier : LocationInfo {
std::string name;
};
struct VarAssignment : LocationInfo {
Identifier id;
int value;
};
struct SourceCode : LocationInfo {
Identifier function;
std::vector<VarAssignment> assignments;
};
}
I know, 'location information' is probably overkill for the SourceCode node, but you know... Anyways, to make it easy to assign attributes to these nodes without requiring semantic actions or lots of specifically crafted constructors:
#include <boost/fusion/adapted/struct.hpp>
BOOST_FUSION_ADAPT_STRUCT(ast::Identifier, (std::string, name))
BOOST_FUSION_ADAPT_STRUCT(ast::VarAssignment, (ast::Identifier, id)(int, value))
BOOST_FUSION_ADAPT_STRUCT(ast::SourceCode, (ast::Identifier, function)(std::vector<ast::VarAssignment>, assignments))
There. Now we can declare the rules to expose these attributes:
qi::rule<Iterator, ast::SourceCode(), Skipper> SourceCode;
qi::rule<Iterator, ast::VarAssignment(), Skipper> VarAssignment;
qi::rule<Iterator, ast::Identifier()> Identifier;
// no skipper, no attributes:
qi::rule<Iterator> KeywordFunction, KeywordVar, SemiColon;
We don't (essentially) modify the grammar, at all: attribute propagation is "just automatic"[2] :
KeywordFunction = lit("function");
KeywordVar = lit("var");
SemiColon = lit(';');
Identifier = as_string [ alpha >> *(alnum | char_("_")) ];
VarAssignment = KeywordVar >> Identifier >> '=' >> int_ >> SemiColon;
SourceCode = KeywordFunction >> Identifier >> '{' >> *VarAssignment >> '}';
The magic
How do we get the source location information attached to our nodes?
auto set_location_info = annotate(_val, _1, _3);
on_success(Identifier, set_location_info);
on_success(VarAssignment, set_location_info);
on_success(SourceCode, set_location_info);
Now, annotate is just a lazy version of a calleable that is defined as:
template<typename It>
struct annotation_f {
typedef void result_type;
annotation_f(It first) : first(first) {}
It const first;
template<typename Val, typename First, typename Last>
void operator()(Val& v, First f, Last l) const {
do_annotate(v, f, l, first);
}
private:
void static do_annotate(ast::LocationInfo& li, It f, It l, It first) {
using std::distance;
li.line = get_line(f);
li.column = get_column(first, f);
li.length = distance(f, l);
}
static void do_annotate(...) { }
};
Due to way in which get_column works, the functor is stateful (as it remembers the start iterator)[3]. As you can see do_annotate just accepts anything that derives from LocationInfo.
Now, the proof of the pudding:
std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var Var_2 = 4; }";
pos_iterator_t first(content.begin()), iter = first, last(content.end());
ParseGrammar<pos_iterator_t> resolver(first); // Our parser
ast::SourceCode program;
bool ok = phrase_parse(iter,
last,
resolver,
qi::space,
program);
std::cout << std::boolalpha;
std::cout << "ok : " << ok << std::endl;
std::cout << "full: " << (iter == last) << std::endl;
if(ok && iter == last)
{
std::cout << "OK: Parsing fully succeeded\n\n";
std::cout << "Function name: " << program.function.name << " (see L" << program.printLoc() << ")\n";
for (auto const& va : program.assignments)
std::cout << "variable " << va.id.name << " assigned value " << va.value << " at L" << va.printLoc() << "\n";
}
else
{
int line = get_line(iter);
int column = get_column(first, iter);
std::cout << "-------------------------\n";
std::cout << "ERROR: Parsing failed or not complete\n";
std::cout << "stopped at: " << line << ":" << column << "\n";
std::cout << "remaining: '" << std::string(iter, last) << "'\n";
std::cout << "-------------------------\n";
}
This prints:
ok : true
full: true
OK: Parsing fully succeeded
Function name: FuncName_1 (see L1:1:56)
variable Var_1 assigned value 3 at L2:3:14
variable Var_2 assigned value 4 at L3:3:15
Full Demo Program
See it Live On Coliru
Also showing:
error handling, e.g.:
Error: expecting "=" in line 3:
var Var_2 - 4; }
^---- here
ok : false
full: false
-------------------------
ERROR: Parsing failed or not complete
stopped at: 1:1
remaining: 'function FuncName_1 {
var Var_1 = 3;
var Var_2 - 4; }'
-------------------------
BOOST_SPIRIT_DEBUG macros
A bit of a hacky way to conveniently stream the LocationInfo part of any AST node, sorry :)
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/support_line_pos_iterator.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
typedef boost::spirit::line_pos_iterator<std::string::const_iterator> pos_iterator_t;
namespace ast
{
namespace manip { struct LocationInfoPrinter; }
struct LocationInfo {
unsigned line, column, length;
manip::LocationInfoPrinter printLoc() const;
};
struct Identifier : LocationInfo {
std::string name;
};
struct VarAssignment : LocationInfo {
Identifier id;
int value;
};
struct SourceCode : LocationInfo {
Identifier function;
std::vector<VarAssignment> assignments;
};
///////////////////////////////////////////////////////////////////////////
// Completely unnecessary tweak to get a "poor man's" io manipulator going
// so we can do `std::cout << x.printLoc()` on types of `x` deriving from
// LocationInfo
namespace manip {
struct LocationInfoPrinter {
LocationInfoPrinter(LocationInfo const& ref) : ref(ref) {}
LocationInfo const& ref;
friend std::ostream& operator<<(std::ostream& os, LocationInfoPrinter const& lip) {
return os << lip.ref.line << ':' << lip.ref.column << ':' << lip.ref.length;
}
};
}
manip::LocationInfoPrinter LocationInfo::printLoc() const { return { *this }; }
// feel free to disregard this hack
///////////////////////////////////////////////////////////////////////////
}
BOOST_FUSION_ADAPT_STRUCT(ast::Identifier, (std::string, name))
BOOST_FUSION_ADAPT_STRUCT(ast::VarAssignment, (ast::Identifier, id)(int, value))
BOOST_FUSION_ADAPT_STRUCT(ast::SourceCode, (ast::Identifier, function)(std::vector<ast::VarAssignment>, assignments))
struct error_handler_f {
typedef qi::error_handler_result result_type;
template<typename T1, typename T2, typename T3, typename T4>
qi::error_handler_result operator()(T1 b, T2 e, T3 where, T4 const& what) const {
std::cerr << "Error: expecting " << what << " in line " << get_line(where) << ": \n"
<< std::string(b,e) << "\n"
<< std::setw(std::distance(b, where)) << '^' << "---- here\n";
return qi::fail;
}
};
template<typename It>
struct annotation_f {
typedef void result_type;
annotation_f(It first) : first(first) {}
It const first;
template<typename Val, typename First, typename Last>
void operator()(Val& v, First f, Last l) const {
do_annotate(v, f, l, first);
}
private:
void static do_annotate(ast::LocationInfo& li, It f, It l, It first) {
using std::distance;
li.line = get_line(f);
li.column = get_column(first, f);
li.length = distance(f, l);
}
static void do_annotate(...) {}
};
template<typename Iterator=pos_iterator_t, typename Skipper=qi::space_type>
struct ParseGrammar: public qi::grammar<Iterator, ast::SourceCode(), Skipper>
{
ParseGrammar(Iterator first) :
ParseGrammar::base_type(SourceCode),
annotate(first)
{
using namespace qi;
KeywordFunction = lit("function");
KeywordVar = lit("var");
SemiColon = lit(';');
Identifier = as_string [ alpha >> *(alnum | char_("_")) ];
VarAssignment = KeywordVar > Identifier > '=' > int_ > SemiColon; // note: expectation points
SourceCode = KeywordFunction >> Identifier >> '{' >> *VarAssignment >> '}';
on_error<fail>(VarAssignment, handler(_1, _2, _3, _4));
on_error<fail>(SourceCode, handler(_1, _2, _3, _4));
auto set_location_info = annotate(_val, _1, _3);
on_success(Identifier, set_location_info);
on_success(VarAssignment, set_location_info);
on_success(SourceCode, set_location_info);
BOOST_SPIRIT_DEBUG_NODES((KeywordFunction)(KeywordVar)(SemiColon)(Identifier)(VarAssignment)(SourceCode))
}
phx::function<error_handler_f> handler;
phx::function<annotation_f<Iterator>> annotate;
qi::rule<Iterator, ast::SourceCode(), Skipper> SourceCode;
qi::rule<Iterator, ast::VarAssignment(), Skipper> VarAssignment;
qi::rule<Iterator, ast::Identifier()> Identifier;
// no skipper, no attributes:
qi::rule<Iterator> KeywordFunction, KeywordVar, SemiColon;
};
int main()
{
std::string const content = "function FuncName_1 {\n var Var_1 = 3;\n var Var_2 - 4; }";
pos_iterator_t first(content.begin()), iter = first, last(content.end());
ParseGrammar<pos_iterator_t> resolver(first); // Our parser
ast::SourceCode program;
bool ok = phrase_parse(iter,
last,
resolver,
qi::space,
program);
std::cout << std::boolalpha;
std::cout << "ok : " << ok << std::endl;
std::cout << "full: " << (iter == last) << std::endl;
if(ok && iter == last)
{
std::cout << "OK: Parsing fully succeeded\n\n";
std::cout << "Function name: " << program.function.name << " (see L" << program.printLoc() << ")\n";
for (auto const& va : program.assignments)
std::cout << "variable " << va.id.name << " assigned value " << va.value << " at L" << va.printLoc() << "\n";
}
else
{
int line = get_line(iter);
int column = get_column(first, iter);
std::cout << "-------------------------\n";
std::cout << "ERROR: Parsing failed or not complete\n";
std::cout << "stopped at: " << line << ":" << column << "\n";
std::cout << "remaining: '" << std::string(iter, last) << "'\n";
std::cout << "-------------------------\n";
}
return 0;
}
[1] sadly un(der)documented, except for the conjure sample(s)
[2] well, I used as_string to get proper assignment to Identifier without too much work
[3] There could be smarter ways about this in terms of performance, but for now, let's keep it simple

Generic way to convert a string into a numerical type?

I have this class:
template<typename T> class Parser
{
public:
Parser() : count(0) {}
virtual void parse(const string&);
void get_token(void);
private:
T result;
char token;
string expression;
int count;
};
now had the class not been generic, had the result been say, a double, I would have used this method to detect numbers.
while((strchr("1234567890.",token))
{
/* add token to a "temp" string */
/* etc. etc. */
}
result = atof(temp.c_str());
But since result is generic, I can't use any method like atof and atoi etc.
What do I do?
Boost has this functionality built-in:
#include <boost/lexical_cast.hpp>
void Parser<T>::get_token() {
std::string token = ...;
result = boost::lexical_cast<T>(token);
}
Add exception handling as required.
Or, perhaps you don't want to use Boost for some reason:
void Parser<T>::get_token() {
std::string token = ...;
std::stringstream ss;
ss << token;
ss >> result;
}
Check the error state of ss as required.
More expansive answers may be found on this related question, though it discusses only int specifically.
Another generic template based Numeric To String converter. It takes ints and doubles.
#include <sstream>
#include <iostream>
#include <string>
using namespace std;
template <class T>
inline std::string Numeric_To_String (const T& t)
{
std::stringstream ss;
ss << t;
return ss.str();
}
int main(int argc, char *argv[])
{
int i = 9;
double d = 1.2345;
string s;
cout <<"Generic Numeric_To_String( anyDatatype ) \n\n";
s = Numeric_To_String( i );
cout <<"int i to string : "<< s <<" "<< endl;
s = Numeric_To_String( d );
cout <<"double d to string : "<< s <<" "<< endl;
cout <<" \n";
return 0;
}
If you only have a hand full of types you want to parse, you can use template specialization:
template<>
void Parser<int>::parse(const string&)
{
result = atoi(string.c_str());
}
template<>
void Parser<float>::parse(const string&)
{
result = atof(string.c_str());
}
...
But this only works if you implement every convertion you need, of course.
With C++17 you can use the templated std::from_chars.
https://en.cppreference.com/w/cpp/utility/from_chars
#include <charconv>
#include <iostream>
template <typename Number>
auto stringTo(std::string_view str)
{
Number number;
std::from_chars(str.data(), str.data() + str.size(), number);
return number;
}
int main()
{
const auto str = std::string("42");
std::cout << stringTo<long>(str) << '\n';
std::cout << stringTo<double>(str) << '\n';
}
Check the return value of std::from_chars to detect errors.
const auto result = std::from_chars(...);
if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range)
{
std::cout << "string to number error" << '\n';
}
More info and examples: https://www.bfilipek.com/2018/12/fromchars.html
GCC and clang don't yet support the floating point version of std::from_chars (August 2019).

How do you use a variable stored in a boost spirit closure as input to a boost spirit loop parser?

I would like to use a parsed value as the input to a loop parser.
The grammar defines a header that specifies the (variable) size of the following string. For example, say the following string is the input to some parser.
12\r\nTest Payload
The parser should extract the 12, convert it to an unsigned int and then read twelve characters. I can define a boost spirit grammar that compiles, but an assertion in the boost spirit code fails at runtime.
#include <iostream>
#include <boost/spirit.hpp>
using namespace boost::spirit;
struct my_closure : public closure<my_closure, std::size_t> {
member1 size;
};
struct my_grammar : public grammar<my_grammar> {
template <typename ScannerT>
struct definition {
typedef rule<ScannerT> rule_type;
typedef rule<ScannerT, my_closure::context_t> closure_rule_type;
closure_rule_type header;
rule_type payload;
rule_type top;
definition(const my_grammar &self)
{
using namespace phoenix;
header = uint_p[header.size = arg1];
payload = repeat_p(header.size())[anychar_p][assign_a(self.result)];
top = header >> str_p("\r\n") >> payload;
}
const rule_type &start() const { return top; }
};
my_grammar(std::string &p_) : result(p_) {}
std::string &result;
};
int
main(int argc, char **argv)
{
const std::string content = "12\r\nTest Payload";
std::string payload;
my_grammar g(payload);
if (!parse(content.begin(), content.end(), g).full) {
std::cerr << "there was a parsing error!\n";
return -1;
}
std::cout << "Payload: " << payload << std::endl;
return 0;
}
Is it possible to tell spirit that the closure variable should be evaluated lazily? Is this behaviour supported by boost spirit?
This is much easier with the new qi parser available in Spirit 2. The following code snippet provides a full example that mostly works. An unexpected character is being inserted into the final result.
#include <iostream>
#include <string>
#include <boost/version.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_repeat.hpp>
#include <boost/spirit/include/qi_grammar.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
using boost::spirit::qi::repeat;
using boost::spirit::qi::uint_;
using boost::spirit::ascii::char_;
using boost::spirit::ascii::alpha;
using boost::spirit::qi::_1;
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
template <typename P, typename T>
void test_parser_attr(
char const* input, P const& p, T& attr, bool full_match = true)
{
using boost::spirit::qi::parse;
char const* f(input);
char const* l(f + strlen(f));
if (parse(f, l, p, attr) && (!full_match || (f == l)))
std::cout << "ok" << std::endl;
else
std::cout << "fail" << std::endl;
}
static void
straight_forward()
{
std::string str;
int n;
test_parser_attr("12\r\nTest Payload",
uint_[phx::ref(n) = _1] >> "\r\n" >> repeat(phx::ref(n))[char_],
str);
std::cout << "str.length() == " << str.length() << std::endl;
std::cout << n << "," << str << std::endl; // will print "12,Test Payload"
}
template <typename P, typename T>
void
test_phrase_parser(char const* input, P const& p,
T& attr, bool full_match = true)
{
using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::ascii::space;
char const* f(input);
char const* l(f + strlen(f));
if (phrase_parse(f, l, p, space, attr) && (!full_match || (f == l)))
std::cout << "ok" << std::endl;
else
std::cout << "fail" << std::endl;
}
template <typename Iterator>
struct test_grammar
: qi::grammar<Iterator, std::string(), qi::locals<unsigned> > {
test_grammar()
: test_grammar::base_type(my_rule)
{
using boost::spirit::qi::_a;
my_rule %= uint_[_a = _1] >> "\r\n" >> repeat(_a)[char_];
}
qi::rule<Iterator, std::string(), qi::locals<unsigned> > my_rule;
};
static void
with_grammar_local_variable()
{
std::string str;
test_phrase_parser("12\r\nTest Payload", test_grammar<const char*>(), str);
std::cout << str << std::endl; // will print "Test Payload"
}
int
main(int argc, char **argv)
{
std::cout << "boost version: " << BOOST_LIB_VERSION << std::endl;
straight_forward();
with_grammar_local_variable();
return 0;
}
What you are looking for is lazy_p, check the example here: http://www.boost.org/doc/libs/1_35_0/libs/spirit/doc/the_lazy_parser.html