I want to parse string which consists of CC[n], where 1 <= n <= 4 or from SERVICE[k], where 1 <= k <= 63.
Valid strings: "CC1", "CC2", "CC3", "CC4", "SERVICE1", "SERVICE2", ..., "SERVICE63".
I wrote the next expression:
( '"' >> (qi::raw["CC" >> qi::uint_] | qi::raw["SERVICE" >> qi::uint_]) >> '"' >> qi::eoi)
But how can I limit n and k?
In the output I need to get the full string: CC1, CC2, ... SERVICE63.
The simplest way would be to use symbols<>.
The elaborate way is to validate the numbers in semantic actions.
My recommendation is either symbols OR separating semantic validation from parsing (i.e. parse the numbers raw and validate the AST after the parse).
Symbols
This is likely the most flexible and most efficient option, and it allows you to be strongly typed in your AST domain. It sidesteps the compilation overhead and complexity of semantic actions: Boost Spirit: "Semantic actions are evil"?
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;

// Validates identifiers of the form CC1..CC4 or SERVICE1..SERVICE63 by looking
// them up in two fixed symbol tables and prints a verdict for each sample.
int main() {
    qi::symbols<char> cc, service;
    cc += "CC1", "CC2", "CC3", "CC4";
    service += "SERVICE1", "SERVICE2", "SERVICE3", "SERVICE4", "SERVICE5",
        "SERVICE6", "SERVICE7", "SERVICE8", "SERVICE9", "SERVICE10",
        "SERVICE11", "SERVICE12", "SERVICE13", "SERVICE14", "SERVICE15",
        "SERVICE16", "SERVICE17", "SERVICE18", "SERVICE19", "SERVICE20",
        "SERVICE21", "SERVICE22", "SERVICE23", "SERVICE24", "SERVICE25",
        "SERVICE26", "SERVICE27", "SERVICE28", "SERVICE29", "SERVICE30",
        "SERVICE31", "SERVICE32", "SERVICE33", "SERVICE34", "SERVICE35",
        "SERVICE36", "SERVICE37", "SERVICE38", "SERVICE39", "SERVICE40",
        "SERVICE41", "SERVICE42", "SERVICE43", "SERVICE44", "SERVICE45",
        "SERVICE46", "SERVICE47", "SERVICE48", "SERVICE49", "SERVICE50",
        "SERVICE51", "SERVICE52", "SERVICE53", "SERVICE54", "SERVICE55",
        "SERVICE56", "SERVICE57", "SERVICE58", "SERVICE59", "SERVICE60",
        "SERVICE61", "SERVICE62", "SERVICE63";

    for (std::string const input : {
             // valid:
             "CC1",
             "CC2",
             "CC3",
             "CC4",
             "SERVICE1",
             "SERVICE2",
             "SERVICE63",
             // invalid:
             "CC0",
             "CC5",
             "SERVICE0",
             "SERVICE64",
         }) {
        // A symbol table matches the longest entry that is a prefix of the
        // input, so "SERVICE64" would match "SERVICE6" and leave "4" behind.
        // Requiring end-of-input rejects anything with leftover characters.
        bool valid = parse(begin(input), end(input), (service | cc) >> qi::eoi);
        std::cout << std::quoted(input) << " -> "
                  << (valid ? "valid" : "invalid") << "\n";
    }
}
Prints
"CC1" -> valid
"CC2" -> valid
"CC3" -> valid
"CC4" -> valid
"SERVICE1" -> valid
"SERVICE2" -> valid
"SERVICE63" -> valid
"CC0" -> invalid
"CC5" -> invalid
"SERVICE0" -> invalid
"SERVICE64" -> invalid
Bonus: the strongtyped idea: http://coliru.stacked-crooked.com/a/2cb07d4da9aad39e
Semantic Actions
In a nutshell:
// Parses an unsigned integer and fails the match (through _pass) unless the
// value lies in the inclusive range given by the inherited attributes _r1.._r2.
qi::rule<It, intmax_t(intmax_t min, intmax_t max)> constrained_num =
qi::uint_[_pass = (_1 >= _r1 && _1 <= _r2)];
// "CC" followed by a number in 1..4; "SERVICE" followed by a number in 1..63.
qi::rule<It> cc = "CC" >> constrained_num(1, 4),
service = "SERVICE" >> constrained_num(1, 63);
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;
using It = std::string::const_iterator;

// Validates identifiers of the form CC1..CC4 or SERVICE1..SERVICE63 by parsing
// the numeric suffix and range-checking it in a semantic action.
int main() {
    using namespace qi::labels;

    // Fails the match unless the parsed number is within [_r1, _r2].
    qi::rule<It, std::intmax_t(std::intmax_t min, std::intmax_t max)> constrained_num =
        qi::uint_[_pass = (_1 >= _r1 && _1 <= _r2)];

    qi::rule<It> cc      = "CC" >> constrained_num(1, 4),
                 service = "SERVICE" >> constrained_num(1, 63);

    for (std::string const input : {
             // valid:
             "CC1",
             "CC2",
             "CC3",
             "CC4",
             "SERVICE1",
             "SERVICE2",
             "SERVICE63",
             // invalid:
             "CC0",
             "CC5",
             "SERVICE0",
             "SERVICE64",
         }) {
        // Require end-of-input so that a valid prefix followed by trailing
        // characters (e.g. "CC1x") is not reported as valid.
        bool valid = parse(begin(input), end(input), (service | cc) >> qi::eoi);
        std::cout << std::quoted(input) << " -> "
                  << (valid ? "valid" : "invalid") << "\n";
    }
}
Prints the same as above
To limit the range of uint_, you can perform range checking in a semantic action. It can be implemented, for example, as a lambda or, more concisely, as a Boost.Phoenix expression.
The following code parses these numbers into a vector (omitting the strings):
#include <iostream>
#include <string>
#include <vector>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

// Extracts the numeric suffixes of whitespace-separated CCn / SERVICEk tokens,
// accepting only 1 <= n <= 4 and 1 <= k <= 63, and prints one number per line.
int main()
{
    std::string input = "CC1 CC2 CC3 CC4 SERVICE1 SERVICE2";
    std::vector<unsigned int> out;
    using namespace boost::spirit::qi;

    auto first = input.begin();
    auto const last = input.end();
    phrase_parse(
        first,
        last,
        *(lexeme[lit("CC") >> uint_ [ _pass = (_1>=1 && _1<=4) ]] |
          lexeme[lit("SERVICE") >> uint_ [ _pass = (_1>=1 && _1<=63) ]]),
        ascii::space,
        out
    );

    // The Kleene star succeeds even when it matches nothing, so a rejected
    // token shows up as unconsumed input rather than as a failed parse.
    if (first != last)
        std::cerr << "Unparsed input: '" << std::string(first, last) << "'\n";

    for (auto i : out)
        std::cout << i << '\n';
}
Related
With a simple Boost qi grammar, how can I make it rounding my number?
This is the parser:
factor =
float_ [_val = _1]
| ('-' >> factor [_val = -_1])
| ('+' >> factor [_val = _1])
;
This can parse a float and it can be also negative.
I want to round the float, so I would add something like this to the grammar:
| ('~' >> factor [_val = round(_1)])
But this results a compile-time error:
no type named ‘__type’ in ‘struct __gnu_cxx::__enable_if<false, double>’
This error is not too informative for me, can you please help? I want to be able to round a number, ie:
~1.8 -> 2
~1.2 -> 1
Note: I'm parsing with phrase_parse.
Semantic actions require Phoenix Actors, which are deferred functions.
Options:
Adaptation macros https://www.boost.org/doc/libs/1_73_0/libs/phoenix/doc/html/phoenix/modules/function/adapting_functions.html
phoenix::function<>
phoenix::bind
write your own, see for more details https://www.boost.org/doc/libs/1_68_0/libs/spirit/doc/html/spirit/qi/tutorials/semantic_actions.html#spirit.qi.tutorials.semantic_actions.examples_of_semantic_actions
Simplified Test Bed
Just parsing a number:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
// Baseline test bed: parses the literal "1.75" as a double and prints it unmodified.
int main() {
std::string s = "1.75";
double v;
// v is only read inside the branch taken when qi::parse reports success.
if (qi::parse(begin(s), end(s), qi::double_, v)) {
std::cout << "Parsed: " << v << "\n";
}
}
Prints Live On Coliru:
Parsed: 1.75
Adaptation
Using the macros:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <cmath>
#include <iostream>
#include <string>
namespace qi = boost::spirit::qi;

// Adapts std::round into a lazy unary Phoenix function named round_, so that
// it can be invoked from inside a semantic action.
BOOST_PHOENIX_ADAPT_FUNCTION(double, round_, std::round, 1)

// Parses "1.75" and stores the rounded value, so the program prints "Parsed: 2".
int main() {
    std::string s = "1.75";
    double v = 0;
    using namespace qi::labels;
    // The action writes through phoenix::ref, so it does not rely on any
    // enclosing rule context for its result.
    if (qi::parse(begin(s), end(s), qi::double_[boost::phoenix::ref(v) = round_(_1)])) {
        std::cout << "Parsed: " << v << "\n";
    }
}
Prints Live On Coliru:
Parsed: 2
function<>
You can get away with hardcoding a signature here:
boost::phoenix::function<double(*)(double)> round_(::round);
However, the real power comes with polymorphic callables:
// Polymorphic function object: rounds any value for which an unqualified call
// to round() resolves, whether to std::round or to an overload found by ADL.
struct round_f {
    template <typename Number>
    auto operator()(Number const& value) const {
        using std::round; // keep std::round as a candidate next to ADL overloads
        auto const rounded = round(value);
        return rounded;
    }
};
boost::phoenix::function<round_f> round_{};
Now you can use the round_ actor on any type that has a compatible free-function round overload. Handy if tomorrow you decide to parse long double, float or boost::multiprecision::cpp_dec_float.
See it Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <cmath>
namespace qi = boost::spirit::qi;
// Function object that rounds its argument through an unqualified round() call.
struct round_f {
template <typename T> auto operator()(T const& v) const {
using std::round; // activate ADL
return round(v);
}
};
// Lazy Phoenix wrapper around round_f, usable inside semantic actions.
boost::phoenix::function<round_f> round_{};
// Parses "1.75"; the semantic action assigns the rounded parse result.
int main() {
std::string s = "1.75";
double v;
using namespace qi::labels;
if (qi::parse(begin(s), end(s), qi::double_ [ _val = round_(_1) ], v)) {
std::cout << "Parsed: " << v << "\n";
}
}
Prints
Parsed: 2
Using phoenix::bind
As a lower-level building block, you can bind unwrapped callables:
// Same rounding action, but binding a temporary round_f directly with
// phoenix::bind instead of going through a phoenix::function wrapper.
if (qi::parse(begin(s), end(s), qi::double_
[ _val = phoenix::bind(round_f{}, _1) ], v))
{
std::cout << "Parsed: " << v << "\n";
}
If you don't mind ugly:
// Binds std::round itself; the static_cast selects the double(double) overload
// because the name of an overloaded function cannot be passed to bind as-is.
if (qi::parse(begin(s), end(s), qi::double_
[ _val = phoenix::bind(static_cast<double(&)(double)>(std::round), _1) ], v))
{
std::cout << "Parsed: " << v << "\n";
}
See both Live On Coliru
Using boost::spirit, if I have a recursive rule to parse parentheses
rule<std::string::iterator, std::string()> term;
term %= string("(") >> *term >> string(")");
how do I limit the maximum amount of recursion? For example, if I try to parse a million nested parentheses, I get a segfault because the stack size has been exceeded. To be concrete, here is a complete sample.
#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
// Builds string_size/2 opening parentheses followed by as many closing ones,
// then parses the result with a self-referencing rule.
int main(void)
{
using namespace boost::spirit;
using namespace boost::spirit::qi;
const size_t string_size = 1000000;
std::string str;
str.resize(string_size);
// Fill the first half with '(' and mirror ')' into the second half.
for (size_t s=0; s<str.size()/2; ++s)
{
str[s]='(';
str[str.size() - s -1] = ')';
}
// term refers to itself, so every nesting level of the input is handled by a
// nested invocation of the rule; nothing here bounds that nesting.
rule<std::string::iterator, std::string()> term;
term %= string("(") >> *term >> string(")");
std::string h;
parse(str.begin(), str.end(), term, h);
}
I compiled it with the command
g++ simple.cxx -o simple -std=c++11
It works fine if I set string_size to 1000 instead of 1000000.
Keep track of the depth in a qi::locals<> variable or a phx::ref().
In this case an inherited attribute can take the role of the local quite naturally:
// term receives the current nesting depth as an inherited attribute.
qi::rule<std::string::const_iterator, std::string(size_t depth)> term;
// Readable name for the first inherited attribute placeholder (_r1).
qi::_r1_type _depth;
term %=
// Guard: the rule only matches while the depth is below 32.
qi::eps(_depth < 32) >>
// Every nested term is invoked with the depth incremented by one.
qi::string("(") >> *term(_depth + 1) >> qi::string(")");
term will now fail when depth exceeds 32.
Full Sample
Live On Coliru
#include <iostream>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
// For several nesting depths, builds "(((...)))" and parses it with a rule that
// refuses to recurse deeper than 32 levels, reporting success or the leftover.
int main(void) {
    for (size_t n : { 2, 4, 8, 16, 32, 64 }) {
        // n opening parentheses followed by n closing ones. Built directly so
        // the loop variable is left untouched and no front-insertion is needed.
        std::string const str = std::string(n, '(') + std::string(n, ')');
        std::cout << "Input length " << str.length() << "\n";

        qi::rule<std::string::const_iterator, std::string(size_t depth)> term;
        qi::_r1_type _depth; // readable name for the inherited depth attribute
        term %=
            qi::eps(_depth < 32) >>
            qi::string("(") >> *term(_depth + 1) >> qi::string(")");

        std::string h;
        auto f = str.begin(), l = str.end();
        bool ok = qi::parse(f, l, term(0u), h);
        if (ok)
            std::cout << "Ok: " << h << "\n";
        else
            std::cout << "Fail\n";

        if (f != l) {
            // Clamp the preview to what is really left: forming f + 40 when
            // fewer than 40 characters remain would step past the end.
            auto const preview = std::min<std::string::difference_type>(40, l - f);
            std::cout << "Remaining unparsed: '" << std::string(f, f + preview) << "'...\n";
        }
    }
}
Output:
Input length 4
Ok: (())
Input length 8
Ok: (((())))
Input length 16
Ok: (((((((())))))))
Input length 32
Ok: (((((((((((((((())))))))))))))))
Input length 64
Ok: (((((((((((((((((((((((((((((((())))))))))))))))))))))))))))))))
Input length 128
Fail
Remaining unparsed: '(((((((((((((((((((((((((((((((((((((((('...
Can you help me understand the difference between the a % b parser and its expanded a >> *(b >> a) form in Boost.Spirit? Even though the reference manual states that they are equivalent,
The list operator, a % b, is a binary operator that matches a list of one or more repetitions of a separated by occurrences of b. This is equivalent to a >> *(b >> a).
the following program produces different results depending on which is used:
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
struct Record {
int id;
std::vector<int> values;
};
BOOST_FUSION_ADAPT_STRUCT(Record,
(int, id)
(std::vector<int>, values)
)
// Parses the same input twice into a Record: once with the list operator (%)
// and once with its hand-expanded form, printing what each run stored.
int main() {
namespace qi = boost::spirit::qi;
const auto str = std::string{"1: 2, 3, 4"};
// NOTE(review): these Qi expressions are held in `auto` variables; storing Qi
// expressions this way is reported to leave dangling references unless
// wrapped in qi::copy — confirm against the Boost.Spirit documentation.
const auto rule1 = qi::int_ >> ':' >> (qi::int_ % ',') >> qi::eoi;
const auto rule2 = qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_)) >> qi::eoi;
// First run: list-operator form.
Record record1;
if (qi::phrase_parse(str.begin(), str.end(), rule1, qi::space, record1)) {
std::cout << record1.id << ": ";
for (const auto& value : record1.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
// Second run: expanded form of the same grammar.
Record record2;
if (qi::phrase_parse(str.begin(), str.end(), rule2, qi::space, record2)) {
std::cout << record2.id << ": ";
for (const auto& value : record2.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
Live on Coliru
1: 2, 3, 4,
1: 2,
rule1 and rule2 are different only in that rule1 uses the list operator ((qi::int_ % ',')) and rule2 uses its expanded form ((qi::int_ >> *(',' >> qi::int_))). However, rule1 produced 1: 2, 3, 4, (as expected) and rule2 produced 1: 2,. I cannot understand the result of rule2: 1) why is it different from that of rule1 and 2) why were 3 and 4 not included in record2.values even though phrase_parse returned true somehow?
Update X3 version added
First off, you've fallen into a deep trap here:
Qi rules don't work with auto. Use qi::copy or just use qi::rule<>. Your program has undefined behaviour and indeed it crashed for me (valgrind pointed out where the dangling references originated).
So, first off:
const auto rule = qi::copy(qi::int_ >> ':' >> (qi::int_ % ',') >> qi::eoi);
Now, when you delete the redundancy in the program, you get:
Reproducing the problem
Live On Coliru
int main() {
test(qi::copy(qi::int_ >> ':' >> (qi::int_ % ',')));
test(qi::copy(qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_))));
}
Printing
1: 2, 3, 4,
1: 2,
The cause and the fix
What happened to 3, 4 which was successfully parsed?
Well, the attribute propagation rules indicate that qi::int_ >> *(',' >> qi::int_) exposes a tuple<int, vector<int> >. In a bid to magically DoTheRightThing(TM), Spirit accidentally misfires and "assigns" the int into the attribute reference, ignoring the remaining vector<int>.
If you want to make container attributes parse as "an atomic group", use qi::as<>:
test(qi::copy(qi::int_ >> ':' >> qi::as<Record::values_t>() [ qi::int_ >> *(',' >> qi::int_)]));
Here as<> acts as a barrier for the attribute compatibility heuristics and the grammar knows what you meant:
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
struct Record {
int id;
using values_t = std::vector<int>;
values_t values;
};
BOOST_FUSION_ADAPT_STRUCT(Record, id, values)
namespace qi = boost::spirit::qi;
template <typename T>
void test(T const& rule) {
const std::string str = "1: 2, 3, 4";
Record record;
if (qi::phrase_parse(str.begin(), str.end(), rule >> qi::eoi, qi::space, record)) {
std::cout << record.id << ": ";
for (const auto& value : record.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
// Runs the same input through three equivalent-looking grammars.
int main() {
// List operator: all values end up in Record::values.
test(qi::copy(qi::int_ >> ':' >> (qi::int_ % ',')));
// Hand-expanded list: only the first value ends up in Record::values.
test(qi::copy(qi::int_ >> ':' >> (qi::int_ >> *(',' >> qi::int_))));
// Expanded list wrapped in qi::as<>: all values are stored again.
test(qi::copy(qi::int_ >> ':' >> qi::as<Record::values_t>() [ qi::int_ >> *(',' >> qi::int_)]));
}
Prints
1: 2, 3, 4,
1: 2,
1: 2, 3, 4,
Because it's time to get people started with X3 (the new version of Spirit), and because I like to challenge myself to do the corresponding tasks in Spirit X3, here is the Spirit X3 version.
There's no problem with auto in X3.
The "broken" case also behaves much better, triggering this static assertion:
// If you got an error here, then you are trying to pass
// a fusion sequence with the wrong number of elements
// as that expected by the (sequence) parser.
static_assert(
fusion::result_of::size<Attribute>::value == (l_size + r_size)
, "Attribute does not have the expected size."
);
That's nice, right?
The workaround seems a bit less readable:
test(int_ >> ':' >> (rule<struct _, Record::values_t>{} = (int_ >> *(',' >> int_))));
But it would be trivial to write your own as<> "directive" (or just a function), if you wanted:
namespace {
// Helper providing an as<T>[expr] syntax for X3: operator[] wraps the given
// expression in a rule (tag `_`, name "as") whose attribute type is T.
template <typename T>
struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
// Variable template supplying the object used in as<T>[expr].
template <typename T> static const as_type<T> as = {};
}
DEMO
Live On Coliru
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
struct Record {
int id;
using values_t = std::vector<int>;
values_t values;
};
namespace x3 = boost::spirit::x3;
template <typename T>
void test(T const& rule) {
const std::string str = "1: 2, 3, 4";
Record record;
auto attr = std::tie(record.id, record.values);
if (x3::phrase_parse(str.begin(), str.end(), rule >> x3::eoi, x3::space, attr)) {
std::cout << record.id << ": ";
for (const auto& value : record.values) { std::cout << value << ", "; }
std::cout << '\n';
} else {
std::cerr << "syntax error\n";
}
}
namespace {
template <typename T>
struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
template <typename T> static const as_type<T> as = {};
}
// Runs the X3 variants of the list grammar against the sample input.
int main() {
using namespace x3;
// List operator form.
test(int_ >> ':' >> (int_ % ','));
//test(int_ >> ':' >> (int_ >> *(',' >> int_))); // COMPILER asserts "Attribute does not have the expected size."
// "clumsy" x3 style workaround
test(int_ >> ':' >> (rule<struct _, Record::values_t>{} = (int_ >> *(',' >> int_))));
// using an ad-hoc `as<>` implementation:
test(int_ >> ':' >> as<Record::values_t>[int_ >> *(',' >> int_)]);
}
Prints
1: 2, 3, 4,
1: 2, 3, 4,
1: 2, 3, 4,
I am trying to use Boost Spirit X3 with semantic actions while parsing the structure to an AST. If I use a rule without separate definition and instantiation it works just fine, for example:
#include <vector>
#include <string>
#include <iostream>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
namespace ast
{
struct ast_struct
{
int number;
std::vector<int> numbers;
};
}
BOOST_FUSION_ADAPT_STRUCT(
ast::ast_struct,
(int, number)
(std::vector<int>, numbers)
)
namespace x3 = boost::spirit::x3;
using namespace std;
void parse( const std::string &data )
{
string::const_iterator begin = data.begin();
string::const_iterator end = data.end();
unsigned n(0);
auto f = [&n]( auto &ctx )
{
n = x3::_attr(ctx);
};
ast::ast_struct ast;
bool r = x3::parse( begin, end,
x3::int_[f] >> +( x3::omit[+x3::blank] >> x3::int_ ), ast );
if ( r && begin == end )
{
cout << "n: " << n << ", ";
std::copy(ast.numbers.begin(), ast.numbers.end(),
std::ostream_iterator<int>(std::cout << ast.numbers.size() << " elements: ", " "));
cout << endl;
}
else
cout << "Parse failed" << endl;
}
int main()
{
parse( "3 1 2 3" );
parse( "4 1 2 3 4" );
return 0;
}
Running the code above (compiled with flags -std=c++14) outputs the expected result:
n: 3, 3 elements: 1 2 3
n: 4, 4 elements: 1 2 3 4
Now I am trying to have my Spirit X3 parser organized more or less the same way as the calc 9 example from Boost Spirit X3, but it does not work:
ast.hxx: defines the abstract syntax tree.
grammar.hxx: user interface exposing the parser methods.
grammar.cxx: instantiates the rules.
grammar_def.hxx: parser grammar definition.
config.hxx: parser configuration.
main.cxx: parser usage example.
ast.hxx:
#ifndef AST_HXX
#define AST_HXX
#include <vector>
#include <boost/fusion/include/adapt_struct.hpp>
namespace ast
{
struct ast_struct
{
int number;
std::vector<int> numbers;
};
}
BOOST_FUSION_ADAPT_STRUCT(
ast::ast_struct,
(int, number)
(std::vector<int>, numbers)
)
#endif
grammar.hxx:
#ifndef GRAMMAR_HXX
#define GRAMMAR_HXX
#include "ast.hxx"
#include <boost/spirit/home/x3.hpp>
namespace parser
{
namespace x3 = boost::spirit::x3;
using my_rule_type = x3::rule<class my_rule_class, ast::ast_struct>;
BOOST_SPIRIT_DECLARE( my_rule_type );
const my_rule_type &get_my_rule();
}
#endif
grammar.cxx:
#include "grammar_def.hxx"
#include "config.hxx"
namespace parser
{
BOOST_SPIRIT_INSTANTIATE( my_rule_type, iterator_type, context_type )
}
grammar_def.hxx:
#ifndef GRAMMAR_DEF_HXX
#define GRAMMAR_DEF_HXX
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include "grammar.hxx"
#include "ast.hxx"
namespace parser
{
namespace x3 = boost::spirit::x3;
// The rule instance returned by get_my_rule().
const my_rule_type my_rule( "my_rule" );
// Namespace-scope variable written by the semantic action below.
unsigned n;
// Semantic action: copies the attribute of the parser it is attached to into n.
auto f = []( auto &ctx )
{
n = x3::_attr(ctx);
};
// One integer (with action f attached), then one or more blank-separated integers.
// NOTE(review): a semantic action inside a rule definition is reported to
// inhibit automatic attribute propagation — confirm against the X3 docs.
auto my_rule_def = x3::int_[f] >> +( x3::omit[+x3::blank] >> x3::int_ );
BOOST_SPIRIT_DEFINE( my_rule )
// Accessor giving other translation units access to the rule.
const my_rule_type &get_my_rule()
{
return my_rule;
}
}
#endif
config.hxx:
#ifndef CONFIG_HXX
#define CONFIG_HXX
#include <string>
#include <boost/spirit/home/x3.hpp>
namespace parser
{
namespace x3 = boost::spirit::x3;
using iterator_type = std::string::const_iterator;
using context_type = x3::unused_type;
}
#endif
main.cxx:
#include "ast.hxx"
#include "grammar.hxx"
#include "config.hxx"
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <string>
namespace x3 = boost::spirit::x3;
using namespace std;
void parse( const std::string &data )
{
parser::iterator_type begin = data.begin();
parser::iterator_type end = data.end();
ast::ast_struct ast;
cout << "Parsing [" << string(begin,end) << "]" << endl;
bool r = x3::parse( begin, end, parser::get_my_rule(), ast );
if ( r && begin == end )
{
std::copy(ast.numbers.begin(), ast.numbers.end(),
std::ostream_iterator<int>(std::cout << ast.numbers.size() << " elements: ", " "));
cout << endl;
}
else
cout << "Parse failed" << endl;
}
int main()
{
parse( "3 1 2 3" );
parse( "4 1 2 3 4" );
return 0;
}
Compiling main.cxx and grammar.cxx (flags: -std=c++14) and running the code above prints:
Parsing [3 1 2 3]
0 elements:
Parsing [4 1 2 3 4]
0 elements:
I apologize for the long source code, I tried to make it as small as possible.
Please notice I have some usage for the unsigned n global variable, it will be used with a custom repeat directive (see question here and one of the solutions here). In order to keep the question focused I removed the repeat part from this question, so even though I could remove the semantic action in this example, it is not a possible solution.
I would appreciate some help to get this issue uncovered, it is not clear to me why the code above does not work. Thank you in advance.
I must admit actually reconstructing your sample was a bit too much work for me (call me lazy...).
However, I know the answer and a trick to make your life simpler.
The Answer
Semantic actions on a rule definition inhibit automatic attribute propagation. From the Qi docs (the same goes for X3, but I always lose the link to the docs):
r = p; Rule definition
This is equivalent to r %= p (see below) if there are no semantic actions attached anywhere in p.
r %= p; Auto-rule definition
The attribute of p should be compatible with the synthesized attribute of r. When p is successful, its attribute is automatically propagated to r's synthesized attribute.
The Trick
You can inject state (your n reference, in this case) using the x3::with<> directive. That way you don't have the namespace global (n) and can make the parser reentrant, threadsafe etc.
Here's my "simplist" take on things, in a single file:
namespace parsing {
x3::rule<struct parser, ast::ast_struct> parser {"parser"};
struct state_tag { };
auto record_number = [](auto &ctx) {
unsigned& n = x3::get<state_tag>(ctx);
n = x3::_attr(ctx);
};
auto parser_def = x3::rule<struct parser_def, ast::ast_struct> {}
%= x3::int_[record_number] >> +(x3::omit[+x3::blank] >> x3::int_);
BOOST_SPIRIT_DEFINE(parser)
}
Tip: run the demo with = instead of the %= to see the difference in behaviour!
Note that get<state_tag>(ctx) returns a reference_wrapper<unsigned> just because we use the parser as follows:
// Parses `data` as a leading count followed by blank-separated integers and
// prints the count and the parsed numbers, or "Parse failed".
void parse(const std::string &data) {
    using namespace std;
    ast::ast_struct ast;
    // Initialised so that n never holds an indeterminate value, matching the
    // live demo; the semantic action overwrites it through the reference below.
    unsigned n = 0;
    // Inject a reference to n into the parser context under state_tag, and
    // require the whole input to be consumed.
    auto parser = x3::with<parsing::state_tag>(ref(n)) [parsing::parser] >> x3::eoi;
    if (x3::parse(data.begin(), data.end(), parser, ast)) {
        cout << "n: " << n << ", ";
        copy(ast.numbers.begin(), ast.numbers.end(), ostream_iterator<int>(cout << ast.numbers.size() << " elements: ", " "));
        cout << "\n";
    } else
        cout << "Parse failed\n";
}
Live Demo
Live On Coliru
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
namespace ast {
struct ast_struct {
int number;
std::vector<int> numbers;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::ast_struct, number, numbers)
namespace x3 = boost::spirit::x3;
namespace parsing {
// Rule producing an ast::ast_struct; its definition is parser_def below.
x3::rule<struct parser, ast::ast_struct> parser {"parser"};
// Tag type identifying the injected state in the parser context.
struct state_tag { };
// Semantic action: stores the attribute of the attached parser into the
// unsigned value found in the context under state_tag.
auto record_number = [](auto &ctx) {
unsigned& n = x3::get<state_tag>(ctx); // note: returns reference_wrapper<T>
n = x3::_attr(ctx);
};
// Definition assigned with %= to an inner rule so that the attribute is still
// propagated even though a semantic action is attached.
auto parser_def = x3::rule<struct parser_def, ast::ast_struct> {}
%= x3::int_[record_number] >> +(x3::omit[+x3::blank] >> x3::int_);
BOOST_SPIRIT_DEFINE(parser)
}
void parse(const std::string &data) {
using namespace std;
ast::ast_struct ast;
unsigned n = 0;
auto parser = x3::with<parsing::state_tag>(ref(n)) [parsing::parser] >> x3::eoi;
if (x3::parse(data.begin(), data.end(), parser, ast)) {
cout << "n: " << n << ", ";
copy(ast.numbers.begin(), ast.numbers.end(), ostream_iterator<int>(cout << ast.numbers.size() << " elements: ", " "));
cout << "\n";
} else
cout << "Parse failed\n";
}
int main() {
parse("3 1 2 3");
parse("4 1 2 3 4");
}
Prints
n: 3, 3 elements: 1 2 3
n: 4, 4 elements: 1 2 3 4
Given the input string: A = 23; B = 5, I currently get the (expected) output:
Output: 0x50000023
Output: 0x50010005
-------------------------
I would like to see this instead:
Output: 0x50000023 // A = 23
Output: 0x50010005 // B = 5
-------------------------
The core line of code is:
statement = eps[_val = 0x50000000] >> identifier[_val += _1<<16] >>
"=" >> hex[_val += (_1 & 0x0000FFFF)];
Where identifier is a qi::symbols table lookup.
The rest of my code looks like this:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <iomanip>
#include <ios>
#include <string>
#include <complex>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
struct reg16_ : qi::symbols<char,unsigned> {
reg16_() {
add ("A", 0) ("B", 1) ("C", 2) ("D", 3) ;
}
} reg16;
template <typename Iterator>
struct dash_script_parser : qi::grammar<Iterator, std::vector<unsigned>(), ascii::space_type> {
dash_script_parser() : dash_script_parser::base_type(start) {
using qi::hex;
using qi::_val;
using qi::_1;
using qi::eps;
identifier %= reg16;
start %= (statement % ";" );
statement = eps[_val = 0x50000000] >> identifier[_val += _1<<16]>> "=" >> hex[_val += (_1 & 0x0000FFFF)];
}
qi::rule<Iterator, std::vector<unsigned>(), ascii::space_type> start;
qi::rule<Iterator, unsigned(), ascii::space_type> statement;
qi::rule<Iterator, unsigned()> identifier;
};
int
main()
{
std::cout << "\t\tA parser for Spirit...\n\n" << "Type [q or Q] to quit\n\n";
dash_script_parser<std::string::const_iterator> g;
std::string str;
while (getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q') break;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
std::vector<unsigned> strs;
bool r = phrase_parse(iter, end, g, boost::spirit::ascii::space, strs);
if (r && iter == end) {
for(std::vector<unsigned>::const_iterator it=strs.begin(); it<strs.end(); ++it)
std::cout << "Output: 0x" << std::setw(8) << std::setfill('0') << std::hex <<*it << "\n";
} else
std::cout << "Parsing failed\n";
}
return 0;
}
Update A newer answer brought iter_pos to my attention (from Boost Spirit Repository):
How do I capture the original input into the synthesized output from a spirit grammar?
This basically does the same as below, but without 'abusing' semantic actions (making it a much better fit, especially with automatic attribute propagation).
My gut feeling says that it will probably be easier to isolate statements into raw source iterator ranges first, and then parse the statements in isolation. That way, you'll have the corresponding source text at the start.
With that out of the way, here is an approach I tested to work without subverting your sample code too much:
1. Make the attribute type a struct
Replace the primitive unsigned with a struct that also contains the source snippet, verbatim, as a string:
struct statement_t
{
unsigned value;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(statement_t, (unsigned, value)(std::string, source));
2. Make the parser fill both fields
The good thing is, you were already using semantic actions, so it is merely building onto that. Note that the result is not very pretty, and would benefit hugely from being converted into a (fused) functor. But it shows the technique very clearly:
start %= (statement % ";" );
statement = qi::raw [
raw[eps] [ at_c<0>(_val) = 0x50000000 ]
>> identifier [ at_c<0>(_val) += _1<<16 ]
>> "=" >> hex [ at_c<0>(_val) += (_1 & 0x0000FFFF) ]
]
[ at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)) ]
;
3. Print
So, at_c<0>(_val) corresponds to statement::value, and at_c<1>(_val) corresponds to statement::source. This slightly modified output loop:
for(std::vector<statement_t>::const_iterator it=strs.begin(); it<strs.end(); ++it)
std::cout << "Output: 0x" << std::setw(8) << std::setfill('0') << std::hex << it->value << " // " << it->source << "\n";
outputs:
Output: 0x50000023 // A = 23
Output: 0x50010005 // B = 5
Full sample
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <iostream>
#include <iomanip>
#include <ios>
#include <string>
#include <complex>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
namespace phx = boost::phoenix;
// Symbol table mapping the register names "A".."D" to the values 0..3;
// reg16 is the single instance used by the grammar.
struct reg16_ : qi::symbols<char,unsigned> {
reg16_() {
add ("A", 0) ("B", 1) ("C", 2) ("D", 3) ;
}
} reg16;
struct statement_t
{
unsigned value;
std::string source;
};
BOOST_FUSION_ADAPT_STRUCT(statement_t, (unsigned, value)(std::string, source));
// Grammar for a ';'-separated list of "<register> = <hex>" statements. Each
// statement yields a statement_t holding the encoded value and the source text
// the statement was parsed from.
template <typename Iterator>
struct dash_script_parser : qi::grammar<Iterator, std::vector<statement_t>(), ascii::space_type> {
dash_script_parser() : dash_script_parser::base_type(start) {
using qi::hex;
using qi::_val;
using qi::_1;
using qi::eps;
using qi::raw;
// identifier yields the value stored in the reg16 symbol table.
identifier %= reg16;
using phx::begin;
using phx::end;
using phx::at_c;
using phx::construct;
start %= (statement % ";" );
// The outer raw[] exposes the iterator range matched by the whole statement;
// the inner actions build up field 0 (value), the outer action fills field 1
// (source) from that range.
statement = raw [
// Start from the base value 0x50000000.
raw[eps] [ at_c<0>(_val) = 0x50000000 ]
// Add the register index shifted into bits 16 and up.
>> identifier [ at_c<0>(_val) += _1<<16 ]
// Add the low 16 bits of the hex literal.
>> "=" >> hex [ at_c<0>(_val) += (_1 & 0x0000FFFF) ]
]
// Copy the matched source range into the string field.
[ at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)) ]
;
}
qi::rule<Iterator, std::vector<statement_t>(), ascii::space_type> start;
qi::rule<Iterator, statement_t(), ascii::space_type> statement;
// Declared without a skipper type.
qi::rule<Iterator, unsigned()> identifier;
};
int
main()
{
std::cout << "\t\tA parser for Spirit...\n\n" << "Type [q or Q] to quit\n\n";
dash_script_parser<std::string::const_iterator> g;
std::string str;
while (getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q') break;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
std::vector<statement_t> strs;
bool r = phrase_parse(iter, end, g, boost::spirit::ascii::space, strs);
if (r && iter == end) {
for(std::vector<statement_t>::const_iterator it=strs.begin(); it<strs.end(); ++it)
std::cout << "Output: 0x" << std::setw(8) << std::setfill('0') << std::hex << it->value << " // " << it->source << "\n";
} else
std::cout << "Parsing failed\n";
}
return 0;
}