The following program has been reduced from the original. I get a segmentation fault when it runs. If I remove line 24 with ArithmeticUnaryExpression then the program no longer crashes. How do I get rid of the segmentation fault?
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/include/qi_expect.hpp>
#include <boost/spirit/home/x3/directive/expect.hpp>
#include <iostream>
#include <string>
namespace wctl_parser {
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
namespace qi = boost::spirit::qi;
using x3::ulong_;
using x3::lexeme;
//--- Ast structures
struct ArithmeticUnaryExpression; // forward declaration: the variant below names this type before it is defined
using AtomicProp = std::string; // not referenced again in the visible part of this program
using ArithmeticExpression = x3::variant< // arithmetic AST node: either a unary expression or a literal number
x3::forward_ast<ArithmeticUnaryExpression>, // first alternative; forward_ast wraps the still-incomplete struct
unsigned long
>;
struct ArithmeticUnaryExpression { // unary operator applied to a nested arithmetic expression
std::string op;
ArithmeticExpression operand; // recursive member: holds another ArithmeticExpression variant
};
using Expression = x3::variant< // top-level AST node; currently has a single alternative
ArithmeticExpression
>;
// rule<T>(name): builds an x3::rule whose attribute type is T. The rule ID is a
// struct declared locally inside the lambda, and the rule name defaults to
// typeid(T).name() when no name is passed.
template <typename T> auto rule = [](const char* name = typeid(T).name()) {
struct _{};
return x3::rule<_, T> {name};
};
// as<T>(p): wraps parser p in a default-named rule<T>, i.e. gives p the attribute type T.
// Not called anywhere in the visible part of this program.
template <typename T> auto as = [](auto p) { return rule<T>() = p; };
//--- Rules
// Rule declarations; their definitions (the *_def objects) are attached by
// BOOST_SPIRIT_DEFINE further down.
x3::rule<struct aTrivRule, ArithmeticExpression> aTriv("aTriv");
x3::rule<struct exprRule, Expression> expr("expression");
// aTriv: currently only an unsigned long literal; the parenthesised
// sub-expression alternative is commented out.
auto const aTriv_def = rule<ArithmeticExpression>("aTriv")
= ulong_
// | '(' > expr > ')'
;
// primitive: exposes aTriv with an Expression attribute.
auto const primitive = rule<Expression>("primitive")
= aTriv
;
// expr: simply forwards to primitive in this reduced grammar.
auto const expr_def
= primitive
;
BOOST_SPIRIT_DEFINE(aTriv)
BOOST_SPIRIT_DEFINE(expr)
// Entry point used by main(): expr parsed with ascii::space as the skipper.
auto const entry = x3::skip(ascii::space) [expr];
} //End namespace
// Parses the fixed input "prop" with wctl_parser::entry and reports on stdout
// whether it matched, and whether the match consumed the whole input.
int main() {
    namespace x3 = boost::spirit::x3;

    std::string str("prop");
    wctl_parser::Expression root;

    auto iter = str.begin();
    auto end = str.end();
    const bool matched = parse(iter, end, wctl_parser::entry, root);

    if (!matched) {
        std::cout << "!! Parsing failed:" << std::endl
                  << str << std::endl
                  << std::endl
                  << "----------------------------\n";
        return 0;
    }

    std::cout << "Parses OK:" << std::endl << str << std::endl;
    // A successful parse may still have stopped before the end of the input.
    if (iter != end) {
        std::cout << "Partial match" << std::endl;
    }
    std::cout << std::endl << "----------------------------\n";
    return 0;
}
Your variant
using ArithmeticExpression = x3::variant<
x3::forward_ast<ArithmeticUnaryExpression>,
unsigned long
>;
will default-construct to the first element type. The first element type contains ArithmeticExpression which is also default constructed. Can you see the problem already?
Just make sure the default constructed state doesn't lead to infinite recursion:
using ArithmeticExpression = x3::variant<
unsigned long, // listed first so that a default-constructed variant holds a plain number
x3::forward_ast<ArithmeticUnaryExpression> // the recursive alternative is no longer the default
>;
Related
I am trying to parse a sequence of characters separated by a "," into an std::map<char,int> of pairs where the key is the character and the value is just a running count of the characters parsed so far.
For example, if the input is
a,b,c
The map should contain the pairs:
(a,1) , (b,2) , (c,3)
Here's the code I am using :
// Qi-based attempt at parsing a comma-separated list of characters into a map.
namespace myparser
{
// NOTE(review): `mapping` and `i` are not used anywhere in the visible code.
std::map<int, std::string> mapping;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
int i = 0;
// Parses [first, last) as `char (',' char)*`, skipping whitespace, and is meant
// to record every parsed character as a key of `v`.
// Returns true only if the grammar matched and consumed the whole input.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, std::map<char,int>& v)
{
// NOTE(review): double_ and push_back are brought into scope but not used below.
using qi::double_;
using qi::char_;
using qi::phrase_parse;
using qi::_1;
using ascii::space;
using phoenix::push_back;
bool r = phrase_parse(first, last,
// Begin grammar
(
// NOTE(review): the two semantic actions below are missing the closing ')'
// of v.insert(...). They also pass the placeholder _1 straight to
// std::make_pair, so the call is evaluated immediately rather than per match.
char_[v.insert(std::make_pair(_1,0)]
>> *(',' >> char_[v.insert(std::make_pair(_1,0)])
)
,
// End grammar
space);
if (first != last) // fail if we did not get a full match
return false;
return r;
}
//]
}
Then I try to print the pair in main like this:
// Reads lines from standard input and parses each one with
// myparser::parse_numbers, printing the resulting (character, count) pairs.
// Stops at end of input, on an empty line, or on a line starting with 'q'/'Q'.
int main() {
    std::string str;
    while (std::getline(std::cin, str)) {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::map<char, int> v;
        if (myparser::parse_numbers(str.begin(), str.end(), v)) {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << str << " Parses OK: " << std::endl;
            // Walk the map only after parsing has filled it. An iterator taken
            // from the still-empty map equals end() and would never visit the
            // inserted elements. The element type is pair<const char, int>, so
            // the key is printed as a char rather than converted to a string.
            for (const auto& entry : v) {
                std::cout << entry.first << " :: " << entry.second << std::endl;
            }
            std::cout << "\n-------------------------\n";
        } else {
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }
    return 0;
}
I am a beginner and I don't know how to fix this code . I also want to use strings instead of characters so I enter a sequence of strings separated by a "," and store them in a map similar to the one mentioned above. I would appreciate any help !
You cannot use Phoenix placeholders outside Phoenix deferred actors. E.g. the type of std::make_pair(qi::_1, 0) is std::pair<boost::phoenix::actor<boost::phoenix::argument<0>>, int>.
Nothing interoperates with such a thing. Certainly not std::map<>::insert.
What you'd need to do is wrap all the operations in semantic actions as Phoenix actors.
#include <boost/phoenix.hpp>
namespace px = boost::phoenix;
Then you can:
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace myparser {
using Map = std::map<char, int>;
// Parses [first, last) as `char (',' char)*` with qi::space as the skipper and
// inserts a (char, 0) pair into `m` for every character matched.
// Returns true only if the grammar matched and the whole input was consumed.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
// Deferred Phoenix actor: when invoked by the parser it constructs
// std::pair<char,int>(_1, 0) from the matched character and inserts it into m
// (held by reference), using m's end() as the position argument.
auto action = px::insert(px::ref(m), px::end(px::ref(m)),
px::construct<std::pair<char, int>>(qi::_1, 0));
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action] >> *(',' >> qi::char_[action]),
// End grammar
qi::space);
return r && first == last;
}
} // namespace myparser
See it Live
Easy peasy. Right.
I spent half an hour on that thing debugging why it wouldn't work. Why is this so hard?
It's because someone invented a whole meta-DSL to write "normal C++" but with deferred execution. Back when that happened it was pretty neat, but it is the mother of all leaky abstractions, with razor sharp edges.
So, what's new? Using C++11 you could:
Live
// C++11 variant: the insertion logic lives in an ordinary function object that
// is adapted for use in semantic actions with px::function.
// Returns true only if the grammar matched and the whole input was consumed.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
// Plain callable: records `ch` as a key of the referenced map with value 0.
struct action_f {
Map& m_;
void operator()(char ch) const { m_.emplace(ch, 0); }
};
// Lazy wrapper: action(qi::_1) yields an actor that calls action_f with the
// matched character.
px::function<action_f> action{{m}};
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action(qi::_1)] >> *(',' >> qi::char_[action(qi::_1)]),
// End grammar
qi::space);
return r && first == last;
}
Or using c++17:
Live
// C++17 variant: same as the function-object version, but the callable is a
// lambda and px::function's template argument is deduced.
// Returns true only if the grammar matched and the whole input was consumed.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
// The lambda captures m by reference and records each character with value 0.
px::function action{[&m](char ch) { m.emplace(ch, 0); }};
bool r = qi::phrase_parse( //
first, last,
// Begin grammar
qi::char_[action(qi::_1)] >> *(',' >> qi::char_[action(qi::_1)]),
// End grammar
qi::space);
return r && first == last;
}
On a tangent, you probably wanted to count things, so, maybe use
Live
px::function action{[&m](char ch) { m[ch] += 1; }};
By this time, you could switch to Spirit X3 (which requires C++14):
Live
#include <boost/spirit/home/x3.hpp>
#include <map>
namespace x3 = boost::spirit::x3;
namespace myparser {
using Map = std::map<char, int>;
// X3 variant: counts how often each character occurs in a comma-separated list.
// The trailing x3::eoi makes the parse fail unless the whole input is consumed,
// so the result of phrase_parse can be returned directly.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
// X3 semantic actions are plain callables taking the context; _attr(ctx) is
// the character that was just matched.
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
return x3::phrase_parse( //
first, last,
// Begin grammar
x3::char_[action] >> *(',' >> x3::char_[action]) >> x3::eoi,
// End grammar
x3::space);
}
} // namespace myparser
Now finally, let's simplify. p >> *(',' >> p) is just a clumsy way of saying p % ',':
Live
// Counts the occurrences of each character in a comma-separated list.
// Whitespace is skipped; the return value is whatever x3::phrase_parse reports.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
    // Bump the counter of the character that was just matched.
    auto tally = [&m](auto& ctx) { ++m[_attr(ctx)]; };
    // `p % ','` matches one or more p separated by commas.
    auto const grammar = x3::char_[tally] % ',';
    return x3::phrase_parse(first, last, grammar, x3::space);
}
And you wanted words, not characters:
Live
#include <boost/spirit/home/x3.hpp>
#include <map>
namespace x3 = boost::spirit::x3;
namespace myparser {
using Map = std::map<std::string, int>;
// Counts how often each word occurs in a comma-separated list of words.
template <typename Iterator>
bool parse_numbers(Iterator first, Iterator last, Map& m) {
// Increments the counter of the word that was just matched.
auto action = [&m](auto& ctx) { m[_attr(ctx)] += 1; };
// A word is any run of characters other than ','. The x3::space skipper is
// active inside the word as well, so whitespace is dropped from the collected
// text (e.g. "c++ is strange" is stored as "c++isstrange").
auto word_ = (*~x3::char_(','))[action];
return phrase_parse(first, last, word_ % ',', x3::space);
}
} // namespace myparser
#include <iomanip>
#include <iostream>
// Runs the word counter over a fixed sample input and prints every distinct
// word together with its number of occurrences.
int main() {
    for (std::string const input : {"foo,c++ is strange,bar,qux,foo,c++ is strange ,cuz"}) {
        std::cout << "Parsing " << std::quoted(input) << std::endl;

        std::map<std::string, int> counts;
        if (!myparser::parse_numbers(input.begin(), input.end(), counts)) {
            std::cerr << "Parsing failed\n";
            continue;
        }

        std::cout << counts.size() << " words:\n";
        for (auto& entry : counts) {
            std::cout << " - " << std::quoted(entry.first) << " :: " << entry.second << std::endl;
        }
    }
}
Prints
Parsing "foo,c++ is strange,bar,qux,foo,c++ is strange ,cuz"
5 words:
- "bar" :: 1
- "c++isstrange" :: 2
- "cuz" :: 1
- "foo" :: 2
- "qux" :: 1
Note the behaviour of the x3::space (like qi::space and qi::ascii::space above).
I have some troubles getting the error handler to work with boost spirit x3. I was looking at the documentation (https://www.boost.org/doc/libs/1_70_0/libs/spirit/doc/x3/html/spirit_x3/tutorials/error_handling.html) but I don't understand the part:
"Notice too that we subclass the employee_class from our error_handler. By doing so, we tell X3 that we want to call our error_handler whenever an exception is thrown somewhere inside the employee rule and whatever else it calls (i.e. the person and quoted_string rules)."
What does the type <ruleID>_class have to do with the registration?
I started with the functional composition described in the answer "Spirit X3: parser with internal state", but I cannot register the error handler. Here is my example:
#include <iostream>
#include <iomanip>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <boost/spirit/home/x3/support/utility/annotate_on_success.hpp>
namespace Parser {
namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;
// Mixin providing the on_error hook: formats an "Expecting ..." message from
// the exception and reports it through the x3::error_handler stored in the
// parse context under x3::error_handler_tag, then makes the rule fail.
struct error_handler {
template<typename Iterator, typename Exception, typename Context>
x3::error_handler_result on_error(
Iterator &first, Iterator const &last, Exception const &x, Context const &context) {
// The context holds a reference wrapper; .get() unwraps the actual handler.
auto &error_handler = x3::get<x3::error_handler_tag>(context).get();
std::string message = "Error! Expecting: " + x.which() + " here:";
error_handler(x.where(), message);
return x3::error_handler_result::fail;
}
};
// Tag type used as the ID of the "line" rule below (declared, never defined).
struct CSVLine;
// Builds the parser for one line: int, delimiter, int, using expectation
// points (operator>) so that a mismatch throws x3::expectation_failure.
static inline auto line_parser() {
// A delimiter is a comma, or a look-ahead for end of input / end of line.
auto delim = ',' | &(x3::eoi | x3::eol);
return x3::rule<CSVLine>{"line"} = (as_parser(x3::int_) > delim > as_parser(x3::int_) > x3::eps);
}
// NOTE(review): the handler mixins are attached to CSVLine_class, but the rule
// above uses CSVLine as its ID and no rule uses CSVLine_class. The X3
// error-handling tutorial attaches the handler to the rule's own ID type, so
// this mismatch looks like the reason on_error is never invoked - confirm.
struct CSVLine_class : error_handler, x3::annotate_on_success {};
}
void parse(std::string const &input) {
using iterator_type = std::string::const_iterator;
iterator_type iter = input.begin();
iterator_type const end = input.end();
using boost::spirit::x3::with;
using boost::spirit::x3::error_handler_tag;
using error_handler_type = boost::spirit::x3::error_handler<iterator_type>;
// Our error handler
error_handler_type error_handler(iter, end, std::cout);
// Our parser
const auto p = Parser::line_parser();
auto const parser =
// we pass our error handler to the parser so we can access
// it later in our on_error and on_sucess handlers
with<error_handler_tag>(std::ref(error_handler))
[
p
];
if (parse(iter, end, parser))
std::cout << "Parsed" << std::endl;
else
std::cout << "Failed" << std::endl;
if (iter!=end)
std::cout << "Remaining: " << std::quoted(std::string(iter,end)) << std::endl;
}
// Drives the example with an input whose second field ("x2.6") is not an integer.
int main() {
parse("1,x2.6");
return 0;
}
Since the error handler is not recognized it yields:
terminate called after throwing an instance of 'boost::exception_detail::clone_impl<boost::exception_detail::error_info_injector<boost::spirit::x3::expectation_failure<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > >'
what(): boost::spirit::x3::expectation_failure
I tried to run some simple parser that will parse [ 1, 11, 3, 6-4]. Basically, integer list with range notation.
I want to put everything into an AST without semantic actions, so I use x3::variant. My code seems very similar to the expression example. However, it does not compile under g++ 6.2. It does compile with clang++ 6.0, but yields the wrong result.
The boost version is 1.63.
It seems that I have some 'move' or initialization issue.
#include <iostream>
#include <list>
#include <vector>
#include <utility>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/io.hpp>
// AST for a bracketed list whose items are single unsigned integers or
// "a-b" pairs of unsigned integers.
namespace ns
{
namespace ast
{
namespace x3 = boost::spirit::x3;
// forward declarations
class uintObj;
struct varVec;
// type aliases: a pair of two unsigned ints, and a list of items
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value: either a single unsigned int or a pair of them
class uintObj : public x3::variant <
unsigned int,
uintPair_t
>
{
public:
// Re-export the variant's constructors and assignment operators.
using base_type::base_type;
using base_type::operator=;
};
// Root of the AST: the parsed items (adapted as a Fusion sequence after this
// namespace closes).
struct varVec
{
uintVec_t valVector;
};
}
}
BOOST_FUSION_ADAPT_STRUCT(
ns::ast::varVec,
valVector
)
// X3 grammar producing the ns::ast types.
namespace ns
{
namespace parser
{
// namespace x3 = boost::spirit::x3;
// using namespace x3;
using namespace boost::spirit::x3;
// definition of the range pair: two unsigned ints separated by '-'
rule<class uintPair, ast::uintPair_t> const uintPair = "uintPair";
auto const uintPair_def =
uint_
>> '-'
>> uint_
;
// A list item: a single unsigned int or a pair.
// NOTE(review): uint_ is listed first, and a pair also starts with an unsigned
// int, so the first alternative matches before uintPair is ever tried.
rule<class uintObj, ast::uintObj> const uintObj = "uintObj";
auto const uintObj_def =
uint_
| uintPair
;
// define rule definition : rule<ID, attrib>
// more terse definition :
// struct varVec_class;
// using varVec_rule_t = x3::rule<varVec_class, ast::varVec>;
// varVec_rule_t const varVec = "varVec";
// varVec is the rule, "varVec" is the string name of the rule.
rule<class varVec, ast::varVec> const varVec = "varVec";
// The whole list: '[' item (',' item)* ']'
auto const varVec_def =
'['
>> uintObj % ','
>> ']'
;
// Binds each rule to its *_def definition.
BOOST_SPIRIT_DEFINE(
varVec,
uintObj,
uintPair
);
}
}
// Parses a fixed sample list and prints "good" if the grammar matched the
// entire input, otherwise "bad".
int main()
{
    std::string input("[1, 11, 3, 6-4]\n");
    std::string::const_iterator begin = input.begin();
    std::string::const_iterator end = input.end();

    ns::ast::varVec result;  // ast tree
    using ns::parser::varVec;  // grammar
    using boost::spirit::x3::ascii::space;

    // Success requires both a match and full consumption of the input.
    bool const full_match = phrase_parse(begin, end, varVec, space, result) && begin == end;
    std::cout << (full_match ? "good" : "bad") << std::endl;
    return 0;
}
Swap the alternative order for the uintObj_def
// Try the longer alternative first: a pair starts with an unsigned int, so
// uint_ must come second or it would always win.
auto const uintObj_def =
uintPair
| uint_
;
The formulation you have now will always match on a uint_ because the uintPair begins with a valid uint_.
mjcaisse's answer calls out the main problem I think you had. There were a few missing pieces, so I decided to make a simplified version that shows parsing results:
Live On Wandbox
#include <iostream>
#include <iomanip>
//#include <boost/fusion/adapted.hpp>
//#include <boost/fusion/include/io.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
namespace x3 = boost::spirit::x3;
// AST: a list of items, each a single unsigned int or a pair of them.
namespace ns { namespace ast {
// forward declaration
struct uintObj;
//struct varVec;
// type aliases
using uintPair_t = std::pair<unsigned int, unsigned int>;
using uintVec_t = std::vector<uintObj>;
// general token value: either a single unsigned int or a pair
struct uintObj : x3::variant<unsigned int, uintPair_t> {
using base_type::base_type;
using base_type::operator=;
// Streams the held alternative: "n" for a single value, "a-b" for a pair.
friend std::ostream& operator<<(std::ostream& os, uintObj const& This) {
// Local visitor with one overload per variant alternative.
struct {
std::ostream& os;
void operator()(unsigned int v) const { os << v; }
void operator()(uintPair_t v) const { os << v.first << "-" << v.second; }
} vis { os };
boost::apply_visitor(vis, This);
return os;
}
};
// The root attribute is the vector itself; no wrapper struct is used here.
using varVec = uintVec_t;
} }
// Grammar written as inline rule definitions instead of BOOST_SPIRIT_DEFINE.
namespace ns { namespace parser {
using namespace boost::spirit::x3;
// as<T>(p): wraps parser p in an anonymous rule whose attribute type is T.
template <typename T> auto as = [](auto p) { return rule<struct _, T> {} = p; };
// Two unsigned ints separated by '-'.
auto const uintPair = as<ast::uintPair_t> ( uint_ >> '-' >> uint_ );
// A list item; the pair is tried before the single value.
auto const uintObj = as<ast::uintObj> ( uintPair | uint_ );
// '[' item (',' item)* ']'
auto const varVec = as<ast::varVec> ( '[' >> uintObj % ',' >> ']' );
} }
int main() {
using namespace ns;
std::string const input("[1, 11, 3, 6-4]\n");
auto begin = input.begin(), end = input.end();
ast::varVec result; // ast tree
bool success = phrase_parse(begin, end, parser::varVec, x3::ascii::space, result);
if (success) {
std::cout << "good\n";
for (auto& r : result)
std::cout << r << "\n";
}
else
std::cout << "bad\n";
if (begin != end)
std::cout << "Remaining unparsed: " << std::quoted(std::string(begin, end)) << std::endl;
}
Prints
good
1
11
3
6-4
In Boost Spirit QI it was easy to template the parser so that it could be instantiated for various attribute types. It is unclear to me how to do this with X3. Consider this stripped down version of the roman numerals parser example:
#include <iostream>
#include <iterator>
#include <string>
#include <boost/spirit/home/x3.hpp>
// Stripped-down roman numeral parser (I..IX) with a hard-coded unsigned attribute.
namespace parser {
namespace x3 = boost::spirit::x3;
// Symbol table mapping the numerals I..IX to their values 1..9.
struct numbers_ : x3::symbols<unsigned> {
numbers_() {
add
("I" , 1)
("II" , 2)
("III" , 3)
("IV" , 4)
("V" , 5)
("VI" , 6)
("VII" , 7)
("VIII" , 8)
("IX" , 9)
;
}
} numbers;
x3::rule<class roman, unsigned> const roman = "roman";
// Semantic actions: reset the rule's value to 0, then add the matched symbol's value.
auto init = [](auto &x) { x3::_val(x) = 0; };
auto add = [](auto &x) { x3::_val(x) += x3::_attr(x); };
// eps always matches and is used only to run `init` before the numeral is parsed.
auto const roman_def = x3::eps [init] >> numbers [add];
BOOST_SPIRIT_DEFINE(roman);
}
// Parses the numeral "V" and prints its value when the whole input matched.
int main()
{
    std::string input = "V";
    auto iter = input.begin();
    auto end = input.end();

    unsigned result;  // only read after a successful parse
    bool const matched = parse(iter, end, parser::roman, result);

    if (!(matched && iter == end)) {
        std::cout << "Failed :(\n";
        return 0;
    }

    std::cout << "Success :) Result = " << result << '\n';
    return 0;
}
I'd like to template the parser on the attribute type which is currently hardcoded as unsigned. My first guess was to replace
namespace parser {
// ...
}
with
template < typename int_t >
struct parser {
// ...
};
which is obviously too naïve. How to do this correctly?
In X3 there's not so much pain in combining parsers dynamically. So I'd write your sample as:
// Builds the roman numeral parser (I..IX) for an arbitrary attribute type.
// Attribute must be constructible from the integer literals 1..9 and support
// assignment from 0 and operator+=, as used in the semantic actions below.
// Returns the rule definition (rule = parser expression) by value.
template <typename Attribute>
auto make_roman() {
using namespace boost::spirit::x3;
// Symbol table local to the function, mapping numerals to Attribute values.
struct numbers_ : symbols<Attribute> {
// `this->` is required because `add` is a member of a dependent base class.
numbers_() { this-> add
("I", Attribute{1}) ("II", Attribute{2}) ("III", Attribute{3}) ("IV", Attribute{4})
("V", Attribute{5}) ("VI", Attribute{6}) ("VII", Attribute{7}) ("VIII", Attribute{8})
("IX", Attribute{9}) ;
}
} numbers;
// eps runs the reset action; the symbol match then adds its value to the result.
return rule<class roman, Attribute> {"roman"} =
eps [([](auto &x) { _val(x) = 0; })]
>> numbers [([](auto &x) { _val(x) += _attr(x); })];
}
See it Live On Coliru
When I remove x3::eps in the below rule, the string result from the first partial match is still in the second match, resulting in a string with duplicated content.
If I add another case in between I still only get 1 duplicate instead of two.
Why do I need to use x3::eps, or, what am I misunderstanding in the evaluation of rules and synthesized attributes?
Should I use lookahead instead?
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
// AST for a channel: a label plus a flag telling whether it was followed by '!'.
struct AstChannel {
std::string label;
bool complement;
};
// A label: one lowercase letter followed by any number of alphanumerics.
x3::rule<class AstLabel, std::string> const astLabel = "astLabel";
auto const astLabel_def = ascii::lower >> *(ascii::alnum);
BOOST_SPIRIT_DEFINE(astLabel)
// A channel: "label!" yields complement = true, a bare label yields false.
// NOTE(review): both alternatives parse astLabel into the same string attribute.
// When the first alternative fails after the label has matched, the text
// collected so far is not rolled back, which is how the duplicated label
// described in the question arises.
x3::rule<class AstChannel, AstChannel> const astChannel = "astChannel";
auto const astChannel_def = astLabel >> '!' >> x3::attr(true)
| astLabel >> x3::eps >> x3::attr(false) ;
BOOST_SPIRIT_DEFINE(astChannel)
// Lets X3 fill AstChannel member by member (label, then complement).
BOOST_FUSION_ADAPT_STRUCT(
AstChannel,
(std::string, label)
(bool, complement)
)
// Parses the sample input "hello" as a channel and prints "<label>,<complement>".
int main() {
    std::string str("hello");
    auto iter = str.begin();
    auto end = str.end();

    AstChannel channel;
    bool const matched = phrase_parse(iter, end, astChannel, ascii::space, channel);
    if (!matched) {
        return 0;
    }

    std::cout << channel.label << ',' << channel.complement << std::endl;
    return 0;
}
The real answer is: force atomic attribute propagation of container attributes (e.g. with x3::hold or semantic actions).
The better answer is: use x3::matches:
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
using namespace std::string_literals;
// AST for a channel: a label plus a flag telling whether it was followed by '!'.
struct AstChannel {
std::string label;
bool complement;
};
BOOST_FUSION_ADAPT_STRUCT(AstChannel, label, complement)
namespace Grammar {
namespace x3 = boost::spirit::x3;
namespace ascii = x3::ascii;
// A label: one lowercase letter followed by alphanumerics, parsed as a lexeme
// so the skipper does not apply between its characters.
auto const label = x3::rule<struct labelRule, std::string> {"label" }
= x3::lexeme[ascii::lower >> *(ascii::alnum)];
// The label is parsed exactly once; matches['!'] yields a bool saying whether
// the optional '!' was present, so no alternative (and no re-parse) is needed.
auto const channel = label >> x3::matches['!'];
// Entry point: channel with ascii::space as the skipper.
auto const entry = x3::skip(ascii::space) [ channel ];
}
// Parses the sample input "hello" and prints "<label>,<true|false>".
int main() {
    std::string const text = "hello"s;
    AstChannel channel;

    bool const matched = parse(text.begin(), text.end(), Grammar::entry, channel);
    if (matched) {
        std::cout << channel.label << ',' << std::boolalpha << channel.complement << "\n";
    }
}
Prints
hello,false