Here is the JSON:
{"ts":1827908701,"updates":[[4,30623409,17,81282347579,1425823449632," ... ","tfs"],[80,1,0],[7,81282347579,30652308]]}
How can I get the value 4 from "updates" using the Boost library?
I know how to read a top-level value such as "ts", but I do not understand how to read a value nested inside the two levels of brackets.
Technically, this answer shows how to use Boost to extract that value:
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <map>
namespace qi = boost::spirit::qi;
std::string const sample = R"({"ts":1827908701,"updates":[[4,30623409,17,81282347579,1425823449632," ... ","tfs"],[80,1,0],[7,81282347579,30652308]]})";
namespace qd_json { // quick and dirty JSON handling
// Empty tag type; it is the variant alternative that represents a JSON null.
struct null {};
// JSON strings are held as std::string.
using text = std::string;
// A JSON value: a recursive variant whose object and array alternatives
// contain further values of this same variant type.
using value = boost::make_recursive_variant<
null,
text, // "string" (roughly!)
double, // number
std::map<text, boost::recursive_variant_>, // object
std::vector<boost::recursive_variant_>, // array
bool
>::type;
// Convenience aliases: one key/value pair, an object, and an array.
using member = std::pair<text, value>;
using object = std::map<text, value>;
using array = std::vector<value>;
/// Qi grammar that parses one JSON value into a qd_json::value.
///
/// @tparam It       iterator type over the input characters
/// @tparam Skipper  skip parser used between tokens (qi::space_type by default)
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, value(), Skipper>
{
    grammar() : grammar::base_type(value_) {
        using namespace qi;

        null_   = "null" >> attr(null{});
        bool_   = "true" >> attr(true) | "false" >> attr(false);
        // Alternatives are tried left to right.
        value_  = null_ | bool_ | text_ | double_ | object_ | array_;
        member_ = text_ >> ':' >> value_;
        object_ = '{' >> -(member_ % ',') >> '}';
        array_  = '[' >> -(value_ % ',') >> ']';

        // Strings are decoded while they are parsed. This is the only
        // definition of text_: an earlier raw[] based definition was
        // overwritten by this one and has been dropped as dead code.
        text_ = lexeme [ '"' >> *ch_ >> '"' ];

        // ch_ consumes either a run of ordinary characters or one escape
        // sequence, appending the decoded result to the rule's string.
        ch_ = +(~char_("\"\\")) [ _val += _1 ]
            | qi::lit("\x5C") >> (                  // \  (reverse solidus) introduces an escape
                  qi::lit("\x22") [ _val += '"'  ] | // "  quotation mark  U+0022
                  qi::lit("\x5C") [ _val += '\\' ] | // \  reverse solidus U+005C
                  qi::lit("\x2F") [ _val += '/'  ] | // /  solidus         U+002F
                  qi::lit("\x62") [ _val += '\b' ] | // b  backspace       U+0008
                  qi::lit("\x66") [ _val += '\f' ] | // f  form feed       U+000C
                  qi::lit("\x6E") [ _val += '\n' ] | // n  line feed       U+000A
                  qi::lit("\x72") [ _val += '\r' ] | // r  carriage return U+000D
                  qi::lit("\x74") [ _val += '\t' ] | // t  tab             U+0009
                  qi::lit("\x75")                    // uXXXX              U+XXXX
                      >> _4HEXDIG [ append_utf8(qi::_val, qi::_1) ]
              );

        BOOST_SPIRIT_DEBUG_NODES((text_)(value_)(member_)(object_)(array_)(null_)(bool_))
    }

  private:
    // text_ and ch_ are declared without a skipper, so whitespace inside a
    // string literal is never skipped.
    qi::rule<It, text()>            text_, ch_;
    qi::rule<It, null()>            null_;
    qi::rule<It, bool()>            bool_;
    qi::rule<It, value(),  Skipper> value_;
    qi::rule<It, member(), Skipper> member_;
    qi::rule<It, object(), Skipper> object_;
    qi::rule<It, array(),  Skipper> array_;

    // Phoenix-callable helper: appends one code point to a string as UTF-8.
    struct append_utf8_f {
        template <typename...> struct result { typedef void type; };

        template <typename String, typename Codepoint>
        void operator()(String& to, Codepoint codepoint) const {
            auto out = std::back_inserter(to);
            boost::utf8_output_iterator<decltype(out)> convert(out);
            *convert++ = codepoint;
        }
    };
    boost::phoenix::function<append_utf8_f> append_utf8;

    // Exactly four hexadecimal digits, as used by the \uXXXX escape.
    qi::uint_parser<uint32_t, 16, 4, 4> _4HEXDIG;
};
/// Parses the whole of @p input as a single JSON value.
///
/// @param input  any range of characters (for example a std::string)
/// @return the parsed value
/// @throws std::runtime_error when the parse fails or input is left over;
///         the message contains the unconsumed remainder.
template <typename Range, typename It = typename boost::range_iterator<Range const>::type>
value parse(Range const& input) {
    It cursor(boost::begin(input));
    It stop(boost::end(input));

    grammar<It> json;
    value result;
    bool const matched = qi::phrase_parse(cursor, stop, json, qi::space, result);

    if (!matched || cursor != stop)
        throw std::runtime_error("Remaining unparsed: '" + std::string(cursor, stop) + "'");

    return result;
}
namespace accessors {
    // Typed accessors over qd_json::value. Each one extracts a specific
    // variant alternative with boost::get and returns it by value.

    // The stored number as a double.
    static double dbl_(qd_json::value const& v) {
        return boost::get<double>(v);
    }

    // The stored number converted to int.
    static int int_(qd_json::value const& v) {
        double const number = boost::get<double>(v);
        return static_cast<int>(number);
    }

    // A copy of the stored string.
    static std::string txt_(qd_json::value const& v) {
        return boost::get<qd_json::text>(v);
    }

    // A copy of the stored array.
    static qd_json::array arr_(qd_json::value const& v) {
        return boost::get<qd_json::array>(v);
    }

    // A copy of the stored object.
    static qd_json::object obj_(qd_json::value const& v) {
        return boost::get<qd_json::object>(v);
    }
}
}
using It = std::string::const_iterator;
// Prints the first element of the first non-empty array inside "updates".
int main()
{
    using namespace qd_json::accessors;

    auto document = obj_(qd_json::parse(sample));
    auto const rows = arr_(document["updates"]);

    for (auto const& row : rows) {
        auto const cells = arr_(row);
        if (cells.empty())
            continue;

        // Only the very first value is wanted, so stop right after printing it.
        std::cout << int_(cells.front()) << "\n";
        return 0;
    }
}
Prints:
4
The parser was of course courtesy older answers (Getting values from a json file using boost/property_tree, with multiple elements/arrays/sub-arrays C++) and if you need a more specific answer, I suggest you show the code that you had already got.
Related
The facts:
I am using VS2013 and developing in C++.
I am using boost API to get the values from a standard/legit json file.
I can't extract name4.js, name5.js and name6.js names.
I have searched all over stackoverflow/Google and didn't find any explanation for the below json file I am working with.
json file:
{
"background": {
"scripts": [ "name1.js", "name2.js", "name3.js" ]
},
"default_popup": "popup.html",
"default_title": "__MSG_name__",
"content_scripts": [ {
"all_frames": true,
"js": [ "name4.js", "name5.js", "name6.js" ],
"match_about_blank": true,
"matches": [ "http://*/*", "https://*/*" ],
"run_at": "document_start"
}, {
"all_frames": true,
"js": [ "include.postload.js" ],
"match_about_blank": true,
"matches": [ "http://*/*", "https://*/*" ],
"run_at": "document_end"
} ]
}
What works:
As you can see in the code below, I was able to extract "name1.js", "name2.js" and "name3.js" using "background.scripts" (the example at boost website):
// Load the manifest and collect the script names listed under background.scripts.
boost::property_tree::ptree doc;
boost::property_tree::read_json("C:/Temp\\manifest.json", doc);
std::vector<string> jsFiles;
try {
    // Each child of "background.scripts" is one array element; its data is the file name.
    for (auto& scriptNode : doc.get_child("background.scripts")) {
        jsFiles.push_back(scriptNode.second.data());
    }
}
catch (boost::exception const &) {
    // Errors raised while walking the tree are ignored here.
}
What doesn't work:
I want to extract the rest of the js names which are:
name4.js
name5.js
name6.js
I couldn't get it to work using the below code:
BOOST_FOREACH(boost::property_tree::ptree::value_type& framePair2, doc.get_child("content_scripts")){
jsFiles.push_back(framePair2.second.data());
What I get is "" in the vector string.
I even tried jsFiles.push_back(framePair2.second.get<std::string>("js")); but it still doesn't work.
I have also tried other methods with no success.
I'd appreciate if I could get a working code because I am out of ideas.
get<std::string>("js") can't work because "js" has an array value.
// Outer loop: each element of the "content_scripts" array.
// Inner loop: each entry of that element's "js" array, printed one per line.
for(auto& e : pt.get_child("content_scripts"))
for(auto& r : e.second.get_child("js"))
std::cout << r.second.get_value<std::string>() << "\n";
should do
Live On Coliru
#include <sstream>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <iostream>
std::string const sample = R"(
{
"background": {
"scripts": [ "name1.js", "name2.js", "name3.js" ]
},
"default_popup": "popup.html",
"default_title": "__MSG_name__",
"content_scripts": [ {
"all_frames": true,
"js": [ "name4.js", "name5.js", "name6.js" ],
"match_about_blank": true,
"matches": [ "http://*/*", "https://*/*" ],
"run_at": "document_start"
}, {
"all_frames": true,
"js": [ "include.postload.js" ],
"match_about_blank": true,
"matches": [ "http://*/*", "https://*/*" ],
"run_at": "document_end"
} ]
})";
using boost::property_tree::ptree;
namespace j = boost::property_tree::json_parser;
int main() {
std::istringstream iss(sample);
ptree pt;
j::read_json(iss, pt);
for(auto& e : pt.get_child("content_scripts"))
for(auto& r : e.second.get_child("js"))
std::cout << r.second.get_value<std::string>() << "\n";
}
Prints
name4.js
name5.js
name6.js
include.postload.js
In case you like to have an alternative method, you could use the ad-hoc parser I posted in an earlier answer (Reading JSON file with C++ and BOOST):
Live On Coliru
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <map>
namespace qi = boost::spirit::qi;
std::string const sample = R"(
{
"background": {
"scripts": [ "name1.js", "name2.js", "name3.js" ]
},
"default_popup": "popup.html",
"default_title": "__MSG_name__",
"content_scripts": [ {
"all_frames": true,
"js": [ "name4.js", "name5.js", "name6.js" ],
"match_about_blank": true,
"matches": [ "http://*/*", "https://*/*" ],
"run_at": "document_start"
}, {
"all_frames": true,
"js": [ "include.postload.js" ],
"match_about_blank": true,
"matches": [ "http://*/*", "https://*/*" ],
"run_at": "document_end"
} ]
})";
namespace qd_json { // quick and dirty JSON handling
// Empty tag type; it is the variant alternative that represents a JSON null.
struct null {};
// JSON strings are held as std::string.
using text = std::string;
// A JSON value: a recursive variant whose object and array alternatives
// contain further values of this same variant type.
using value = boost::make_recursive_variant<
null,
text, // "string" (roughly!)
double, // number
std::map<text, boost::recursive_variant_>, // object
std::vector<boost::recursive_variant_>, // array
bool
>::type;
// Convenience aliases: one key/value pair, an object, and an array.
using member = std::pair<text, value>;
using object = std::map<text, value>;
using array = std::vector<value>;
/// Qi grammar that parses one JSON value into a qd_json::value.
///
/// @tparam It       iterator type over the input characters
/// @tparam Skipper  skip parser used between tokens (qi::space_type by default)
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, value(), Skipper>
{
    grammar() : grammar::base_type(value_) {
        using namespace qi;

        null_   = "null" >> attr(null{});
        bool_   = "true" >> attr(true) | "false" >> attr(false);
        // Alternatives are tried left to right.
        value_  = null_ | bool_ | text_ | double_ | object_ | array_;
        member_ = text_ >> ':' >> value_;
        object_ = '{' >> -(member_ % ',') >> '}';
        array_  = '[' >> -(value_ % ',') >> ']';

        // Strings are decoded while they are parsed. This is the only
        // definition of text_: an earlier raw[] based definition was
        // overwritten by this one and has been dropped as dead code.
        text_ = lexeme [ '"' >> *ch_ >> '"' ];

        // ch_ consumes either a run of ordinary characters or one escape
        // sequence, appending the decoded result to the rule's string.
        ch_ = +(~char_("\"\\")) [ _val += _1 ]
            | qi::lit("\x5C") >> (                  // \  (reverse solidus) introduces an escape
                  qi::lit("\x22") [ _val += '"'  ] | // "  quotation mark  U+0022
                  qi::lit("\x5C") [ _val += '\\' ] | // \  reverse solidus U+005C
                  qi::lit("\x2F") [ _val += '/'  ] | // /  solidus         U+002F
                  qi::lit("\x62") [ _val += '\b' ] | // b  backspace       U+0008
                  qi::lit("\x66") [ _val += '\f' ] | // f  form feed       U+000C
                  qi::lit("\x6E") [ _val += '\n' ] | // n  line feed       U+000A
                  qi::lit("\x72") [ _val += '\r' ] | // r  carriage return U+000D
                  qi::lit("\x74") [ _val += '\t' ] | // t  tab             U+0009
                  qi::lit("\x75")                    // uXXXX              U+XXXX
                      >> _4HEXDIG [ append_utf8(qi::_val, qi::_1) ]
              );

        BOOST_SPIRIT_DEBUG_NODES((text_)(value_)(member_)(object_)(array_)(null_)(bool_))
    }

  private:
    // text_ and ch_ are declared without a skipper, so whitespace inside a
    // string literal is never skipped.
    qi::rule<It, text()>            text_, ch_;
    qi::rule<It, null()>            null_;
    qi::rule<It, bool()>            bool_;
    qi::rule<It, value(),  Skipper> value_;
    qi::rule<It, member(), Skipper> member_;
    qi::rule<It, object(), Skipper> object_;
    qi::rule<It, array(),  Skipper> array_;

    // Phoenix-callable helper: appends one code point to a string as UTF-8.
    struct append_utf8_f {
        template <typename...> struct result { typedef void type; };

        template <typename String, typename Codepoint>
        void operator()(String& to, Codepoint codepoint) const {
            auto out = std::back_inserter(to);
            boost::utf8_output_iterator<decltype(out)> convert(out);
            *convert++ = codepoint;
        }
    };
    boost::phoenix::function<append_utf8_f> append_utf8;

    // Exactly four hexadecimal digits, as used by the \uXXXX escape.
    qi::uint_parser<uint32_t, 16, 4, 4> _4HEXDIG;
};
/// Parses the whole of @p input as a single JSON value.
///
/// @param input  any range of characters (for example a std::string)
/// @return the parsed value
/// @throws std::runtime_error when the parse fails or input is left over;
///         the message contains the unconsumed remainder.
template <typename Range, typename It = typename boost::range_iterator<Range const>::type>
value parse(Range const& input) {
    It cursor(boost::begin(input));
    It stop(boost::end(input));

    grammar<It> json;
    value result;
    bool const matched = qi::phrase_parse(cursor, stop, json, qi::space, result);

    if (!matched || cursor != stop)
        throw std::runtime_error("Remaining unparsed: '" + std::string(cursor, stop) + "'");

    return result;
}
namespace accessors {
    // Typed accessors over qd_json::value. Each one extracts a specific
    // variant alternative with boost::get and returns it by value.
    // (The stray ';' that followed each function definition has been removed.)

    // The stored number as a double.
    static double dbl_(qd_json::value const& v) {
        return boost::get<double>(v);
    }

    // The stored number converted to int; the narrowing is made explicit.
    static int int_(qd_json::value const& v) {
        return static_cast<int>(boost::get<double>(v));
    }

    // A copy of the stored string.
    static std::string txt_(qd_json::value const& v) {
        return boost::get<qd_json::text>(v);
    }

    // A copy of the stored array.
    static qd_json::array arr_(qd_json::value const& v) {
        return boost::get<qd_json::array>(v);
    }

    // A copy of the stored object.
    static qd_json::object obj_(qd_json::value const& v) {
        return boost::get<qd_json::object>(v);
    }
}
}
using It = std::string::const_iterator;
// Prints every file name in the "js" array of each "content_scripts" element.
int main()
{
    using namespace qd_json::accessors;

    auto manifest = obj_(qd_json::parse(sample));

    for (auto& entry : arr_(manifest["content_scripts"])) {
        auto fields = obj_(entry);
        for (auto& script : arr_(fields["js"])) {
            std::cout << txt_(script) << "\n";
        }
    }
}
Prints
name4.js
name5.js
name6.js
include.postload.js
I need to parse following EBNF expression with Boost::Spirit.
period ::= date_part [time_part] , date_part [time_part]
time_part ::= hours:minutes[:seconds]
date_part ::= day.month.year
For example, 10.06.2014 10:00:15, 11.07.2014. I wrote my grammar in two ways, but I can't get either one to work.
1) First attempt
// First attempt from the question; shown as posted (it does not compile).
// date_ and time_ each collect a list of ints separated by '.' / ':' and
// datetime_ tries to build a ptime from those lists in a semantic action.
struct Parser: grammar<std::string::const_iterator, space_type>
{
Parser(): Parser::base_type(datetime_)
{
using boost::spirit::int_;
using boost::spirit::qi::_1;
using boost::spirit::qi::_2;
using boost::spirit::qi::_val;
datetime_ =
(date_ >> time_)
[
_val =
phoenix::construct<ptime>
(
// NOTE: _1 and _2 are lazy placeholders. The reported compile error points
// at this date(...) call, which receives a placeholder expression, not an int.
date(_1[2]), _1[1], _1[0]),
hours(_2[0]) + minutes(_2[1]) + seconds[_2[0]]
)
|
_val =
phoenix::construct<ptime>
(
date(_1[2]), _1[1], _1[0]),
seconds(0)
)
];
// Both rules expose std::vector<int>: a list of ints with a fixed separator.
date_ %= int_ % '.';
time_ %= int_ % ':';
BOOST_SPIRIT_DEBUG_NODE(datetime_);
BOOST_SPIRIT_DEBUG_NODE(date_);
BOOST_SPIRIT_DEBUG_NODE(time_);
}
rule<std::string::const_iterator, std::vector<int>(), space_type> date_, time_;
rule<std::string::const_iterator, ptime(), space_type> datetime_;
}
// Driver for the first attempt: parse the sample period, skipping whitespace.
Parser parser;
std::string strTest("10.06.2014 10:00:15, 11.07.2014");
std::string::const_iterator it_begin(strTest.begin());
std::string::const_iterator it_end(strTest.end());
// No attribute is passed, so only the success flag is observed here.
bool result = phrase_parse(it_begin, it_end, parser, space);
Errors:
/media/Data/Projects/Qt/Planner/parser.h:108: ошибка: no matching function for call to 'boost::gregorian::date::date(boost::phoenix::detail::make_index_composite<boost::phoenix::actor<boost::spirit::argument<0> >, int>::type)'
And so on. I can't cast boost::spirit::argument<0> to int or date::years_type. I tried date((int)_1[2]), (int)_1[1], (int)_1[0])) and dynamic_cast<int>(_1[2]), but with no success.
2) Second attempt
// Second attempt from the question; shown as posted (it does not compile).
// Each rule builds its attribute with phoenix::construct inside a semantic
// action, applying C-style casts to the placeholders _1.._5.
// NOTE: the iterator type names below are misspelled as posted
// (const_itearator / const_itarator).
struct Parser: grammar<std::string::const_itearator, space_type>
{
Parser(ConditionTree& a_lTree):
Parser::base_type(time_period_),
m_lTree(a_lTree)
{
using boost::spirit::int_;
using boost::spirit::qi::_1;
using boost::spirit::qi::_2;
using boost::spirit::qi::_3;
using boost::spirit::qi::_4;
using boost::spirit::qi::_5;
using boost::spirit::qi::_val;
// NOTE: the reported error ("invalid cast from type 'const _1_type' ... to
// type 'int'") comes from the (int)_N casts used in the actions below.
time_period_ = ( datetime_ > ',' > datetime_ ) [ _val = phoenix::construct<time_period>((int)_1, (int)_3) ];
datetime_ = (date_ >> time_duration_) [ _val = phoenix::construct<ptime>((int)_1, (int)_2) | _val = phoenix::construct<ptime>((int)_1, seconds(0)) ] ;
date_ = (int_ > '.' > int_ > '.' > int_) [ _val = phoenix::construct<date>((int)_5, (int)_3, (int)_1) ];
time_duration_ = (int_ > ':' > int_ > ':' > int_) [ _val = phoenix::construct<time_duration>((int)_1, (int)_3, (int)_5, 0)];
BOOST_SPIRIT_DEBUG_NODE(time_period_);
BOOST_SPIRIT_DEBUG_NODE(datetime_);
BOOST_SPIRIT_DEBUG_NODE(date_);
BOOST_SPIRIT_DEBUG_NODE(time_duration_);
}
rule<std::string::const_itarator, time_period(), space_type> time_period_;
rule<std::string::const_itarator, ptime(), space_type> datetime_;
rule<std::string::const_itarator, date(), space_type> date_;
rule<std::string::const_itarator, time_duration(), space_type> time_duration_;
// Reference to the caller's tree; it is stored but not used in the code shown.
ConditionTree& m_lTree;
};
Error:
/media/Data/Projects/Qt/Planner/parser.h:114: ошибка: invalid cast from type 'const _1_type {aka const boost::phoenix::actor<boost::spirit::argument<0> >}' to type 'int'...
Why I can't cast boost::spirit::argument<0> to int????
Better question, why would you be able to cast a placeholder type to a specific primitive type?
The place holder is a lazy actor only, so you should use Phoenix cast_ to cast it, if at all (hint: this should not be necessary): Live On Coliru
Output
<period_>
<try>10.06.2014 10:00:15,</try>
<date_>
<try>10.06.2014 10:00:15,</try>
<success> 10:00:15, 11.07.201</success>
<attributes>[[10, 6, 2014]]</attributes>
</date_>
<time_>
<try> 10:00:15, 11.07.201</try>
<success>, 11.07.2014</success>
<attributes>[[10, 0, 15]]</attributes>
</time_>
<date_>
<try> 11.07.2014</try>
<success></success>
<attributes>[[11, 7, 2014]]</attributes>
</date_>
<time_>
<try></try>
<fail/>
</time_>
<success></success>
<attributes>[[[[10, 6, 2014], [10, 0, 15]], [[11, 7, 2014], [empty]]]]</attributes>
</period_>
Parse success
Full Sample
#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Plain data structures that the parser fills in.
namespace Ast {
    using boost::optional;

    // day.month.year
    struct date {
        unsigned day;
        unsigned month;
        unsigned year;
    };

    // hours:minutes:seconds
    struct time {
        unsigned hours;
        unsigned minutes;
        unsigned seconds;
    };

    // A date with an optional time of day.
    struct date_time {
        date           date_part;
        optional<time> time_part;
    };

    // Two date_time values: the start and the end of the period.
    struct period {
        date_time start;
        date_time end;
    };
}
// Adapt the Ast structs as Fusion sequences, listing each member's type and
// name in declaration order.
BOOST_FUSION_ADAPT_STRUCT(Ast::date, (unsigned,day)(unsigned,month)(unsigned,year))
BOOST_FUSION_ADAPT_STRUCT(Ast::time, (unsigned,hours)(unsigned,minutes)(unsigned,seconds))
BOOST_FUSION_ADAPT_STRUCT(Ast::date_time, (Ast::date,date_part)(Ast::optional<Ast::time>, time_part))
BOOST_FUSION_ADAPT_STRUCT(Ast::period, (Ast::date_time,start)(Ast::date_time,end))
/// Grammar for
///   period    ::= date_part [time_part] , date_part [time_part]
///   time_part ::= hours:minutes[:seconds]
///   date_part ::= day.month.year
/// Attributes are propagated automatically into the Fusion-adapted Ast structs.
///
/// @tparam Iterator  iterator type over the input characters
template <typename Iterator>
struct Parser : qi::grammar<Iterator, Ast::period(), qi::space_type>
{
    int test = 0; // not used by the grammar; initialized so it is never indeterminate

    Parser() : Parser::base_type(period_)
    {
        using namespace qi;

        // Fixed-width unsigned fields. uint_parser is used because the target
        // members are unsigned and a sign is not part of the format.
        static const uint_parser<unsigned, 10, 2, 2> _2digit = {};
        static const uint_parser<unsigned, 10, 4, 4> _4digit = {};

        // Seconds are optional in the EBNF; when they are absent the field is 0.
        time_      = _2digit >> ":" >> _2digit >> (":" >> _2digit | attr(0u));
        date_      = _2digit >> "." >> _2digit >> "." >> _4digit;
        date_time_ = date_ >> -time_;
        period_    = date_time_ >> "," >> date_time_;

        BOOST_SPIRIT_DEBUG_NODES((period_)(time_)(date_))
    }

  private:
    qi::rule<Iterator, Ast::period(),    qi::space_type> period_;
    qi::rule<Iterator, Ast::date(),      qi::space_type> date_;
    qi::rule<Iterator, Ast::time(),      qi::space_type> time_;
    qi::rule<Iterator, Ast::date_time(), qi::space_type> date_time_;
};
// Parses the sample period and reports success/failure plus any leftover input.
int main()
{
    using It = std::string::const_iterator;

    std::string input("10.06.2014 10:00:15, 11.07.2014");
    It cursor(input.begin());
    It stop(input.end());

    Parser<It> parser;
    Ast::period parsed;
    bool const matched = qi::phrase_parse(cursor, stop, parser, qi::space, parsed);

    std::cout << (matched ? "Parse success\n" : "Parse failed\n");

    if (cursor != stop) {
        std::cout << "Remaining unparsed input: '" << std::string(cursor, stop) << "'\n";
    }
}
I am working on c++ string literal parser with boost spirit.
This is what I have so far:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/repository/include/qi_confix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
using namespace boost::spirit;
#include <boost/fusion/include/adapt_struct.hpp>
////////////////////////////////
// extra facilities
// Function object that forwards to get_line() for a position iterator, so it
// can be wrapped in boost::phoenix::function and used in semantic actions.
struct get_line_f
{
    // Result-type protocol: the call always yields a size_t.
    template <typename>
    struct result
    {
        typedef size_t type;
    };

    template <typename It>
    size_t operator()(It const& position) const
    {
        return get_line(position);
    }
};
// Specialization of Spirit's transform_attribute for the pair
// (uint16_t, std::string) in the qi domain.
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint16_t, std::string, qi::domain>
{
// NOTE(review): `type` is std::string& while pre() returns std::string by value — confirm intent.
typedef std::string& type;
// pre() ignores its argument and returns the literal "pre16".
static std::string pre(uint16_t& d) { return "pre16"; }
// post() ignores the parsed number and overwrites attr with "unicode16".
static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; }
// fail() does nothing.
static void fail(uint16_t&) {}
};
}}}
// Specialization of Spirit's transform_attribute for the pair
// (uint32_t, std::string) in the qi domain.
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint32_t, std::string, qi::domain>
{
// NOTE(review): `type` is std::string& while pre() returns std::string by value — confirm intent.
typedef std::string& type;
// pre() ignores its argument and returns the literal "pre32".
static std::string pre(uint32_t& d) { return "pre32"; }
// post() ignores the parsed number and overwrites attr with "unicode32".
static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; }
// fail() does nothing.
static void fail(uint32_t&) {}
};
}}}
//
////////////////////////////////
// First and last line number of a parsed range. Both start out as
// static_cast<size_t>(-1), i.e. the largest size_t value.
struct RangePosition
{
    RangePosition()
    {
    }

    size_t beginLine = static_cast<size_t>(-1);
    size_t endLine   = static_cast<size_t>(-1);
};
// A parsed string literal: its decoded value, the raw source text it came
// from, and (through RangePosition) the lines it spans.
struct String : public RangePosition
{
    // The base and both strings are default-constructed.
    String()
    {
    }

    std::string value;   // decoded contents
    std::string source;  // text as written in the input
};
// Adapt String as a Fusion sequence. The index order used by at_c<N> is:
// 0 = value, 1 = source, 2 = beginLine, 3 = endLine.
BOOST_FUSION_ADAPT_STRUCT(String,
(std::string, value)
(std::string, source)
(size_t, beginLine)
(size_t, endLine)
)
// Grammar from the question: parses one or more adjacent double-quoted string
// sections into a String (decoded value, raw source text, begin/end line).
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
// Symbol table mapping two-character escape sequences to the character they denote.
struct escape_symbols : qi::symbols<char, char>
{
escape_symbols()
{
add
("\\\'" , '\'')
("\\\"" , '\"')
("\\\?" , '\?')
("\\\\" , '\\')
("\\0" , '\0')
("\\a" , '\a')
("\\b" , '\b')
("\\f" , '\f')
("\\n" , '\n')
("\\r" , '\r')
("\\t" , '\t')
("\\v" , '\v')
;
}
} escape_symbol;
source_string() : source_string::base_type(start)
{
using qi::raw;
using qi::_val;
using qi::_1;
using qi::space;
using qi::omit;
using qi::no_case;
using qi::attr_cast;
using qi::print;
namespace phx = boost::phoenix;
using phx::at_c;
using phx::begin;
using phx::end;
using phx::construct;
using phx::ref;
escape %= escape_symbol;
// One character: a hex escape, an octal escape, a symbolic escape, or any
// printable character other than '"' and '\'.
character %= (no_case["\\x"] >> hex12)
| ("\\" >> oct123)
| escape
| (print - (lit('"') | '\\'));
// \uXXXX and \UXXXXXXXX escapes; these attr_cast calls are what the question asks about.
unicode %= ("\\u" >> attr_cast(hex4))
| ("\\U" >> attr_cast(hex8));
string_section %= '"' >> *(unicode | character) >> '"';
// Adjacent sections may be separated by any amount of whitespace.
string %= string_section % omit[*space];
// The inner action stores the decoded text (field 0); the outer action, run
// on the raw[] iterator range, stores the source text (field 1) and the line
// numbers of the range's begin and end (fields 2 and 3).
start = raw[
string[at_c<0>(_val) = _1]
]
[
at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
at_c<2>(_val) = get_line_(begin(_1)),
at_c<3>(_val) = get_line_(end(_1))
]
;
}
boost::phoenix::function<get_line_f> get_line_;
qi::rule<Iterator, String(), qi::space_type> start;
qi::rule<Iterator, std::string()> escape;
// Numeric sub-parsers: 1-2 hex digits, exactly 4 / 8 hex digits, 1-3 octal digits.
qi::uint_parser<char, 16, 1, 2> hex12;
qi::uint_parser<uint16_t, 16, 4, 4> hex4;
qi::uint_parser<uint32_t, 16, 8, 8> hex8;
qi::uint_parser<char, 8, 1, 3> oct123;
qi::rule<Iterator, std::string()> character;
qi::rule<Iterator, std::string()> unicode;
qi::rule<Iterator, std::string()> string_section;
qi::rule<Iterator, std::string()> string;
};
and my testing code is
// Test inputs for the string-literal grammar.
std::string str[] =
{
"\"\\u1234\\U12345678\"",
"\"te\"\"st\"",
"\"te\" \"st\"",
"\"te\" \n \"st\"",
"\"\"",
"\"\\\"\"",
"\"test\"",
"\"test\" something",
"\"\\\'\\\"\\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
"\"\\x61cd\\X3012\\x7z\"",
"\"\\141cd\\06012\\78\\778\"",
"\"te",
"\"te\nst\"",
"\"test\\\"",
"\"te\\st\"",
//
};
// line_pos_iterator lets the grammar obtain line numbers for the parsed range.
typedef line_pos_iterator<std::string::const_iterator> Iterator;
std::ostringstream result;
// Parse every input and append one report line per input to `result`.
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
source_string<Iterator> g;
Iterator iter(str[i].begin());
Iterator end(str[i].end());
String string;
bool r = phrase_parse(iter, end, g, qi::space, string);
if (r)
result << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
else
result << "Parsing failed\n";
}
Can somebody help me why in this rule:
unicode %= ("\\u" >> attr_cast(hex4))
| ("\\U" >> attr_cast(hex8));
attr_cast does not invoke transform_attribute that I have defined?
// Specialization of Spirit's transform_attribute for the pair
// (uint16_t, std::string) in the qi domain.
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint16_t, std::string, qi::domain>
{
// NOTE(review): `type` is std::string& while pre() returns std::string by value — confirm intent.
typedef std::string& type;
// pre() ignores its argument and returns the literal "pre16".
static std::string pre(uint16_t& d) { return "pre16"; }
// post() ignores the parsed number and overwrites attr with "unicode16".
static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; }
// fail() does nothing.
static void fail(uint16_t&) {}
};
}}}
// Specialization of Spirit's transform_attribute for the pair
// (uint32_t, std::string) in the qi domain.
namespace boost { namespace spirit { namespace traits
{
template <>
struct transform_attribute<uint32_t, std::string, qi::domain>
{
// NOTE(review): `type` is std::string& while pre() returns std::string by value — confirm intent.
typedef std::string& type;
// pre() ignores its argument and returns the literal "pre32".
static std::string pre(uint32_t& d) { return "pre32"; }
// post() ignores the parsed number and overwrites attr with "unicode32".
static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; }
// fail() does nothing.
static void fail(uint32_t&) {}
};
}}}
Making builtin primitives types behave "strangely" seems like a VeryBadIdea™.
Assuming you just wish to decode I suggest a simpler approach using semantic actions, e.g.
https://github.com/sehe/spirit-v2-json/blob/master/JSON.cpp#L102
char_ = +(
~encoding::char_(L"\"\\")) [ qi::_val += qi::_1 ] |
qi::lit(L"\x5C") >> ( // \ (reverse solidus)
qi::lit(L"\x22") [ qi::_val += L'"' ] | // " quotation mark U+0022
qi::lit(L"\x5C") [ qi::_val += L'\\' ] | // \ reverse solidus U+005C
qi::lit(L"\x2F") [ qi::_val += L'/' ] | // / solidus U+002F
qi::lit(L"\x62") [ qi::_val += L'\b' ] | // b backspace U+0008
qi::lit(L"\x66") [ qi::_val += L'\f' ] | // f form feed U+000C
qi::lit(L"\x6E") [ qi::_val += L'\n' ] | // n line feed U+000A
qi::lit(L"\x72") [ qi::_val += L'\r' ] | // r carriage return U+000D
qi::lit(L"\x74") [ qi::_val += L'\t' ] | // t tab U+0009
qi::lit(L"\x75") // uXXXX U+XXXX
>> _4HEXDIG [ qi::_val += qi::_1 ]
This appears easily adapted to your use case.
Now if you insist, firstly wrap the types (so you don't "redefine" essential types for Spirit) and secondly, customize the container insertion traits, since std::string (or rather std::vector<char>?) is a container type.
I wouldn't recommend this though. I like to keep things "simple" and the logic in one place. Obviously this is a "funny" thing to say when using a parser generator like Spirit, because so much appears to go on "magically" behind the scenes. However, that is the nature of abstraction. I don't think I'd want to "abstract" decoding unicode escapes here: they feel as they belong in the problem domain, not the tooling.
I use boost spirit to parse a color. That worked quite well,
but after I changed the iterator type, the skipper stopped working.
"rgb(1.0,1.0,0.5)" // this works
" rgb(0.2,0.2,0.2)" // this fails
Here is the header:
//! Grammar that produces a Color from rgb(...) / rgba(...) input, skipping chs::space
struct ColorGrammar : public qi::grammar<StringIterator, Color(), chs::space_type>
{
    //! Iterator type the rules operate on
    using ItType = StringIterator;
    //! Skip parser type shared by all rules
    using Skipper = chs::space_type;

    //! Parser for an unsigned decimal number of one to three digits
    qi::uint_parser<uint8, 10, 1, 3> number;
    //! Parser for exactly one hexadecimal digit
    qi::uint_parser<uint8, 16, 1, 1> hexdigit;

    ColorGrammar();

    //! Rule for rgb(...); uses two float locals
    qi::rule<ItType, Color(), qi::locals<float, float>, Skipper> rule_rgb;
    //! Rule for rgba(...); uses three float locals
    qi::rule<ItType, Color(), qi::locals<float, float, float>, Skipper> rule_rgba;
    //! Start rule
    qi::rule<ItType, Color(), Skipper> rule_color;
};
Here is the cpp
// Defines the rules. Each component is parsed as a float and parked in a rule
// local (_a, _b, _c) until the last component arrives, at which point the
// Color is constructed into _val.
ColorGrammar::ColorGrammar()
    : ColorGrammar::base_type(rule_color, "color-grammar")
{
    using namespace qi::labels;
    using boost::phoenix::construct;
    auto& _1 = qi::_1;

    // "(r,g,b)"
    rule_rgb =
           '('
        >> qi::float_[_a = _1] >> ','
        >> qi::float_[_b = _1] >> ','
        >> qi::float_[_val = phx::construct<Color>(_a, _b, _1)]
        >> ')';

    // "(r,g,b,a)"
    rule_rgba =
           '('
        >> qi::float_[_a = _1] >> ','
        >> qi::float_[_b = _1] >> ','
        >> qi::float_[_c = _1] >> ','
        >> qi::float_[_val = phx::construct<Color>(_a, _b, _c, _1)]
        >> ')';

    // The keyword selects which argument list follows.
    rule_color =
          (qi::lit("rgb") >> rule_rgb)
        | (qi::lit("rgba") >> rule_rgba);
}
And the call:
// Call site: parse the whole of `str` into `out`, skipping chs::space.
Color out;
StringIterator begin = str.cbegin();
StringIterator end = str.cend();
bool result = qi::phrase_parse(begin, end, color_, chs::space, out);
I'm sure it is only a small mistake, but I am not able to see it.
Maybe I have been looking at the source for too long... can you see the mistake?
I can't see what's wrong: I've taken the effort to reconstruct your SSCCE.
http://liveworkspace.org/code/1pDtmn$1
In the process, it seems I must have removed the problem. I suggest you do the same.
Oh, and this is how I'd write this:
no more phoenix
no more constructors
no more qi::locals
no more needless copying
using expectation points
In short: no more fuss.
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <cstdint>
namespace qi = boost::spirit::qi;
namespace chs = boost::spirit::ascii; //qi;
typedef std::string::const_iterator StringIterator;
// RGBA colour with one float per channel, declared in the order r, g, b, a.
struct Color
{
    float r;
    float g;
    float b;
    float a;
};
// Adapt Color as a Fusion sequence in the order r, g, b, a.
BOOST_FUSION_ADAPT_STRUCT(Color, (float, r)(float, g)(float, b)(float, a))
/// Grammar that parses "rgb(r,g,b)" or "rgba(r,g,b,a)" into a Color.
/// For the rgb form the alpha channel is synthesized as 1.0f.
///
/// @tparam ItType   iterator type over the input characters
/// @tparam Skipper  skip parser type used between tokens
template <typename ItType, typename Skipper>
struct ColorGrammar : public qi::grammar<ItType, Color(), Skipper>
{
    // The base class now uses ItType. It previously hard-coded StringIterator,
    // so the grammar only worked when ItType happened to be that exact type.
    ColorGrammar()
        : ColorGrammar::base_type(rule_color, "color-grammar")
    {
        using namespace qi;

        // '>>' binds tighter than '>': the keyword and '(' form a plain
        // sequence (so "rgba" can fall through from rule_rgb to rule_rgba),
        // and everything after the '(' is an expectation point.
        rule_rgb   = lit("rgb")  >> '(' > float_ > ',' > float_ > ',' > float_ > attr(1.0f) > ')';
        rule_rgba  = lit("rgba") >> '(' > float_ > ',' > float_ > ',' > float_ > ',' > float_ > ')';
        rule_color = rule_rgb | rule_rgba;
    }

  private:
    // The unused `number` and `hexdigit` parsers were removed.
    qi::rule<ItType, Color(), Skipper> rule_rgb, rule_rgba, rule_color;
};
// Parses a sample with leading and embedded whitespace, then prints the
// result flag and whatever input was left unconsumed.
int main()
{
    std::string str = " rgb ( 0.3 , .4 , 0.5 )";
    StringIterator begin = str.cbegin();
    StringIterator end = str.cend();

    ColorGrammar<StringIterator, chs::space_type> color_;
    Color out;
    bool const result = qi::phrase_parse(begin, end, color_, chs::space, out);

    std::cout << std::boolalpha << result << '\n';
    std::cout << "remains: '" << std::string(begin, end) << "'\n";
}
Live on http://liveworkspace.org/code/35htD$3
I have a map of string-rule pairs and I would like to create a "joint rule"(rule_t joint_rule;) of them somehow. If I do this this way:
joint_rule = convert_logformat["%h"] >> convert_logformat["%t"];
then the joint rule, used with phrase_parse, matches the string
std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000]";
But if I create the joint rule this way:
for (it = convert_logformat.begin(); it != convert_logformat.end(); it++)
{
joint_rule = joint_rule.copy() >> (*it).second.copy();
}
It does not match the same string. Why? How could I achieve something similar to the latter?
Relevant code:
// Question code: parses one log entry from [first, last) by chaining the rules
// stored in a map, writing the matched fields into the output parameters via
// semantic actions. Returns true only if the parse succeeds and all input is consumed.
template <typename Iterator>
bool parse_logentry(Iterator first, Iterator last, std::vector<char>& ip, std::vector<char>& timestamp, std::vector<char>& req, unsigned int& status, unsigned int& transferred_bytes, std::vector<char>& referer, std::vector<char>& ua)
{
using boost::spirit::qi::char_;
using boost::spirit::qi::int_;
using boost::spirit::qi::uint_;
using boost::spirit::qi::phrase_parse;
using boost::spirit::ascii::space;
using boost::spirit::ascii::space_type;
using boost::phoenix::ref;
using boost::phoenix::push_back;
using boost::spirit::qi::_1;
using boost::spirit::qi::lexeme;
using boost::spirit::qi::rule;
typedef boost::spirit::qi::rule<Iterator, std::string(), space_type> rule_t;
rule_t ip_rule, timestamp_rule, user_rule, req_rule, ref_rule, ua_rule, bytes_rule, status_rule;
// Each rule stores what it matched in the corresponding output parameter.
ip_rule %= lexeme[(+char_("0-9."))[ref(ip) = _1]];
timestamp_rule %= lexeme[('[' >> +(~char_(']')) >> ']')[ref(timestamp) = _1]];
user_rule %= lexeme[(+~char_(" "))];
req_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(req) = _1]];
ref_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(referer) = _1]];
ua_rule %= lexeme[('"' >> +(~char_('"')) >> '"')[ref(ua) = _1]];
bytes_rule %= uint_[ref(transferred_bytes) = _1];
status_rule %= uint_[ref(status) = _1];
// Log-format directive -> rule. A std::map iterates in key order.
std::map<std::string, rule_t> convert_logformat;
typename std::map<std::string, rule_t>::iterator it;
convert_logformat.insert(std::pair<std::string, rule_t>("%h", ip_rule));
convert_logformat.insert(std::pair<std::string, rule_t>("%t", timestamp_rule));
//convert_logformat.insert(std::pair<std::string, rule_t>("%r", req_rule));
//convert_logformat.insert(std::pair<std::string, rule_t>("%>s", status_rule));
//convert_logformat.insert(std::pair<std::string, rule_t>("%b", bytes_rule));
//convert_logformat.insert(std::pair<std::string, rule_t>("%u", user_rule));
//convert_logformat.insert(std::pair<std::string, rule_t>("%{User-agent}i", ua_rule));
//convert_logformat.insert(std::pair<std::string, rule_t>("%{Referer}i", ref_rule));
// NOTE(review): joint_rule is default-constructed (no parser assigned) before
// a copy of it is chained in the loop below.
rule_t joint_rule;
//joint_rule = convert_logformat["%h"] >> convert_logformat["%t"];
for (it = convert_logformat.begin(); it != convert_logformat.end(); it++)
{
joint_rule = joint_rule.copy() >> (*it).second.copy();
std::cout << (*it).first << ": " << typeid((*it).second).name() << "\n";
}
std::cout << "convert_logformath: " << typeid(convert_logformat["%h"]).name() << "\n";
bool r = phrase_parse(first, last, joint_rule, space);
// Leftover input counts as a failure.
if (first != last)
return false;
return r;
}
Ahem. It is really quite simple. You should initialize your variables :)
rule_t joint_rule; // what is it initialized to?
for (auto it = convert_logformat.begin(); it != convert_logformat.end(); it++)
{
joint_rule = joint_rule.copy() >> (*it).second.copy();
}
Change the first line to
rule_t joint_rule = qi::eps;
And it works:
sehe#mint12:/tmp$ ./test
127.0.0.1
16/Aug/2012:01:50:02 +0000
Your parser lacks some (good) common practice. See below for tidied up source (C++11).
Note that using a map to store the rules looks odd, because map iteration is ordered by key, not by insertion order.
See the code live at http://liveworkspace.org/code/a7f2f94840d63fce43d8c3f56236330e
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <typeinfo>
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
/// Log-entry grammar. The start rule is the concatenation of the per-field
/// rules listed in convert_logformat; each field rule stores what it matched
/// in the corresponding public member through a semantic action.
///
/// @tparam Iterator  iterator type over the input characters
template <typename Iterator>
struct Grammar : qi::grammar<Iterator, std::string(), qi::space_type>
{
    Grammar() : Grammar::base_type(joint_rule)
    {
        using namespace qi;

        ip_rule        %= lexeme[ (+char_("0-9."))[phx::ref(ip) = _1] ];
        timestamp_rule %= lexeme[ ('[' >> +(~char_(']')) >> ']')[phx::ref(timestamp) = _1] ];
        user_rule      %= lexeme[ (+~char_(" ")) ];
        req_rule       %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(req) = _1] ];
        ref_rule       %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(referer) = _1] ];
        ua_rule        %= lexeme[ ('"' >> +(~char_('"')) >> '"')[phx::ref(ua) = _1] ];
        bytes_rule     %= uint_[phx::ref(transferred_bytes) = _1 ];
        status_rule    %= uint_[phx::ref(status) = _1 ];

        // Directive -> rule table. A plain array keeps the rules in the order
        // they are listed; a std::map would iterate in key order instead
        // (e.g. "%>s" and "%b" would sort before "%h").
        struct format_entry { char const* name; rule_t const* rule; };
        format_entry const convert_logformat[] = {
            { "%h"            , &ip_rule        },
            { "%t"            , &timestamp_rule },
            // { "%r"            , &req_rule       },
            // { "%>s"           , &status_rule    },
            // { "%b"            , &bytes_rule     },
            // { "%u"            , &user_rule      },
            // { "%{User-agent}i", &ua_rule        },
            // { "%{Referer}i"   , &ref_rule       }
        };

        // Start from eps so the first concatenation has a defined left operand.
        joint_rule = eps;
        for (auto const& entry : convert_logformat)
        {
            joint_rule = joint_rule.copy() >> entry.rule->copy();
        }

        BOOST_SPIRIT_DEBUG_NODE(ip_rule);
        BOOST_SPIRIT_DEBUG_NODE(timestamp_rule);
        BOOST_SPIRIT_DEBUG_NODE(user_rule);
        BOOST_SPIRIT_DEBUG_NODE(req_rule);
        BOOST_SPIRIT_DEBUG_NODE(ref_rule);
        BOOST_SPIRIT_DEBUG_NODE(ua_rule);
        BOOST_SPIRIT_DEBUG_NODE(bytes_rule);
        BOOST_SPIRIT_DEBUG_NODE(status_rule);
    }

    typedef qi::rule<Iterator, std::string(), qi::space_type> rule_t;
    rule_t ip_rule, timestamp_rule, user_rule, req_rule, ref_rule, ua_rule, bytes_rule, status_rule;
    rule_t joint_rule;

    // Parse results, filled in by the semantic actions above.
    std::vector<char> ip;
    std::vector<char> timestamp;
    std::vector<char> req;
    unsigned int status = 0;             // zero until status_rule matches
    unsigned int transferred_bytes = 0;  // zero until bytes_rule matches
    std::vector<char> referer;
    std::vector<char> ua;
};
/// Runs @p parser over [first, last), skipping whitespace.
/// @return true only if the parse succeeds and consumes the entire input.
template <typename Iterator>
bool parse_logentry(Iterator first, Iterator last,
                    Grammar<Iterator>& parser)
{
    if (!phrase_parse(first, last, parser, qi::space))
        return false;

    return first == last;
}
// Parses one sample log entry and prints the extracted IP and timestamp,
// or "not ok" if the entry does not parse completely.
int main(void)
{
    std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000]";
    //std::string entry = "127.0.0.1 [16/Aug/2012:01:50:02 +0000] \"GET /check.htm HTTP/1.1\" 200 17 \"-\" \"AgentName/0.1 libwww-perl/5.833\"";
    Grammar<std::string::iterator> parser;

    if (!parse_logentry(entry.begin(), entry.end(), parser))
    {
        std::cout << "not ok\n";
        return 0;
    }

    for (char c : parser.ip)
        std::cout << c;
    std::cout << "\n";

    for (char c : parser.timestamp)
        std::cout << c;
    std::cout << "\n";

    return 0;
}
Note that, among other things, this setup allows you to enable debugging of your grammar by simply defining BOOST_SPIRIT_DEBUG at the start.