Boost Spirit: parse a section of an input - c++

I have thousands of lines of input, where each line consists of 3 ints and a comma at the end, and they look like this:
5 6 10,
8 9 45,
.....
How can I create a grammar that parses only a certain section of an input, for example first 100 lines or from line 1000 to 1200 and ignores the rest.
My grammar currently looks like this:
qi::int_ >> qi::int_ >> qi::int_ >> qi::lit(",");
But obviously it parses the whole input.

You could just seek up to the interesting point and parse 100 lines there.
A sketch on how to skip 100 lines from just spirit:
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <tuple>
namespace qi = boost::spirit::qi;
// Reads 3-int records ("a b c,") from stdin, discards the first 100 lines and
// prints the following 10 records numbered 101..110.
int main() {
using It = boost::spirit::istream_iterator;
using Tup = std::tuple<int, int, int>;
// noskipws keeps the stream from swallowing whitespace, so the parser itself
// sees the line ends; a default-constructed iterator `l` marks end of input.
It f(std::cin >> std::noskipws), l;
std::vector<Tup> data;
using namespace qi;
// `blank` is passed as the skipper, so the explicit `eol` parsers below are
// what consume line ends.
if (phrase_parse(f, l,
omit [ repeat(100) [ *(char_ - eol) >> eol ] ] >> // omit 100 lines
repeat(10) [ int_ >> int_ >> int_ >> ',' >> eol ], // parse 10 3-tuples
blank, data))
{
// Pre-incremented before printing, so the first printed number is 101.
int line = 100;
for(auto tup : data)
std::cout << ++line << "\t" << boost::fusion::as_vector(tup) << "\n";
}
}
When tested with some random input like
od -Anone -t d2 /dev/urandom -w6 | sed 's/$/,/g' | head -200 | tee log | ./test
echo ============== VERIFY WITH sed:
nl log | sed -n '101,110p'
It'll print something expected, like:
101 (15400 5215 -20219)
102 (26426 -17361 -6618)
103 (-15311 -6387 -5902)
104 (22737 14339 16074)
105 (-28136 21003 -11594)
106 (-11020 -32377 -4866)
107 (-24024 10995 22766)
108 (3438 -19758 -10931)
109 (28839 22032 -7204)
110 (-25237 23224 26189)
============== VERIFY WITH sed:
101 15400 5215 -20219,
102 26426 -17361 -6618,
103 -15311 -6387 -5902,
104 22737 14339 16074,
105 -28136 21003 -11594,
106 -11020 -32377 -4866,
107 -24024 10995 22766,
108 3438 -19758 -10931,
109 28839 22032 -7204,
110 -25237 23224 26189,

Just because I want to learn more about Spirit X3, and because the world would like to know more about this upcoming version of the library, here's a more intricate version that shows a way to dynamically filter lines according to some expression.
In this case the lines are handled by this handler:
// Semantic action invoked once per parsed record: counts records in `line_no`
// and stores the three parsed ints in `data` for every 10th record only.
auto handle = [&](auto& ctx) mutable {
using boost::fusion::at_c;
// line_no is incremented before the test, so records 10, 20, 30, ... are kept.
if (++line_no % 10 == 0)
{
auto& attr = x3::_attr(ctx);
data.push_back({ at_c<0>(attr), at_c<1>(attr), at_c<2>(attr) });
}
};
As you'd expect every 10th line is included.
Live On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
// Parses "a b c," records from stdin with Spirit X3 and prints every 10th one.
int main() {
using It = boost::spirit::istream_iterator;
// noskipws: leave whitespace in the stream so the grammar can match x3::eol.
It f(std::cin >> std::noskipws), l;
struct Tup { int a, b, c; };
std::vector<Tup> data;
size_t line_no = 0;
// Semantic action: called for each parsed triple; keeps only every 10th.
auto handle = [&](auto& ctx) mutable {
using boost::fusion::at_c;
if (++line_no % 10 == 0)
{
auto& attr = x3::_attr(ctx);
data.push_back({ at_c<0>(attr), at_c<1>(attr), at_c<2>(attr) });
}
};
// Grammar: int-triples (each triggering `handle`) separated by ',' followed by
// an end of line; x3::blank is passed as the skipper.
if (x3::phrase_parse(f, l, (x3::int_ >> x3::int_ >> x3::int_) [ handle ] % (',' >> x3::eol), x3::blank))
{
for(auto tup : data)
std::cout << tup.a << " " << tup.b << " " << tup.c << "\n";
}
}
Prints e.g.
g++ -std=c++1y -O2 -Wall -pedantic -pthread main.cpp -o test
od -Anone -t d2 /dev/urandom -w6 | sed 's/$/,/g' | head -200 | tee log | ./test
echo ============== VERIFY WITH perl:
nl log | perl -ne 'print if $. % 10 == 0'
-8834 -947 -8151
13789 -20056 -11874
6919 -27211 -19472
-7644 18021 13523
-20120 16923 -11419
27772 31149 14005
3540 4894 -24790
10698 10223 -30397
-22533 -32437 -13665
25813 3264 -16414
11453 11955 18268
5092 27052 17930
10915 6493 20432
-14380 -6085 -25430
18599 6710 17279
22049 22259 -32189
1048 14621 6452
-24996 10856 29429
3537 -26338 19623
-4117 6617 14009
============== VERIFY WITH perl:
10 -8834 -947 -8151,
20 13789 -20056 -11874,
30 6919 -27211 -19472,
40 -7644 18021 13523,
50 -20120 16923 -11419,
60 27772 31149 14005,
70 3540 4894 -24790,
80 10698 10223 -30397,
90 -22533 -32437 -13665,
100 25813 3264 -16414,
110 11453 11955 18268,
120 5092 27052 17930,
130 10915 6493 20432,
140 -14380 -6085 -25430,
150 18599 6710 17279,
160 22049 22259 -32189,
170 1048 14621 6452,
180 -24996 10856 29429,
190 3537 -26338 19623,
200 -4117 6617 14009,

Related

leetcode 295 median in stream, runtime error?

Leetcode 295 is to find median in a data stream.
I want to use two heaps to implement it, which makes adding a value from the stream O(log n) and getting the percentile O(1).
left_heap is a min_heap which is used to save the data to the left of the required percentile.
right_heap is used to save the data which is larger than the percentile.
The class SortedStream makes adding data O(log n) and findMedian O(1).
#include <iostream>
#include <vector>
#include <climits>
#include <algorithm>
using namespace std;
/// Streaming percentile tracker built on two binary heaps.
///
/// `left_data_` is a max-heap (std::less) holding the lower part of the
/// stream; `right_data_` is a min-heap (std::greater) holding the rest.
/// After each push() the left heap is rebalanced towards
/// floor(size * percent) + 1 elements, so its top is the element at 0-based
/// sorted rank floor(size * percent) (clamped to the largest element).
///
/// `percent` is expected to lie in [0, 1].
class SortedStream {
public:
    /// @param percent requested percentile as a fraction (0.5 == median).
    /// @param rsize   expected number of elements, used only to pre-reserve storage.
    SortedStream(double percent, size_t rsize = 65536*16) : percent_(percent), reserve_size_(rsize) {
        init();
    }

    /// Inserts one value in O(log n).
    void push(double v) {
        ++size_;

        // Track the running extremes; the first value replaces the sentinels.
        if (size_ == 1) {
            max_ = v;
            min_ = v;
        } else {
            max_ = std::max(max_, v);
            min_ = std::min(min_, v);
        }

        // The top of a std heap is front(), not back(); never touch it while empty.
        if (left_data_.empty() || v <= left_data_.front()) {
            left_data_.push_back(v);
            std::push_heap(left_data_.begin(), left_data_.end(), std::less<double>{});
        } else {
            right_data_.push_back(v);
            std::push_heap(right_data_.begin(), right_data_.end(), std::greater<double>{});
        }

        // Desired number of elements in the left heap.
        const size_t idx = static_cast<size_t>(size_ * percent_ + 1);
        const size_t left_size = left_data_.size();
        if (idx < left_size) {
            // Move the largest "left" value over to the right heap.
            std::pop_heap(left_data_.begin(), left_data_.end(), std::less<double>{});
            const double left_top = left_data_.back();
            left_data_.pop_back();
            right_data_.push_back(left_top);
            // Each heap must always be maintained with its own comparator.
            std::push_heap(right_data_.begin(), right_data_.end(), std::greater<double>{});
        } else if (idx > left_size && !right_data_.empty()) {
            // Move the smallest "right" value over to the left heap.
            std::pop_heap(right_data_.begin(), right_data_.end(), std::greater<double>{});
            const double right_top = right_data_.back();
            right_data_.pop_back();
            left_data_.push_back(right_top);
            std::push_heap(left_data_.begin(), left_data_.end(), std::less<double>{});
        }
    }

    /// Resets the tracker to the empty state and pre-reserves heap storage.
    void init() {
        left_data_.clear();
        right_data_.clear();
        const size_t lsize = static_cast<size_t>(reserve_size_ * percent_ + 2);
        left_data_.reserve(lsize);
        // Guard the subtraction so it cannot wrap around for large percent values.
        right_data_.reserve(lsize < reserve_size_ + 2 ? reserve_size_ + 2 - lsize : 0);
        max_ = INT_MIN;
        min_ = INT_MAX;
        size_ = 0;
    }

    /// Number of values pushed since construction / the last init().
    size_t size() const { return size_; }
    /// Largest value pushed so far (INT_MIN while empty).
    double max() const { return max_; }
    /// Smallest value pushed so far (INT_MAX while empty).
    double min() const { return min_; }

    /// Current percentile in O(1): the top of the left (max-)heap.
    /// Returns 0.0 when no value has been pushed yet.
    double percentile() const {
        return left_data_.empty() ? 0.0 : left_data_.front();
    }

public:
    double percent_;
    size_t size_;
    double max_, min_;
    std::vector<double> left_data_, right_data_;
    size_t reserve_size_;
};
/// LeetCode-style facade over a SortedStream configured for the 0.5 percentile.
class MedianFinder {
public:
    MedianFinder()
        : ss(0.5)
    {
    }

    /// Feeds one integer into the underlying stream.
    void addNum(int num)
    {
        ss.push(num);
    }

    /// Returns whatever the underlying stream reports as its percentile.
    double findMedian()
    {
        const double result = ss.percentile();
        return result;
    }

    SortedStream ss;
};
// Demo driver: feeds 0..14 into a MedianFinder and prints the reported median
// after every insertion.
int main() {
    // Automatic storage instead of a raw `new` that is never deleted.
    MedianFinder obj;
    for (size_t i = 0; i < 15; ++i) {
        obj.addNum(static_cast<int>(i));
        const double param_2 = obj.findMedian();
        cout << "i = " << i << " median = " << param_2 << endl;
    }
}
it's ok to run in my laptop, but when i submit in leetcode, it comes out:
Line 863: Char 45: runtime error: applying non-zero offset 18446744073709551608 to null pointer (stl_iterator.h)
SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/9/../../../../include/c++/9/bits/stl_iterator.h:868:45
I never see this error.
can you help on this?
I like your (OP's) idea that heaps can be used to solve the task: one heap of smaller values and one of larger values. Also, as @ArminMontigny suggested, one can use std::priority_queue instead of a plain heap, because a priority queue is based on a heap and adds easy-to-use helper methods. A regular heap is a kind of low-level backend for a priority queue.
Based on these two suggestions and inspired by your interesting question I decide to implement short (30 lines) solution for your task (it uses random numbers as example input):
Try it online!
#include <queue>
#include <random>
#include <iostream>
// Running median over 100 pseudo-random values in [0, 1000) using two
// priority queues: `low` (max-heap) holds the lower half, `high` (min-heap)
// the upper half. Prints the median after each insertion, four per line.
int main() {
    std::mt19937_64 rng{123};
    std::priority_queue<int> low;
    std::priority_queue<int, std::vector<int>, std::greater<int>> high;

    for (size_t step = 0; step < 100; ++step) {
        int n = rng() % 1000;

        // Route the new value to the half it belongs to.
        if (!low.empty() && n > low.top()) {
            high.push(n);
        } else {
            low.push(n);
        }

        // Rebalance so the two sizes never differ by more than one.
        while (low.size() + 1 < high.size()) {
            low.push(high.top());
            high.pop();
        }
        while (high.size() + 1 < low.size()) {
            high.push(low.top());
            low.pop();
        }

        // Equal sizes: average of the two tops; otherwise the top of the bigger half.
        double median;
        if (low.size() == high.size()) {
            median = (low.top() + high.top()) / 2.0;
        } else if (low.size() < high.size()) {
            median = high.top();
        } else {
            median = low.top();
        }

        std::cout << "n = " << n << " med = " << median << " | ";
        const bool end_of_row = (step + 1) % 4 == 0;
        if (end_of_row) {
            std::cout << std::endl;
        }
    }
}
Output:
n = 504 med = 504 | n = 771 med = 637.5 | n = 101 med = 504 | n = 790 med = 637.5 |
n = 380 med = 504 | n = 388 med = 446 | n = 888 med = 504 | n = 406 med = 455 |
n = 53 med = 406 | n = 240 med = 397 | n = 749 med = 406 | n = 438 med = 422 |
n = 566 med = 438 | n = 238 med = 422 | n = 741 med = 438 | n = 817 med = 471 |
n = 810 med = 504 | n = 376 med = 471 | n = 816 med = 504 | n = 503 med = 503.5 |
n = 599 med = 504 | n = 264 med = 503.5 | n = 704 med = 504 | n = 132 med = 503.5 |
n = 740 med = 504 | n = 391 med = 503.5 | n = 563 med = 504 | n = 778 med = 533.5 |
n = 768 med = 563 | n = 136 med = 533.5 | n = 964 med = 563 | n = 368 med = 533.5 |
n = 653 med = 563 | n = 941 med = 564.5 | n = 976 med = 566 | n = 680 med = 582.5 |
n = 546 med = 566 | n = 200 med = 564.5 | n = 387 med = 563 | n = 698 med = 564.5 |
n = 562 med = 563 | n = 251 med = 562.5 | n = 257 med = 562 | n = 735 med = 562.5 |
n = 822 med = 563 | n = 212 med = 562.5 | n = 576 med = 563 | n = 368 med = 562.5 |
n = 783 med = 563 | n = 964 med = 564.5 | n = 234 med = 563 | n = 805 med = 564.5 |
n = 952 med = 566 | n = 162 med = 564.5 | n = 936 med = 566 | n = 493 med = 564.5 |
n = 88 med = 563 | n = 313 med = 562.5 | n = 580 med = 563 | n = 274 med = 562.5 |
n = 353 med = 562 | n = 701 med = 562.5 | n = 882 med = 563 | n = 249 med = 562.5 |
n = 19 med = 562 | n = 482 med = 554 | n = 327 med = 546 | n = 402 med = 525 |
n = 379 med = 504 | n = 521 med = 512.5 | n = 977 med = 521 | n = 550 med = 533.5 |
n = 434 med = 521 | n = 82 med = 512.5 | n = 581 med = 521 | n = 134 med = 512.5 |
n = 532 med = 521 | n = 860 med = 526.5 | n = 562 med = 532 | n = 225 med = 526.5 |
n = 907 med = 532 | n = 837 med = 539 | n = 671 med = 546 | n = 785 med = 548 |
n = 593 med = 550 | n = 533 med = 548 | n = 471 med = 546 | n = 352 med = 539.5 |
n = 388 med = 533 | n = 532 med = 532.5 | n = 310 med = 532 | n = 135 med = 532 |
n = 323 med = 532 | n = 81 med = 526.5 | n = 849 med = 532 | n = 577 med = 532 |
n = 643 med = 532 | n = 956 med = 532.5 | n = 204 med = 532 | n = 383 med = 532 |
Regarding your question about Sanitizer error - this sanitizer is a part of CLang. You can download Clang yourself and try it out on your home laptop, to reproduce exactly same error.
To get same error add option -fsanitize=undefined, when compiling using CLang at home.
For Windows CLang can be downloaded from this page. Also on Windows if you have great package manager Chocolatey, then you can install CLang + LLVM through short command choco install llvm.
For Linux CLang can be installed through sudo apt install clang.
Also you can use great online website GodBolt, by this link, at given link I already chosen CLang for compilation and put necessary options -std=c++11 -O0 -fsanitize=undefined, so you have just to start coding in the window to the left-handside when you open the link.
You have one minor problem
In the line
double left_top = left_data_.back();
At the very beginning, the std::vector "left_data_" will be empty. Calling back() on an empty vector is undefined behavior, which is what the sanitizer reports as a runtime error.
If you modify this line to for example:
double left_top = left_data_.empty()?0.0:left_data_.back();
Then your program will work as you expect it to work.
I personally find the approach a little bit too complicated. Maybe you could use a std::multiset or a std::priority_queue. In particular, std::priority_queue will implement a max-heap and a min-heap for you, without the overhead of calling the heap functions on std::vector yourself.
But I am still in favor of the std::multiset . . .

Interpreting / Reading text files written for Assembly application

I am just starting out in C++.
I am writing a console application, to "read in" an .evt file (custom, not to be confused with Event viewer files in Windows) and its contents but now I need to write a method to.
a) Store each block of 'EVENT X' including but also ending at 'END'.
b) Make the contents of each block searchable/selectable.
If the content wasn't so 'wildly' varied, I would be happy to put this into some SQL table or experiment with an array but I don't know a starting point to do this as the number of 'fields' or parameters varies. The maximum number of lines I have seen in a block is around 20, the maximum number of parameters per line I have seen is around 13.
I'm not asking for an explicit answer or the whole code to do it although it is welcome, just a generic sample of code to get started that might be appropriate.
This my function to just load the data as it is.
// Prints the hard-coded .evt file line by line, then asks the user to confirm
// that the output looks right. Reports and returns early if the file cannot
// be opened instead of silently printing nothing.
void event_data_loader()
{
    string evt_data;
    string response2;
    cout << "You have chosen to Create/Load Soma events\n\n";
    ifstream named_EVT("C:/evts/1.evt");
    if (!named_EVT)
    {
        // Without this check a missing file looks identical to an empty one.
        cout << "Could not open C:/evts/1.evt\n";
        return;
    }
    while (getline(named_EVT, evt_data))
    {
        // Output the text from the file, one line at a time (spaces included)
        cout << evt_data << "\n";
    }
    cout << "Does the output look ok?(Y/N)";
    cin >> response2;
    if (response2 == "Y")
    {
        // Vectors? Dynamic array? to re-arrange the data?
    }
}
The files themselves have content like this. I know what most of the functions do, less so all of the parameters. For some reason putting this on the page it puts them into a single line.
EVENT 01
A CHECK_HUMAN
A CHECK_POSITION 1 250 90 350 90
E BBS_OPEN 1 0
END
EVENT 02
E SELECT_MSG 336 363 314 337 03 338 12 -1 -1
END
EVENT 03
E RUN_EVENT 761
E RUN_EVENT 04
E RUN_EVENT 05
END
EVENT 761
A EXIST_ITEM 373 1
E SELECT_MSG 857 315 762 316 763 -1 -1 -1 -1
E RETURN
END
EVENT 762
A EXIST_ITEM 373 1
E ROB_ITEM 373 1
E SHOW_MAGIC 6
E CHANGE_HP 1 10000
E CHANGE_MP 1 10000
E MESSAGE_NONE 858
E RETURN
END
EVENT 1862
A ABSENT_EVENT 1582
A EXIST_ITEM 1800 1
A EXIST_ITEM 1801 1
A EXIST_ITEM 1802 1
A EXIST_ITEM 1803 1
A EXIST_ITEM 1804 1
A EXIST_ITEM 1805 1
A EXIST_ITEM 1806 1
A EXIST_ITEM 1807 1
A WEIGHT 365 1854 1 1832 1 -1 1 -1 -1 -1 -1
A CHECK_ITEMSLOT 393 1854 1 1832 1 -1 1 -1 -1 -1 -1
A GENDER 1
E ADD_EVENT 1582
E MESSAGE_NONE 3237
E ROB_ITEM 1800 1
E ROB_ITEM 1801 1
E ROB_ITEM 1802 1
E ROB_ITEM 1803 1
E ROB_ITEM 1804 1
E ROB_ITEM 1805 1
E ROB_ITEM 1806 1
E ROB_ITEM 1807 1
E GIVE_ITEM 1854 1
E GIVE_ITEM 1832 1
E RETURN
END
I would do something like this:
/// One line inside an EVENT block, e.g. "A CHECK_POSITION 1 250 90 350 90".
struct Subevent {
    std::string selector;     // first token of the line (e.g. "A" or "E")
    std::string name;         // second token of the line
    std::vector<int> params;  // remaining integer tokens, in order
};

/// One "EVENT <id> ... END" block.
struct Event {
    int id;
    std::vector<Subevent> subevents;
};

/// Reads "EVENT <id>" ... "END" blocks from `input_stream`.
///
/// Blank (or whitespace-only) lines are ignored and trailing whitespace,
/// including '\r' from Windows line endings, is stripped. An event is appended
/// to the result when its "END" line is seen.
///
/// @param input_stream text source, consumed until end of stream.
/// @return the events in the order their END lines appear.
std::vector<Event> load_events(std::istream& input_stream) {
    std::vector<Event> out;
    Event current_event {};     // current event being built
    std::string line;
    bool inside_event = false;  // are we inside the scope of an event?

    while (std::getline(input_stream, line)) {
        // Strip trailing whitespace. The emptiness check must come first:
        // back() on an empty string is undefined behaviour. The cast keeps
        // isspace well-defined for chars with negative values.
        while (!line.empty() && std::isspace(static_cast<unsigned char>(line.back()))) {
            line.pop_back();
        }
        // skip empty lines
        if (line.empty()) {
            continue;
        }

        // read first token (until first space)
        std::stringstream ss(line);
        std::string first_token;
        ss >> first_token;

        if (first_token == "EVENT") {
            // line: EVENT <id>
            if (inside_event) {
                // error: "not expecting new event"
                // choose your own error messaging method
            }
            int id = 0;
            ss >> id;  // read <id>
            // setup new event
            current_event.id = id;
            inside_event = true;
        }
        else if (first_token == "END") {
            // line: END
            if (!inside_event) {
                // error: "unexpected END"
            }
            // record and clear current event
            out.push_back(current_event);
            inside_event = false;
            current_event = Event();
        }
        else {
            // line: <selector> <name> <params...>
            // e.g.: A GENDER 1
            if (!inside_event) {
                // error: "unexpected property entry"
            }
            // read subevent
            Subevent subevent {};
            subevent.selector = first_token;
            ss >> subevent.name;
            // copy over the int params from the line
            std::copy(
                std::istream_iterator<int>(ss),
                std::istream_iterator<int>(),
                std::back_inserter(subevent.params)
            );
            // attach the subevent to the event currently being built
            current_event.subevents.push_back(subevent);
        }
    }
    return out;
}

How to write binary operator with two post operands syntax with Boost Spirit x3?

I am following this example: https://github.com/boostorg/spirit/blob/develop/example/x3/calc/calc9/expression_def.hpp
What I am trying to accomplish is to write a rule that parses and generates like min{x}{y}. Mostly the code is using expression grammar like x + y, but now I want to place and parse both operands to the rhs of the operator.
I added the following code in expression_def.hpp file:
...
x3::symbols<ast::optoken> additive_op;
x3::symbols<ast::optoken> multiplicative_op;
x3::symbols<ast::optoken> binarypost_op;
x3::symbols<ast::optoken> unary_op;
x3::symbols<> keywords;
...
binarypost_op.add
("min", ast::op_divide) // Dummy operation usage for now
;
...
struct binarypost_expr_class;
struct unary_expr_class;
...
typedef x3::rule<binarypost_expr_class, ast::expression>
binarypost_expr_type;
...
binarypost_expr_type const binarypost_expr = "binarypost_expr";
...
auto const multiplicative_expr_def =
binarypost_expr
>> *(multiplicative_op > binarypost_expr)
;
auto const binarypost_expr_def = // See the chaining operation
('{' > unary_expr > '}')
>> *(binarypost_op > ('{' > unary_expr > '}'))
;
auto const unary_expr_def =
primary_expr
| (unary_op > primary_expr)
;
This works fine, but it can only parse something like {x} min {y}. I want to be able to parse min {x} {y}. I tried many combinations, such as:
binarypost_op >> ('{' > unary_expr > '}') > ('{' > unary_expr > '}') etc., but I can't figure out the right way to write this. Any suggestions / comments?
Ok, here's the changes. The hard part is actually code-generating the builtin function.
Parsing
Step 1: extend AST
Always start with the AST. We want operands that can be function calls:
In ast.hpp:
struct function_call; // ADDED LINE
// ...
// AST node for a single operand: a variant over the listed alternatives.
// unary, expression and function_call are held through x3::forward_ast.
struct operand :
x3::variant<
nil
, unsigned int
, variable
, x3::forward_ast<unary>
, x3::forward_ast<expression>
, x3::forward_ast<function_call> // ADDED LINE
>
{
// Re-export the variant's constructors and assignment operators.
using base_type::base_type;
using base_type::operator=;
};
// ...
// Identifiers of the built-in functions a function_call node can refer to.
enum funtoken
{
fun_min,
fun_max,
};
// ...
// AST node for a built-in function call: which function, and its arguments
// in source order.
struct function_call : x3::position_tagged
{
funtoken fun;
std::list<operand> args;
};
In ast_adapted.hpp:
BOOST_FUSION_ADAPT_STRUCT(client::ast::function_call,
fun, args
)
Step 2: extend grammar
(This is all in expression_def.hpp)
Let's be generic, so parse function name tokens using a symbol table:
x3::symbols<ast::funtoken> functions;
Which we have to initialize in add_keywords:
functions.add
("min", ast::fun_min)
("max", ast::fun_max)
;
Now declare a rule for function calls:
struct function_call_class;
typedef x3::rule<function_call_class, ast::function_call> function_call_type;
function_call_type const function_call = "function_call";
That's all red-tape. The "interesting thing" is the rule definition:
auto const function_call_def =
functions
>> '(' >> expression % ',' >> ')'
;
Well. That's underwhelming. Let's integrate into our primary expression rule:
auto const primary_expr_def =
uint_
| bool_
| function_call
| (!keywords >> identifier)
| ('(' > expression > ')')
;
Note the ordering. If you want to be able to add function names that collide with a keyword, you'll need to add precautions.
Also, let's make AST annotation work for our node:
struct function_call_class : x3::annotate_on_success {};
Code generation
It's easy to find where to add support for the new AST node:
In compiler.hpp:
bool operator()(ast::function_call const& x) const;
Now comes the hard part.
What's really required for general n-ary is an accumulator. Since we don't have registers, this would need to be a temporary (local). However, since the VM implementation doesn't have these, I've limited the implementation to a fixed binary function call only.
Note that the VM already has support for function calls. Functions can have locals. So, if you code-gen a variable-argument built-in function you can implement a left-fold recursive solution.
In compiler.cpp:
// Generates code for a built-in binary function call (min/max).
// Returns false as soon as compiling one of the arguments fails.
bool compiler::operator()(ast::function_call const& x) const
{
// Emits: <a> <b> <opcode> jump_if ... <a> jump ... <b>.
// Note that both argument expressions are emitted twice: once for the
// comparison and once in the branch that produces the result.
auto choice = [&](int opcode) {
// Arity is only checked through BOOST_ASSERT; the iterator is dereferenced
// twice below regardless.
BOOST_ASSERT(x.args.size() == 2); // TODO FIXME hardcoded binary builtin
auto it = x.args.begin();
auto& a = *it++;
if (!boost::apply_visitor(*this, a))
return false;
auto& b = *it++;
if (!boost::apply_visitor(*this, b))
return false;
program.op(opcode); // the binary fold operation
// Conditional jump with a placeholder operand (0), patched below.
// NOTE(review): the sample output in this document (min(1,3) yields 1)
// suggests the fall-through path runs when the comparison holds; confirm
// op_jump_if semantics against the VM.
program.op(op_jump_if, 0);
size_t const branch = program.size()-1;
if (!boost::apply_visitor(*this, a))
return false;
// Unconditional jump over the second alternative; operand patched below.
program.op(op_jump, 0);
std::size_t continue_ = program.size()-1;
// Patch the conditional jump: distance from its operand slot to here.
program[branch] = int(program.size()-branch);
if (!boost::apply_visitor(*this, b))
return false;
// Patch the unconditional jump: distance from its operand slot to here.
program[continue_] = int(program.size()-continue_);
return true;
};
switch (x.fun) {
case ast::fun_min: return choice(op_lt);
case ast::fun_max: return choice(op_gt);
default: BOOST_ASSERT(0); return false;
}
// Not reached: every switch label above returns.
return true;
}
I've just taken inspiration from the surrounding code on how to generate the jump labels.
Trying It Out
A simplistic example would be: var x = min(1,3);
Assembler----------------
local x, #0
start:
op_stk_adj 1
op_int 1
op_int 3
op_lt
op_jump_if 13
op_int 1
op_jump 15
13:
op_int 3
15:
op_store x
end:
-------------------------
Results------------------
x: 1
-------------------------
Running it with some random contrived input:
./test <<< "var a=$(($RANDOM % 100)); var
b=$(($RANDOM % 100)); var contrived=min(max(27,2*a), 100+b);"
Prints e.g.:
Assembler----------------
local a, #0
local b, #1
local contrived, #2
start:
op_stk_adj 3
op_int 31
op_store a
op_int 71
op_store b
op_int 27
op_int 2
op_load a
op_mul
op_gt
op_jump_if 24
op_int 27
op_jump 29
24:
op_int 2
op_load a
op_mul
29:
op_int 100
op_load b
op_add
op_lt
op_jump_if 58
op_int 27
op_int 2
op_load a
op_mul
op_gt
op_jump_if 51
op_int 27
op_jump 56
51:
op_int 2
op_load a
op_mul
56:
op_jump 63
58:
op_int 100
op_load b
op_add
63:
op_store contrived
end:
-------------------------
Results------------------
a: 31
b: 71
contrived: 62
-------------------------

Take only the first five rows of a long list separated by empty lines (C++)

I am a bit stuck on what I want to do with my C++ code:
// Reads "test.txt" and, for every group of rows separated by an empty line,
// prints the bid/ask words of the first five rows only.
// Returns 1 if the file cannot be opened, 0 otherwise.
int main() {
    const int MAX_LEN = 512;
    const int MAX_LINES = 150;       // upper bound on the number of lines read
    const int ROWS_PER_GROUP = 5;    // rows to keep from each group

    char bid_price[MAX_LEN];
    char bid_volume[MAX_LEN];
    char ask_price[MAX_LEN];
    char ask_volume[MAX_LEN];

    ifstream in_stream;
    in_stream.open("test.txt");
    if (!in_stream) {
        cout << "Could not open test.txt" << endl;
        return 1;  // non-zero exit status signals the failure
    }

    char current_string[MAX_LEN];
    int rows_in_group = 0;
    // Read exactly one line per iteration; the loop stops at end of file.
    for (int i = 0; i < MAX_LINES && in_stream.getline(current_string, MAX_LEN); i++) {
        // An empty line ends the current group and starts a new one.
        // (current_string is a char array, so test its first character.)
        if (current_string[0] == '\0') {
            rows_in_group = 0;
            continue;
        }
        // Ignore everything after the first ROWS_PER_GROUP rows of a group.
        if (rows_in_group >= ROWS_PER_GROUP) {
            continue;
        }
        ++rows_in_group;

        get_word(current_string, 1, bid_price);
        cout << "First word is: " << bid_price << endl;
        get_word(current_string, 2, bid_volume);
        cout << "Second word is: " << bid_volume << endl;
        get_word(current_string, 4, ask_price);
        cout << "Third word is: " << ask_price << endl;
        get_word(current_string, 5, ask_volume);
        cout << "Fourth word is: " << ask_volume << endl;
    }
    in_stream.close();
    return 0;
}
What I would like to do is to take only the first five rows of such a list in a txt file such as the following:
383.80000 | 0.014 | 1461142717 || 383.67000 | 5.141 | 1461142798
383.61100 | 0.010 | 1461134871 || 383.60000 | 9.076 | 1461142798
383.51100 | 0.010 | 1461136836 || 383.46100 | 0.400 | 1461142794
383.41100 | 0.010 | 1461129820 || 383.35000 | 7.740 | 1461142798
383.31100 | 0.010 | 1461129821 || 383.30000 | 0.014 | 1461142637
383.21100 | 0.010 | 1461138430 || 383.20000 | 2.000 | 1461142787
383.16100 | 9.089 | 1461142763 || 383.11100 | 0.010 | 1461134135
"EMPTY LINE"
383.01100 | 8.573 | 1461138900 || 383.00000 | 50.037 | 1461142501
382.98300 | 5.000 | 1461135929 || 382.97000 | 0.150 | 1461142461
382.93400 | 3.476 | 1461138822 || 382.91100 | 0.010 | 1461128348
382.81900 | 8.762 | 1461136840 || 382.81100 | 0.010 | 1461128350
382.80000 | 0.014 | 1461141922 || 382.71100 | 0.010 | 1461142621
382.68000 | 15.936 | 1461142797 || 382.67000 | 2.000 | 1461141655
382.66900 | 4.305 | 1461130920 || 382.61100 | 0.010 | 1461136076
In this case I'd take only the rows number 1,2,3,4,5 and 9,10,11,12,13
Note: Nevermind the function get_word, it works very well.
Looks like you want to skip blank lines. A lot easier than worrying how many text lines to read:
std::string text_line;
while (getline(in, text_line))
{
// If text line is blank, skip it
if (text_line.empty())
{
continue;
}
// Otherwise process the text line.
// ...
}
I highly recommend you add in some more error detection for lines that don't meet the format.
Edit 1: Skipping specific lines
To skip specific lines, you will need to have a line counter and a container of lines to skip.
// 1-based line numbers to skip. They must be listed in ascending order:
// skip_index only ever moves forward, so an out-of-order entry would never match.
const unsigned int lines_to_skip[] = {8, 24, 25, 26, 101, 113, 125};
const unsigned int skip_list_size = sizeof(lines_to_skip) / sizeof(lines_to_skip[0]);
std::string text_line;
unsigned int line_counter = 0;
unsigned int skip_index = 0; // index of the next entry in lines_to_skip to look for
//...
while (getline(in, text_line))
{
// Incremented before the comparison, so the first line read is line 1.
++line_counter;
if (skip_index < skip_list_size)
{
if (line_counter == lines_to_skip[skip_index])
{
// This line is on the skip list: advance to the next entry and drop it.
++skip_index;
continue;
}
}
// Perform other validations
// ...
// Process the text line.
}
Skipping blank and invalid lines is a lot more efficient than skipping lines by line number.
You want to read 5 lines and disregard empty lines.
So, one easy way to do it would be to first remove all empty lines from your dataset and then subsequently just grab the top 5 ones.
An alternative would be to keep track of how many lines you've successfully read and while that variable is less than 5, read one more. Then only increase the counter when you read a non-blank line and don't increment it when you skip a blank one. Continue until the counter is 5 or you run out of lines to read.

boost spirit qi match multiple elements

I would like to create a parser based on boost spirit qi that will be able to parse a list of integer values. That is obviously extremely easy and there are tons of examples. The list though is a bit smarter than a comma separated list and it could looks like:
17, 5, fibonacci(2, 4), 71, 99, range(5, 7)
the result of the parser should be a std::vector with the following values:
17, 5, 1, 2, 3, 71, 99, 5, 6, 7
Where fibonacci(2, 4) results in 1, 2, 3 and range(5, 7) results in 5, 6, 7
Edit: What I am looking for is if I already have parsers that have an attribute int (say int_) and parsers that have an attribute std::vector fibonacci and range, how I can combine the results in a single parser. Something like:
list %= *(int_ | elements [ fibonacci | range ] );
Where elements is the magic that does whatever is necessary to make the results from fibonacci (or range) fit into the list.
Note: I am not looking for solution that includes append functions like
list = *(int_[push_back(_val, _1)] | fibonacci[push_back(_val, _1)] | range[push_back(_val, _1)] ] );
Here's a simplistic take: Live On Coliru
typedef std::vector<int64_t> data_t;
value_list = -value_expression % ',';
value_expression = macro | literal;
literal = int_;
macro = (_functions > '(' > value_list > ')')
[ _pass = phx::bind(_1, _2, _val) ];
Where _functions is a qi::symbols table of functions:
qi::symbols<char, std::function<bool(data_t const& args, data_t& into)> > _functions;
Now, note that the input "17, 5, fibonacci(2, 4), 71, 99, range(5, 7)" results in
parse success
data: 17 5 1 2 3 71 99 5 6 7
But you can even get more funky: "range(fibonacci(13, 14))" results in:
parse success
data: 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
As you can see, it prints the range from [fib(13)..fib(14)] which is [233..377] (Wolfram Alpha).
Full code (including demo implementations of fibonacci and range :)):
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace karma = boost::spirit::karma;
namespace phx = boost::phoenix;
typedef std::vector<int64_t> data_t;
// Grammar producing a flat data_t from a comma-separated list whose items are
// either integer literals or calls to a registered function ("macro") that
// expands into several values.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, data_t(), Skipper>
{
parser() : parser::base_type(value_list)
{
using namespace qi;
// Comma-separated list in which each item is optional.
value_list = -value_expression % ',';
value_expression = macro | literal;
literal = int_;
// Look the function up in _functions, parse its argument list, then call it
// with (parsed arguments, this rule's attribute). The function's bool result
// is assigned to _pass, so returning false makes the rule fail.
macro = (_functions > '(' > value_list > ')')
[ _pass = phx::bind(_1, _2, _val) ];
_functions.add("fibonacci", &fibonacci);
_functions.add("range", &range);
BOOST_SPIRIT_DEBUG_NODES((value_list)(value_expression)(literal)(macro));
}
private:
// Appends the Fibonacci numbers with indices args[0]..args[1] (inclusive,
// index 0 -> 0, index 1 -> 1) to `into`. Returns false unless exactly two
// arguments are given.
static bool fibonacci(data_t const& args, data_t& into) {
// unpack arguments
if (args.size() != 2)
return false;
auto f = args[0], l = args[1];
// iterate
uint64_t gen0 = 0, gen1 = 1, next = gen0 + gen1;
for(auto i = 0u; i <= l; ++i)
{
switch(i) {
case 0: if (i>=f) into.push_back(gen0); break;
case 1: if (i>=f) into.push_back(gen1); break;
default:
{
next = gen0 + gen1;
if (i>=f) into.push_back(next);
gen0 = gen1;
gen1 = next;
break;
}
}
}
// done
return true;
}
// Appends every integer from args[0] to args[1] (inclusive) to `into`.
// Returns false unless exactly two arguments are given.
static bool range(data_t const& args, data_t& into) {
// unpack arguments
if (args.size() != 2)
return false;
auto f = args[0], l = args[1];
// Pre-size the output only when the range has more than one element.
if (l>f)
into.reserve(1 + l - f + into.size());
for(; f<=l; ++f)
into.push_back(f); // to optimize
return true;
}
qi::rule<It, data_t(), Skipper> value_list ;
qi::rule<It, data_t(), Skipper> value_expression, macro;
qi::rule<It, int64_t(), Skipper> literal;
// Maps a function name to the callable that expands its arguments.
qi::symbols<char, std::function<bool(data_t const& args, data_t& into)> > _functions;
};
bool doParse(const std::string& input)
{
typedef std::string::const_iterator It;
auto f(begin(input)), l(end(input));
parser<It, qi::space_type> p;
data_t data;
try
{
bool ok = qi::phrase_parse(f,l,p,qi::space,data);
if (ok)
{
std::cout << "parse success\n";
std::cout << "data: " << karma::format_delimited(karma::auto_, ' ', data) << "\n";
}
else std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
return ok;
} catch(const qi::expectation_failure<It>& e)
{
std::string frag(e.first, e.last);
std::cerr << e.what() << "'" << frag << "'\n";
}
return false;
}
// Runs the demo parse and reports failure through the exit status.
int main()
{
    // Keep the call outside assert(): when NDEBUG is defined the assert
    // expression is not evaluated, so the parse would never run.
    const bool ok = doParse("range(fibonacci(13, 14))");
    assert(ok);
    return ok ? 0 : 1;
}