I want to read data in an input file partially. For example, input file is 1GB, I want to read only 100MB each time, then store in a vector. How can I continue reading the next line after the first loop? As you can see in my code below, after the first loop of i, maybe the vector v stored 1000 lines from the input file. I'm not sure if the next loop of i, the command while(std::getline(infile, line)) will continue to read from line 1001 from the input file or not? If not, how can I modify my code to get lines from the input in several groups (1~1000), (1001~2000), (2001~3000)... then store in vector v?
#define FILESIZE 1000000000 // size of the file on disk
#define TOTAL_MEM 100000 // max items the memory buffer can hold
void ExternalSort(std::string infilepath, std::string outfilepath)
{
std::vector<std::string> v;
int runs_count;
std::ifstream infile;
if(!infile.is_open())
{
std::cout << "Unable to open file\n";
}
infile.open(infilepath, std::ifstream::in);
if(FILESIZE % TOTAL_MEM > 0)
runs_count = FILESIZE/TOTAL_MEM + 1;
else
runs_count = FILESIZE/TOTAL_MEM;
// Iterate through the elements in the file
for(i = 0; i < runs_count; i++)
{
// Step 1: Read M-element chunk at a time from the file
for (j = 0; j < (TOTAL_MEM < FILESIZE ? TOTAL_MEM : FILESIZE); j++)
{
while(std::getline(infile, line))
{
// If line is empty, ignore it
if(line.empty())
continue;
new_line = line + "\n";
// Line contains string of length > 0 then save it in vector
if(new_line.size() > 0)
v.push_back(new_line);
}
}
// Step 2: Sort M elements
sort(v.begin(), v.end()); //sort(v.begin(), v.end(), compare);
// Step 3: Create temporary files and write sorted data into those files.
std::ofstream tf;
tf.open(tfile + ToString(i) + ".txt", std::ofstream::out | std::ofstream::app);
std::ostream_iterator<std::string> output_iterator(tf, "\n");
std::copy(v.begin(), v.end(), output_iterator);
v.clear();
//for(std::vector<std::string>::iterator it = v.begin(); it != v.end(); ++it)
// tf << *it << "\n";
tf.close();
}
infile.close();
I didn’t have the patience to check the whole code. It was easier to write a splitter from scratch. Here are some observations, anyhow:
std::ifstream infile;
if (!infile.is_open())
{
std::cout << "Unable to open file\n";
}
infile.open(infilepath, std::ifstream::in);
You will always get the message since you check before opening the file. One correct way to open a file is:
std::ifstream infile(infilepath);
if (!infile)
throw "could not open the input file";
if (infile.peek() == std::ifstream::traits_type::eof())
This will be true, for instance, even for nonexistent files. The algorithm should work for empty files, too.
if(FILESIZE % TOTAL_MEM > 0)
runs_count = FILESIZE/TOTAL_MEM + 1;
else
runs_count = FILESIZE/TOTAL_MEM;
Why do you need the number of resulting files before generate them? You will never be able to calculate it correctly since it depends on how long lines are (you cannot read half of line just to fit it into TOTAL_MEM). You should read from input file at most TOTAL_MEM bytes (but a line, at least), sort & save and then continue from where you left (see the loop in execute, below).
How can I continue reading the next line after the first loop?
If you do not close the input stream, the next read will continue from exactly where you left.
A solution:
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <vector>
#include <iterator>
std::vector<std::string> split_file(const char* fn, std::size_t mem); // see the implementation below
int main()
{
const std::size_t max_mem = 8;
auto r = split_file("input.txt", max_mem);
std::cout << "generated files:" << std::endl;
for (const auto& fn : r)
std::cout << fn << std::endl;
}
class split_file_t
{
public:
split_file_t(std::istream& is, std::size_t mem) :is_{ is }, mem_{ mem }
{
// nop
}
std::vector<std::string> execute()
{
while (make_file())
;
return std::move(ofiles_);
}
protected:
std::istream& is_;
std::size_t mem_;
std::vector<std::string> ofiles_;
static std::string make_temp_file()
{
std::string fn(512, 0);
tmpnam_s(&fn.front(), fn.size()); // this might be system dependent
std::ofstream os(fn);
os.close();
return fn;
}
bool make_file()
{
using namespace std;
// read lines
vector<string> lines;
{
streamsize max_gpos = is_.tellg() + streamsize(mem_);
string line;
while (is_.tellg() < max_gpos && getline(is_, line))
lines.push_back(line);
}
//
if (lines.empty())
return false;
// sort lines
sort(lines.begin(), lines.end());
// save lines
{
string ofile = make_temp_file();
ofstream os{ ofile };
if (!os)
throw "could not open output file";
copy(lines.begin(), lines.end(), ostream_iterator<string>(os, "\n"));
ofiles_.push_back(ofile);
}
//
return bool(is_);
}
};
std::vector<std::string> split_file(const char* fn, std::size_t mem)
{
using namespace std;
ifstream is{ fn };
if (!is)
return vector<string>();
return split_file_t{ is, mem }.execute();
}
Related
I have made a tester class where I take questions from a question pool text file and put random questions from there to a docx file. I want to know why my code is giving me blank output in the docx file.
my random function is working fine. I am selecting two two questions from three questions file.
Here is my code - `
void test()
{
string line;
fstream question1("questiondesc.txt",ios::in | ios::out | ios::app);
fstream testgen("GeneratedTest.docx",ios::trunc | ios::in | ios::out);
testgen.open("GeneratedTest.docx");
if(!question1.is_open())
{
question1.open("questiondesc.txt");
}
int i,num;
for (i = 0; i < 2; i++) {
num = random(1,12);
for(int i =1;i<=num;i++)
{
getline(question1,line);
}
question1.clear();
question1.seekg(0, ios::beg);
testgen<<line<<endl;
}
question1.close();
ifstream question2("questionmcq.txt");
if(!question2.is_open())
{
question2.open("questionmcq.txt");
}
for (i = 0; i < 2; i++) {
num = random(1,26);
while(num%2==0)
{
num = random(1,26);
}
for(int i =1;i<=num;i++)
{
getline(question2,line);
}
testgen<<line<<endl;
getline(question2,line);
testgen<<line<<endl;
question2.clear();
question2.seekg(0, ios::beg);
}
question2.close();
ifstream question3("questionanalytical.txt");
if(!question3.is_open())
{
question3.open("questionanalytical.txt");
}
for (i = 0; i < 2; i++) {
num = random(1,12);
for(int i =1;i<=num;i++)
{
getline(question3,line);
}
question3.clear();
question3.seekg(0, ios::beg);
testgen<<line<<endl;
}
question3.close();
testgen.close();
}
There are errors in your code. I will show them as a comment in the below listing. Additionally I will show (onw of many, and maybe not the best ) solutions for your problem.
You should break down your problem into smaller pieces and design more functions. Then, life will be easier.
Additionally. You´should write comments. If you write comments, then you will detect the problems by yourself.
Your code with my remarks:
#include <iostream>
#include <fstream>
#include <string>
#include <random>
using namespace std; // NO NEVER USE
int random(int from, int to) {
std::random_device random_device;
std::mt19937 generator(random_device());
std::uniform_int_distribution<int> distribution(from, to);
return distribution(generator);
}
void test()
{
string line; // Line is not initialized an not needed here. Pollutes namespace
fstream question1("questiondesc.txt", ios::in | ios::out | ios::app); // Opening a file with these flags will fail. Use ifstream
fstream testgen("GeneratedTest.docx", ios::trunc | ios::in | ios::out);// Opening a file with these flags will fail. Use ofstream
testgen.open("GeneratedTest.docx"); // File was alread opened and failed. Reopening will not work. It failed alread
if (!question1.is_open()) // Use if "(!question1)" instead. There could be also other error bits
{ // Always check the status of any IO operation
question1.open("questiondesc.txt"); // Will never work. Failer already
}
int i, num; // Variable not initialized and not needed here. Name space pollution
for (i = 0; i < 2; i++) {
num = random(1, 12); // This function was not defined. I redefined it
for (int i = 1; i <= num; i++) // i=1 and i<= reaaly) not i=0 and i<num?
{
getline(question1, line); // Always check status of any IO function
}
question1.clear();
question1.seekg(0, ios::beg);
testgen << line << endl;
}
question1.close(); // The destructor of the fstream will close the file for you
ifstream question2("questionmcq.txt"); // Now you open the file as ifstream
if (!question2.is_open()) // Do check for all possible flags.: If (!question2)
{
question2.open("questionmcq.txt"); // Will not work, if it failed in the first time
}
for (i = 0; i < 2; i++) { // So 2 times
num = random(1, 26);
while (num % 2 == 0) // If numbers are equal
{
num = random(1, 26); // Get an odd number
}
for (int i = 1; i <= num; i++) // Usually from 0 to <num
{
getline(question2, line);
}
testgen << line << endl;
getline(question2, line);
testgen << line << endl;
question2.clear();
question2.seekg(0, ios::beg);
}
question2.close(); // No need to close. Destructor will do it for you
ifstream question3("questionanalytical.txt"); // Now you open the file as ifstream
if (!question3.is_open()) // Wrong check. Check for all flags
{
question3.open("questionanalytical.txt"); // Will not help in case of failure
}
// Now this is the 3rd time with the same code. So, put it into a function
for (i = 0; i < 2; i++) {
num = random(1, 12);
for (int i = 1; i <= num; i++)
{
getline(question3, line);
}
question3.clear();
question3.seekg(0, ios::beg);
testgen << line << endl;
}
question3.close();
testgen.close();
}
int main() {
test();
return 0;
}
And here one possible solution. With functions to handler similar parts of the code:
#include <iostream>
#include <string>
#include <fstream>
#include <random>
#include <vector>
#include <tuple>
// From the internet: https://en.cppreference.com/w/cpp/numeric/random/random_device
int random(int from, int to) {
std::random_device random_device;
std::mt19937 generator(random_device());
std::uniform_int_distribution<int> distribution(from, to);
return distribution(generator);
}
std::string readNthLineFromFile(std::ifstream& ifs, int n) {
// Reset file to the beginning
ifs.clear();
ifs.seekg(0, std::ios::beg);
// Default return string in case of error
std::string result{ "\n*** Error while reading a line from the source file\n" };
// If getline fails or ifs is in fail state, the string will be default
for (; std::getline(ifs, result) && (n != 0); n--);
// Give back the desired line
return result;
}
void generateQuestion(std::ifstream& sourceFileStream, std::ofstream& destinationFileStream, int n, const bool twoLines = false) {
// We want to prevent readin the same question again
int oldLineNumber = 0;
// For whatever reason, do this 2 times.
for (size_t i = 0U; i < 2; ++i) {
// If we want to read 2 consecutive lines, then we should not come up with the last kine in the file
if (twoLines & (n > 1)) --n;
// Get a random line number. But no duplicates in the 2 loops
int lineNumber{};
do {
lineNumber = random(1, n);
} while (lineNumber == oldLineNumber);
// For the next loop execution
oldLineNumber = lineNumber;
// Read the random line
std::string line{ readNthLineFromFile(sourceFileStream, lineNumber) };
// And write it to the destination file
destinationFileStream << line << "\n";
// If we want to read to lines in a row
if (twoLines) {
// Read next line
line = readNthLineFromFile(sourceFileStream, ++lineNumber);
// And write it to the destination file
destinationFileStream << line << "\n";
}
}
}
int main() {
const std::string destinationFilename{ "generatedTest.txt" };
const std::string questions1Filename{ "questiondesc.txt" };
const std::string questions2Filename{ "questionmcq.txt" };
const std::string questions3Filename{ "questionanalytical.txt" };
// Here we store the filenames and if one or 2 lines shall be read
std::vector<std::tuple<const std::string, const size_t, const bool>> source{
{ questions1Filename, 12U, false },
{ questions2Filename, 26U, true },
{ questions3Filename, 12U, false }
};
// Open the destination file and check, if it could be opened
if (std::ofstream destinationFileStream(destinationFilename); destinationFileStream) {
// Now open the first source file and generate the questions
for (const std::tuple<const std::string, const size_t, const bool>& t : source) {
// Open source file and check, if it could be opened
if (std::ifstream sourceFileStream(std::get<0>(t)); sourceFileStream) {
generateQuestion(sourceFileStream, destinationFileStream, std::get<1>(t), std::get<2>(t));
}
else {
std::cerr << "\n*** Error. Could not open source file '" << std::get<0>(t) << "'\n";
}
}
}
else {
std::cerr << "\n*** Error: Could not open destination file '" << destinationFilename << "'\n";
}
return 0;
}
I would like to read in a file like this:
13.3027 29.2191 2.39999
13.3606 29.1612 2.39999
13.3586 29.0953 2.46377
13.4192 29.106 2.37817
It has more than 1mio lines.
My current cpp code is:
loadCloud(const string &filename, PointCloud<PointXYZ> &cloud)
{
print_info("\nLoad the Cloud .... (this takes some time!!!) \n");
ifstream fs;
fs.open(filename.c_str(), ios::binary);
if (!fs.is_open() || fs.fail())
{
PCL_ERROR(" Could not open file '%s'! Error : %s\n", filename.c_str(), strerror(errno));
fs.close();
return (false);
}
string line;
vector<string> st;
while (!fs.eof())
{
getline(fs, line);
// Ignore empty lines
if (line == "")
{
std::cout << " this line is empty...." << std::endl;
continue;
}
// Tokenize the line
boost::trim(line);
boost::split(st, line, boost::is_any_of("\t\r "), boost::token_compress_on);
cloud.push_back(PointXYZ(float(atof(st[0].c_str())), float(atof(st[1].c_str())), float(atof(st[2].c_str()))));
}
fs.close();
std::cout<<" Size of loaded cloud: " << cloud.size()<<" points" << std::endl;
cloud.width = uint32_t(cloud.size()); cloud.height = 1; cloud.is_dense = true;
return (true);
}
Reading this file currently takes really long. I would like to speed this up any ideas how to do that?
You can just read the numbers instead of the whole line plus parsing, as long as the numbers always come in sets of three.
void readFile(const std::string& fileName)
{
std::ifstream infile(fileName);
float vertex[3];
int coordinateCounter = 0;
while (infile >> vertex[coordinateCounter])
{
coordinateCounter++;
if (coordinateCounter == 3)
{
cloud.push_back(PointXYZ(vertex[0], vertex[1], vertex[2]));
coordinateCounter = 0;
}
}
}
Are you running optimised code? On my machine your code reads a million values in 1800ms.
The trim and the split are probably taking most of the time. If there is white space at the beginning of the string trim has to copy the whole string contents to erase the first characters. split is creating new string copies, you can optimise this by using string_view to avoid the copies.
As your separators are white space you can avoid all the copies with code like this:
bool loadCloud(const string &filename, std::vector<std::array<float, 3>> &cloud)
{
ifstream fs;
fs.open(filename.c_str(), ios::binary);
if (!fs)
{
fs.close();
return false;
}
string line;
vector<string> st;
while (getline(fs, line))
{
// Ignore empty lines
if (line == "")
{
continue;
}
const char* first = &line.front();
const char* last = first + line.length();
std::array<float, 3> arr;
for (float& f : arr)
{
auto result = std::from_chars(first, last, f);
if (result.ec != std::errc{})
{
return false;
}
first = result.ptr;
while (first != last && isspace(*first))
{
first++;
}
}
if (first != last)
{
return false;
}
cloud.push_back(arr);
}
fs.close();
return true;
}
On my machine this code runs in 650ms. About 35% of the time is used by getline, 45% by parsing the floats, the remaining 20% is used by push_back.
A few notes:
I've fixed the while(!fs.eof()) issue by checking the state of the stream after calling getline
I've changed the result to an array as your example wasn't a mcve so I didn't have a definition of PointCloud or PointXYZ, its possible that these types are the cause of your slowness.
If you know the number of lines (or at least an approximation) in advance then reserving the size of the vector would improve performance
I want to write/read data from a file. Is it possible to divide the file (inside the code) in multiple Strings/Sections? Or read data untill a specific line?
Just like: "Read the Data untill line 32, put it inside a String, read the next 32 lines and put it into another string"
Im already know how to read and find data with seekp but i dont really like it because my code always gets to long.
I already found some code but i dont understand it how it works:
dataset_t* DDS::readFile(std::string filename)
{
dataset_t* dataset = NULL;
std::stringstream ss;
std::ifstream fs;
uint8_t tmp_c;
try
{
fs.open(filename.c_str(), std::ifstream::in);
if (!fs)
{
std::cout << "File not found: " << filename << std::endl;
return NULL;
}
while(fs.good())
{
fs.read((char*)&tmp_c, 1);
if (fs.good()) ss.write((char*)&tmp_c, 1);
}
fs.close();
dataset = new dataset_t();
const uint32_t bufferSize = 32;
char* buffer = new char[bufferSize];
uint32_t count = 1;
while(ss.good())
{
ss.getline(buffer, bufferSize);
dataitem_t dataitem;
dataitem.identifier = buffer;
dataitem.count = count;
dataset->push_back(dataitem);
count++;
}
return dataset;
}
catch(std::exception e)
{
cdelete(dataset);
return NULL;
}
}
The Code edits a binary save file.
Or can someone link me a website where i can learn more about buffers and stringstreams?
You could create some classes to model your requirement: a take<N> for 'grab 32 lines', and a lines_from to iterate over lines.
Your lines_from class would take any std::istream: something encoded, something zipped, ... as long as it gives you a series of characters. The take<N> would convert that into array<string, N> chunks.
Here's a snippet that illustrates it:
int main(){
auto lines = lines_from{std::cin};
while(lines.good()){
auto chunk = take<3>(lines);
std::cout << chunk[0][0] << chunk[1][0] << chunk[2][0] << std::endl;
}
}
And here are the supporting classes and functions:
#include <iostream>
#include <array>
class lines_from {
public:
std::istream ∈
using value_type = std::string;
std::string operator*() {
std::string line;
std::getline(in, line);
return line;
}
bool good() const {
return in.good();
}
};
template<int N, class T>
auto take(T &range){
std::array<typename T::value_type, N> value;
for (auto &e: value) { e = *range; }
return value;
}
(demo on cpp.sh)
When I try to read a file to a buffer, it always appends random characters to the end of the buffer.
char* thefile;
std::streampos size;
std::fstream file(_file, std::ios::in | std::ios::ate);
if (file.is_open())
{
size = file.tellg();
std::cout << "size: " << size;
thefile = new char[size]{0};
file.seekg(0, std::ios::beg);
file.read(thefile, size);
std::cout << thefile;
}
int x = 0;
While my original text in my file is: "hello"
The output becomes: "helloýýýý««««««««þîþîþ"
Could anyone help me as to what is happening here? Thanks
From the C++ docs: http://cplusplus.com/reference/istream/istream/read
"This function simply copies a block of data, without checking its contents nor appending a null character at the end."
So your string misses the trailing null character which indicates the end of the string. In this case cout will just continue printing characters from what is beyond thefile in memory.
Add a '\0' at the end of your string.
If the file is not opened with ios::binary mode, you cannot assume that the position returned by tellg() will give you the number of chars that you will read. Text mode operation may perform some transformations on the flow (f.ex: on windows, it will convert "\r\n" in the file in "\n", so you might find out a size of 2 but read only 1)
Anyway, read() doesn't add a null terminator.
Finally, you must allocate one more character than the size that you expect due to the null terminator that you have to add. Otherwise you risk a buffer overflow when you add it.
You should verify how many chars were really read with gcount(), and set a null terminator to your string accordingly.
thefile = new char[size + 1]{0}; // one more for the trailing null
file.seekg(0, std::ios::beg);
if (file.read(thefile, size))
thefile[size]=0; // successfull read: all size chars were read
else thefile[file.gcount()]=0; // or less chars were read due to text mode
Here's a better way of reading your collection:
#include <vector>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <cstdint>
#include <iterator>
template<class T>
void Write(std::string const & path, T const & value, std::ios_base::openmode mode)
{
if (auto stream = std::ofstream(path, mode))
{
Write(stream, value);
stream.close();
}
else
{
throw std::runtime_error("failed to create/open stream");
}
}
template<class T>
void Write(std::ostream & stream, T const & value)
{
std::copy(value.begin(), value.end(), std::ostreambuf_iterator<char>(stream));
if (!stream)
{
throw std::runtime_error("failed to write");
}
}
template<class T>
void Read(std::istream & stream, T & output)
{
auto eof = std::istreambuf_iterator<char>();
output = T(std::istreambuf_iterator<char>(stream), eof);
if(!stream)
{
throw std::runtime_error("failed to read stream");
}
}
template<class T>
void Read(std::string const & path, T & output)
{
if (auto stream = std::ifstream(path, std::ios::in | std::ios::binary))
{
Read(stream, output);
stream.close();
}
else
{
throw std::runtime_error("failed to create stream");
}
}
int main(void)
{
// Write and read back text.
{
auto const s = std::string("I'm going to write this string to a file");
Write("temp.txt", s, std::ios_base::trunc | std::ios_base::out);
auto t = std::string();
Read("temp.txt", t);
}
// Write and read back a set of ints.
{
auto const v1 = std::vector<int>() = { 10, 20, 30, 40, 50 };
Write("temp.txt", v1, std::ios_base::trunc | std::ios_base::out | std::ios_base::binary);
auto v2 = std::vector<int>();
Read("temp.txt", v2);
}
return 0;
}
Pass in an iterable container rather than using "new".
I have seen many posts but didn't find something like i want.
I am getting wrong output :
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ...... // may be this is EOF character
Going into infinite loop.
My algorithm:
Go to end of file.
decrease position of pointer by 1 and read character by
character.
exit if we found our 10 lines or we reach beginning of file.
now i will scan the full file till EOF and print them //not implemented in code.
code:
#include<iostream>
#include<stdio.h>
#include<conio.h>
#include<stdlib.h>
#include<string.h>
using namespace std;
int main()
{
FILE *f1=fopen("input.txt","r");
FILE *f2=fopen("output.txt","w");
int i,j,pos;
int count=0;
char ch;
int begin=ftell(f1);
// GO TO END OF FILE
fseek(f1,0,SEEK_END);
int end = ftell(f1);
pos=ftell(f1);
while(count<10)
{
pos=ftell(f1);
// FILE IS LESS THAN 10 LINES
if(pos<begin)
break;
ch=fgetc(f1);
if(ch=='\n')
count++;
fputc(ch,f2);
fseek(f1,pos-1,end);
}
return 0;
}
UPD 1:
changed code: it has just 1 error now - if input has lines like
3enil
2enil
1enil
it prints 10 lines only
line1
line2
line3ÿine1
line2
line3ÿine1
line2
line3ÿine1
line2
line3ÿine1
line2
PS:
1. working on windows in notepad++
this is not homework
also i want to do it without using any more memory or use of STL.
i am practicing to improve my basic knowledge so please don't post about any functions (like tail -5 tc.)
please help to improve my code.
Comments in the code
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
FILE *in, *out;
int count = 0;
long int pos;
char s[100];
in = fopen("input.txt", "r");
/* always check return of fopen */
if (in == NULL) {
perror("fopen");
exit(EXIT_FAILURE);
}
out = fopen("output.txt", "w");
if (out == NULL) {
perror("fopen");
exit(EXIT_FAILURE);
}
fseek(in, 0, SEEK_END);
pos = ftell(in);
/* Don't write each char on output.txt, just search for '\n' */
while (pos) {
fseek(in, --pos, SEEK_SET); /* seek from begin */
if (fgetc(in) == '\n') {
if (count++ == 10) break;
}
}
/* Write line by line, is faster than fputc for each char */
while (fgets(s, sizeof(s), in) != NULL) {
fprintf(out, "%s", s);
}
fclose(in);
fclose(out);
return 0;
}
There are a number of problems with your code. The most
important one is that you never check that any of the functions
succeeded. And saving the results an ftell in an int isn't
a very good idea either. Then there's the test pos < begin;
this can only occur if there was an error. And the fact that
you're putting the results of fgetc in a char (which results
in a loss of information). And the fact that the first read you
do is at the end of file, so will fail (and once a stream enters
an error state, it stays there). And the fact that you can't
reliably do arithmetic on the values returned by ftell (except
under Unix) if the file was opened in text mode.
Oh, and there is no "EOF character"; 'ÿ' is a perfectly valid
character (0xFF in Latin-1). Once you assign the return value
of fgetc to a char, you've lost any possibility to test for
end of file.
I might add that reading backwards one character at a time is
extremely inefficient. The usual solution would be to allocate
a sufficiently large buffer, then count the '\n' in it.
EDIT:
Just a quick bit of code to give the idea:
std::string
getLastLines( std::string const& filename, int lineCount )
{
size_t const granularity = 100 * lineCount;
std::ifstream source( filename.c_str(), std::ios_base::binary );
source.seekg( 0, std::ios_base::end );
size_t size = static_cast<size_t>( source.tellg() );
std::vector<char> buffer;
int newlineCount = 0;
while ( source
&& buffer.size() != size
&& newlineCount < lineCount ) {
buffer.resize( std::min( buffer.size() + granularity, size ) );
source.seekg( -static_cast<std::streamoff>( buffer.size() ),
std::ios_base::end );
source.read( buffer.data(), buffer.size() );
newlineCount = std::count( buffer.begin(), buffer.end(), '\n');
}
std::vector<char>::iterator start = buffer.begin();
while ( newlineCount > lineCount ) {
start = std::find( start, buffer.end(), '\n' ) + 1;
-- newlineCount;
}
std::vector<char>::iterator end = remove( start, buffer.end(), '\r' );
return std::string( start, end );
}
This is a bit weak in the error handling; in particular, you
probably want to distinguish the between the inability to open
a file and any other errors. (No other errors should occur,
but you never know.)
Also, this is purely Windows, and it supposes that the actual
file contains pure text, and doesn't contain any '\r' that
aren't part of a CRLF. (For Unix, just drop the next to the
last line.)
This can be done using circular array very efficiently.
No additional buffer is required.
void printlast_n_lines(char* fileName, int n){
const int k = n;
ifstream file(fileName);
string l[k];
int size = 0 ;
while(file.good()){
getline(file, l[size%k]); //this is just circular array
cout << l[size%k] << '\n';
size++;
}
//start of circular array & size of it
int start = size > k ? (size%k) : 0 ; //this get the start of last k lines
int count = min(k, size); // no of lines to print
for(int i = 0; i< count ; i++){
cout << l[(start+i)%k] << '\n' ; // start from in between and print from start due to remainder till all counts are covered
}
}
Please provide feedback.
int end = ftell(f1);
pos=ftell(f1);
this tells you the last point at file, so EOF.
When you read, you get the EOF error, and the ppointer wants to move 1 space forward...
So, i recomend decreasing the current position by one.
Or put the fseek(f1, -2,SEEK_CUR) at the beginning of the while loop to make up for the fread by 1 point and go 1 point back...
I believe, you are using fseek wrong. Check man fseek on the Google.
Try this:
fseek(f1, -2, SEEK_CUR);
//1 to neutrialize change from fgect
//and 1 to move backward
Also you should set position at the beginning to the last element:
fseek(f1, -1, SEEK_END).
You don't need end variable.
You should check return values of all functions (fgetc, fseek and ftell). It is good practise. I don't know if this code will work with empty files or sth similar.
Use :fseek(f1,-2,SEEK_CUR);to back
I write this code ,It can work ,you can try:
#include "stdio.h"
int main()
{
int count = 0;
char * fileName = "count.c";
char * outFileName = "out11.txt";
FILE * fpIn;
FILE * fpOut;
if((fpIn = fopen(fileName,"r")) == NULL )
printf(" file %s open error\n",fileName);
if((fpOut = fopen(outFileName,"w")) == NULL )
printf(" file %s open error\n",outFileName);
fseek(fpIn,0,SEEK_END);
while(count < 10)
{
fseek(fpIn,-2,SEEK_CUR);
if(ftell(fpIn)<0L)
break;
char now = fgetc(fpIn);
printf("%c",now);
fputc(now,fpOut);
if(now == '\n')
++count;
}
fclose(fpIn);
fclose(fpOut);
}
I would use two streams to print last n lines of the file:
This runs in O(lines) runtime and O(lines) space.
#include<bits/stdc++.h>
using namespace std;
int main(){
// read last n lines of a file
ifstream f("file.in");
ifstream g("file.in");
// move f stream n lines down.
int n;
cin >> n;
string line;
for(int i=0; i<k; ++i) getline(f,line);
// move f and g stream at the same pace.
for(; getline(f,line); ){
getline(g, line);
}
// g now has to go the last n lines.
for(; getline(g,line); )
cout << line << endl;
}
A solution with a O(lines) runtime and O(N) space is using a queue:
ifstream fin("file.in");
int k;
cin >> k;
queue<string> Q;
string line;
for(; getline(fin, line); ){
if(Q.size() == k){
Q.pop();
}
Q.push(line);
}
while(!Q.empty()){
cout << Q.front() << endl;
Q.pop();
}
Here is the solution in C++.
#include <iostream>
#include <string>
#include <exception>
#include <cstdlib>
int main(int argc, char *argv[])
{
auto& file = std::cin;
int n = 5;
if (argc > 1) {
try {
n = std::stoi(argv[1]);
} catch (std::exception& e) {
std::cout << "Error: argument must be an int" << std::endl;
std::exit(EXIT_FAILURE);
}
}
file.seekg(0, file.end);
n = n + 1; // Add one so the loop stops at the newline above
while (file.tellg() != 0 && n) {
file.seekg(-1, file.cur);
if (file.peek() == '\n')
n--;
}
if (file.peek() == '\n') // If we stop in the middle we will be at a newline
file.seekg(1, file.cur);
std::string line;
while (std::getline(file, line))
std::cout << line << std::endl;
std::exit(EXIT_SUCCESS);
}
Build:
$ g++ <SOURCE_NAME> -o last_n_lines
Run:
$ ./last_n_lines 10 < <SOME_FILE>