txt file parsing c++ in to vector more efficiently - c++

My program uses ifstream() and getline() to parse a text file in to objects that are two vectors deep. i.e vector inside vector. The inner vector contains over 250000 string objects once the text file is finished loading.
this is painfully slow. Is there an STD alternative that is more efficient than using ifstream() and getline() ?
Thanks
UPDATE:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <regex>
using namespace std;
class Word
{
private:
string moniker = "";
vector <string> definition;
string type = "";
public:
void setMoniker(string m) { this->moniker = m; }
void setDefinition(string d) { this->definition.push_back(d); }
void setType(string t) { this->type = t; }
int getDefinitionSize() { return this->definition.size(); }
string getMoniker() { return this->moniker; }
void printDefinition()
{
for (int i = 0; i < definition.size(); i++)
{
cout << definition[i] << endl;
}
}
string getType() { return this->type; }
};
class Dictionary
{
private:
vector<Word> Words;
public:
void addWord(Word w) { this->Words.push_back(w); }
Word getWord(int i) { return this->Words[i]; }
int getTotalNumberOfWords() { return this->Words.size(); }
void loadDictionary(string f)
{
const regex _IS_DEF("[\.]|[\ ]"),
_IS_TYPE("^misc$|^n$|^adj$|^v$|^adv$|^prep$|^pn$|^n_and_v$"),
_IS_NEWLINE("\n");
string line;
ifstream dict(f);
string m, t, d = "";
while (dict.is_open())
{
while (getline(dict, line))
{
if (regex_search(line, _IS_DEF))
{
d = line;
}
else if (regex_search(line, _IS_TYPE))
{
t = line;
}
else if (!(line == ""))
{
m = line;
}
else
{
Word w;
w.setMoniker(m);
w.setType(t);
w.setDefinition(d);
this->addWord(w);
}
}
dict.close();
}
}
};
int main()
{
Dictionary dictionary;
dictionary.loadDictionary("dictionary.txt");
return 0;
}

You should reduce your memory allocations. Having a vector of vectors is usually not a good idea, because every inner vector does its own new and delete.
You should reserve() the approximate number of elements you need in the vector at the start.
You should use fgets() if you don't actually need to extract std::string to get your work done. For example if the objects can be parsed from char arrays, do that. Make sure to read into the same string buffer every time, rather than creating new buffers.
And most important of all, use a profiler.

Related

C++ Exception "Access violation reading location"

#include <iostream>
#include <fstream>
using namespace std;
struct review {
string text;
string date;
};
void getRegistry(int i) {
review* reg = new review;
ifstream file;
file.open("test.txt", ios::binary);
if (file) {
file.seekg(i * sizeof(review), ios::beg);
file.read(reinterpret_cast<char*>(reg), sizeof(review));
cout << reg->text;
file.close();
}
delete reg;
}
void generateBinary()
{
ofstream arq("test.txt", ios::binary);
review x;
x.text = "asdasdasd";
x.date = "qweqweqwe";
for (int i = 1; i <= 1000000; i++)
{
arq.write(reinterpret_cast<const char*>(&x), sizeof(review));
}
arq.close();
}
int main() {
generateBinary();
getRegistry(2);
return 0;
}
Hello, I'm trying to make a program which writes several "reviews" to a binary file, then reads a certain registry. The program seems to work, but, in the end, it always throws an exception: "Exception thrown at 0x00007FF628E58C95 in trabalho.exe: 0xC0000005: Access violation reading location 0xFFFFFFFFFFFFFFFF." How can I solve this? Thank you!
The problem is that you can't read/write std::string objects they way you are. std::string holds a pointer to variable-length character data that is stored elsewhere in memory. Your code is not accounting for that fact.
To be able to seek to a specific object in a file of objects the way you are attempting, you have to use fixed-sized objects, eg:
#include <iostream>
#include <fstream>
#include <cstring>
using namespace std;
struct review {
char text[12];
char date[12];
};
void getRegistry(int i) {
ifstream file("test.txt", ios::binary);
if (file) {
if (!file.seekg(i * sizeof(review), ios::beg)) throw ...;
review reg;
if (!file.read(reinterpret_cast<char*>(&reg), sizeof(reg))) throw ...;
cout << reg.text;
}
}
void generateBinary()
{
ofstream arq("test.txt", ios::binary);
review x = {};
strncpy(x.text, "asdasdasd", sizeof(x.text)-1);
strncpy(x.date, "qweqweqwe", sizeof(x.date)-1);
for (int i = 1; i <= 1000000; ++i) {
if (!arq.write(reinterpret_cast<char*>(&x), sizeof(x))) throw ...;
}
}
int main() {
generateBinary();
getRegistry(2);
return 0;
}
Otherwise, to deal with variable-length data, you need to (de)serialize each object instead, eg:
#include <iostream>
#include <fstream>
#include <cstdint>
using namespace std;
struct review {
string text;
string date;
};
string readStr(istream &is) {
string s;
uint32_t len;
if (!is.read(reinterpret_cast<char*>(&len), sizeof(len))) throw ...;
if (len > 0) {
s.resize(len);
if (!is.read(s.data(), len)) throw ...;
}
return s;
}
void skipStr(istream &is) {
uint32_t len;
if (!is.read(reinterpret_cast<char*>(&len), sizeof(len))) throw ...;
if (len > 0) {
if (!is.ignore(len)) throw ...;
}
}
void writeStr(ostream &os, const string &s) {
uint32_t len = s.size();
if (!os.write(reinterpret_cast<char*>(&len), sizeof(len)) throw ...;
if (!os.write(s.c_str(), len)) throw ...;
}
review readReview(istream &is) {
review r;
r.text = readStr(is);
r.date = readStr(is);
return r;
}
void skipReview(istream &is) {
skipStr(is);
skipStr(is);
}
void writeReview(ostream &os, const review &r) {
writeStr(is, r.text);
writeStr(is, r.date);
}
void getRegistry(int i) {
ifstream file("test.txt", ios::binary);
if (file) {
while (i--) skipReview(file);
review reg = readReview(file);
cout << reg.text;
}
}
void generateBinary()
{
ofstream arq("test.txt", ios::binary);
review x;
x.text = "asdasdasd";
x.date = "qweqweqwe";
for (int i = 1; i <= 1000000; ++i) {
writeReview(arq, x);
}
}
int main() {
generateBinary();
getRegistry(2);
return 0;
}
The operator sizeof (review) does not return the length of containing strings. This is due to the fact that string class contain pointers to real strings, which are located in a separated location of the memory, allocated dynamically. You should use explicitly the length of strings, and write explicitly the strings instead of the class. Same thing with reading from file. Read strings first, then attribute to review.

How to convert this vector code into a class?

I was trying to put this code into a class but I couldn't manage to do it. The job of the function is pulling team names from a .txt file and putting them in a vector. I think the main problem is I couldn't select the right function return type.
This is the teams.txt: (The names before the "-" symbol are teams. Other names are unrelated with my question but they are coachs of the teams.)
Trabzonspor-Abdullah Avcı+
Fenerbahçe-Vítor Pereira+
Beşiktaş-Sergen Yalçın+
Galatasaray-Fatih Terim+
İstanbul Başakşehir-Emre Belözeoğlu+
Alanyaspor-Bülent Korkmaz+
Fatih Karagümrük-Francesco Farioli+
Gaziantep-Erol Bulut+
Adana Demirspor-Vincenzo Montella+
Ankara Dinc-Nestor El Maestro+
Antalyaspor-Nuri Şahin+
Kayserispor-Hikmet Karaman+
Yeni Malatyaspor-Marius Sumudica+
Konyaspor-İlhan Palut+
Sivasspor-Rıza Çalımbay+
Hatayspor-Ömer Erdoğan+
Giresunspor-Hakan Keleş+
Kasımpaşa-Hakan Kutlu+
And this is the my code who does the putting them in a vector:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std; //I know that's a bad practice but i just need to do this for while
std::string process(std::string const& s) //A function to seperate teams from the coaches
{
string::size_type pos = s.find('-');
if (pos != string::npos)
{
return s.substr(0, pos);
}
else
{
return s;
}
}
int main() {
ifstream readTeam("teams.txt");
if (!readTeam) { //checking that successfully opened the file.
std::cerr << "Error while opening the file.\n";
return 1;
}
vector<std::string> teams;
string team;
while (getline(readTeam, team)) {
teams.push_back(process(team));
}
readTeam.close();
int g = 1;//for printing the teams, just for displaying it. doesn't have to in a class.
for (const auto& i : teams) {
cout << g;
cout << i << endl;
g++;
}
return 0;
}
And that's what i did(tried) to make it a class:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;
std::string process(std::string const& s)
{
string::size_type pos = s.find('-');
if (pos != string::npos)
{
return s.substr(0, pos);
}
else
{
return s;
}
}
class readFile {
public:
void setTxtName(string);
vector<unsigned char> const& getTeam() const{
}
vector<string> teams;
private:
string fileName;
};
int main() {
readFile pullTeams;
pullTeams.setTxtName("teams.txt");
return 0;
}
void readFile::setTxtName(string txtName) {
fileName = txtName;
}
vector<string> const& readFile::getTeam { //problem is defining it(I think). So I couldn't add my main code int it..
return teams;
}
Anything helps, thank you!
I did a little different research based on the Botje's comment. And I manage to create an answer based on here. Thanks.
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;
std::string process(std::string const& s){
string::size_type pos = s.find('-');
if (pos != string::npos){
return s.substr(0, pos);
}
else{
return s;
}
}
class readFile {
public:
vector<string> getTxt();
bool read(string);
private:
vector<string> teams;
string team;
ifstream txt;
};
int main() {
vector<string> teams;
readFile team;
if (team.read("teaams.txt") == true)
teams = team.getTxt();
int g = 1;//for printing the teams, just for displaying it. doesn't have to in a class.
for (const auto& i : teams) {
cout << g;
cout << i << endl;
g++;
}
return 0;
}
bool readFile::read(string txtName) {
ifstream txt;
string team;
txt.open(txtName.c_str());
if (!txt.is_open())
return false;
while (getline(txt, team))
teams.push_back(process(team));
return true;
}
vector<string> readFile::getTxt() {
return teams;
}

Setter not changing the data from a vector within a class

In my program, I have a class that holds a vector of type integer. It is used to store distances. I have a function, that when called, should set values in the vector to 0's. (used for initializing). But when I go to check the size of the vector, it still says the vector is empty.
I have created multiple functions that check whether the vector is adding any elements, and it is not. I have a function, that within main, I call to see if the vector is empty, and it returns 0 (the vector has 0 elements in it).
int MLB::getDistanceSize()
{
return distances.size();
}
void MLB::setInitialDistances(int size)
{
for(int i = 0; i < size; i++)
{
this->distances.push_back(0);
}
}
class MLB
{
public:
//constructor
MLB();
~MLB();
int getDistanceSize();
void setInitialDistances(int size);
private:
vector<int> distances;
};
The input file is a csv file with each line consisting of:
stadium1,stadium2,distance
so sample input file is:
AT&T Park,Safeco Field,680
AT&T Park,Oakland–Alameda County Coliseum,50
Angel Stadium,Petco Park,110
Angel Stadium,Dodger Stadium,50
Busch Stadium,Minute Maid Park,680
Busch Stadium,Great American Ball Park,310
Busch Stadium,Target Field,465
Busch Stadium,Kauffman Stadium,235
etc...
I am using qt, and this is where I am calling the functions themselves. All information is stored into a map, and the other getters work perfectly fine. Sorry for making this a lot more confusing than the problem really is, any help is greatly appreciated.
// key and value, key is the team name, value is the MLB stadium information
struct entry
{
string key;
MLB value;
};
class Map
{
public:
//Public default constructor
Map();
//Public default destructor
~Map();
// returns entry of the map
entry atIndex(int index);
// Inserts a key and its value using linear algorithm
void insert(const string& theKey, const MLB& value);
private:
vector<entry> thisTable;
int currentSize; //Integer variable for current size
};
functions for Map:
Map::Map()
{
currentSize = 0;
}
Map::~Map()
{
}
void Map::insert(const string& theKey, const MLB& value)
{
entry thisEntry;
thisEntry.key = theKey;
thisEntry.value = value;
thisTable.push_back(thisEntry);
currentSize+=1;
}
entry Map::atIndex(int index)
{
return thisTable.at(index);
}
//mainwindow constructor
mainWindow::mainWindow()
{
//Reads in input from first csv file, all works fine all data stored and can access it
string iStadium1;
string iStadium2;
string iDistance;
string previous;
int distance;
int index1;
int index2;
bool found;
ifstream csvFile2;
csvFile2.open("inputDistance.csv");
getline(csvFile2, iStadium1, ',');
while(!csvFile2.eof())
{
index1 = 0;
found = false;
while(!found)
{
if(thisMap.atIndex(index1).value.getStadiumName() == iStadium1)
{
thisMap.atIndex(index1).value.setInitialDistances(thisMap.mapSize());
cout << "Distance Size Test 1: " << thisMap.atIndex(index1).value.getDistanceSize() << endl;
found = true;
}
else
{
index1++;
}
}
previous = iStadium1;
while(iStadium1 == previous)
{
getline(csvFile2, iStadium2, ',');
getline(csvFile2, iDistance, '\n');
distance = stoi(iDistance);
index2 = 0;
found = false;
while(!found)
{
if(thisMap.atIndex(index2).value.getStadiumName() == iStadium2)
{
found = true;
cout << "Distance Size Test 2: " << thisMap.atIndex(index1).value.getDistanceSize() << endl;
// crashes here. Index out of bounds, size is 0 for some reason
thisMap.atIndex(index1).value.setDistance(index2, distance);
}
else
{
index2++;
}
}
getline(csvFile2, iStadium1, ',');
}
}
csvFile2.close();
}
I expect the vector to hold 30 slots (assuming the desired size passed into the function is 30) of value 0, rather than having an empty vector.
The code in your question works as expected after adding constructor and destructor (doing both nothing) :
#include <iostream>
#include <vector>
using namespace std;
class MLB
{
public:
//constructor
MLB();
~MLB();
int getDistanceSize();
void setInitialDistances(int size);
private:
vector<int> distances;
};
int MLB::getDistanceSize()
{
return distances.size();
}
void MLB::setInitialDistances(int size)
{
for(int i = 0; i < size; i++)
{
this->distances.push_back(0);
}
}
MLB::MLB() {
}
MLB::~MLB() {
}
int main()
{
MLB mlb;
mlb.setInitialDistances(30);
cout << mlb.getDistanceSize() << endl;
}
pi#raspberrypi:/tmp $ g++ d.cc
pi#raspberrypi:/tmp $ ./a.out
30
the vector is not empty but contains 30 times 0
if thisMap.atIndex(index1).value.setDistance(index2, distance); does nothing this is probably because atIndex(index1) returns a copy rather than a reference, so you modify a copy and the original is unchanged
For instance :
#include <iostream>
#include <vector>
using namespace std;
class C {
public:
vector<int> getv() { return v; } // return a copy
vector<int> & getvref() { return v; } // return the ref to the vector, not a copy
int len() { return v.size(); }
private:
vector<int> v;
};
int main()
{
C c;
c.getv().push_back(0); // modify a copy of v
cout << c.len() << endl;
c.getvref().push_back(0); // modify v
cout << c.len() << endl;
}
Compilation and execution :
pi#raspberrypi:/tmp $ g++ vv.cc
pi#raspberrypi:/tmp $ ./a.out
0
1
you edited you question and this is what I supposed :
entry Map::atIndex(int index)
{
return thisTable.at(index);
}
return a copy, must be
entry & Map::atIndex(int index)
{
return thisTable.at(index);
}

comparing functions in c++, short way?

I've been recently working on a program which consists basically of 24 variations of one function(below). Everything gets executed perfectly apart from the part where I try to compare functions(with eachother). I found out that it is possible to be done by writing 24 if-else statements, yet I am certain there is a shorter way. I've also tried with vectors but no luck for now. Thanks for any help!
one of 24 functions:
int funk1()
{
ifstream myfile ("file.txt");
string line;
int i;
class1 obj1;
obj1.atr1= "Somename";
obj1.atr2="GAATTC";
while (getline(myfile, line))
{
i = countSubstring(line, obj1.atr2);
obj1.sum += i;
};
cout<<obj1.sum<<": "<<obj1.atr1<<"\n";
return obj1.sum;
}
The main function:
int main(){
funk1();
funk2();
funk3();
funk4();
funk5();
funk6();
funk7();
funk8();
funk9();
funk10();
funk11();
funk12();
funk13();
funk14();
funk15();
funk16();
funk17();
funk18();
funk19();
funk20();
funk21();
funk22();
funk23();
funk24();
//This is one way to do it
if (funk18() > funk1())
{
cout<<funk18<<" is the biggest";
}
//...
}
Here is a clean and elegant c++11 solution:
#include <iostream>
#include <functional>
#include <vector>
#include <limits>
#include <algorithm>
using namespace std;
using MyFunc = std::function<int()>;
int f1() { return 1; }
int f2() { return 15;}
int f3() { return 3; }
int main() {
std::vector<MyFunc> my_functions = {f1, f2, f3};
int max = std::numeric_limits<int>::min();
for (auto const &f : my_functions) {
max = std::max(max, f());
}
cout << max << endl;
return 0;
}
if you want to store the results from functions instead, you could do:
std::vector<int> my_results;
my_results.reserve(my_functions.size());
for (auto const &f : my_functions) {
my_results.push_back(f());
}
auto max_it = std::max_element(std::begin(my_results), std::end(my_results));
cout << *max_it << endl;

Fail to store values to the list

It seems the attribute test aisbn is successfully storing the data invoking setCode(), setDigit(). But The trouble starts failing while I attempt these values to store into list<test> simul
The list attribute takes the value of digit after setDigit() but the code. How can I put both code and digit into the list attribute? I can't see where the problem is. The code:
#include <iostream>
#include <stdlib.h>
#include <string>
#include <fstream>
#include <list>
using namespace std;
class test
{
private:
string code;
int digit;
public:
//constructor
test(): code(""), digit(0) { }
//copy constructor
test(const test &other):
digit(other.digit)
{
for(unsigned int i=0; i < code.length(); i++)
code[i] = other.code[i];
}
//set up the private values
void setCode(const string &temp, const int num);
void setCode(const string &temp);
void setDigit(const int &num);
//return the value of the pointer character
const string &getCode() const;
const unsigned int getDigit() const;
};
const string& test::getCode() const
{
return code;
}
const unsigned int test::getDigit() const
{
return digit;
}
void test::setCode(const string &temp, const int num)
{
if((int)code.size() <= num)
{
code.resize(num+1);
}
code[num] = temp[num];
}
void test::setCode(const string &temp)
{
code = temp;
}
void test::setDigit(const int &num)
{
digit = num;
}
int main()
{
const string contents = "dfskr-123";
test aisbn;
list<test> simul;
list<test>::iterator testitr;
testitr = simul.begin();
int count = 0;
cout << contents << '\n';
for(int i=0; i < (int)contents.length(); i++)
{
aisbn.setCode(contents);
aisbn.setDigit(count+1);
simul.push_back(aisbn);
count++;
}
cout << contents << '\n';
/*for(; testitr !=simul.end(); simul++)
{
cout << testitr->getCode() << "\n";
}*/
}
It looks like you are having issues with your for loop, you need to modify your for loop like so:
for(testitr = simul.begin(); testitr !=simul.end(); testitr++)
^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^
although, push_back does not invalidate iterators for std::list I think it is more readable to set the iterator where you are using it. Based on your response you also need to modify the copy constructor:
test(const test &other): code(other.code), digit(other.digit) {}
^^^^^^^^^^^^^^^^
how about using the vector
std::vector<test> simul;
for(int i=0; i < (int)contents.length(); i++)
{
aisbn.setCode(contents);
aisbn.setDigit(count+1);
simul.push_back(aisbn);
count++;
}
iterators, pointers and references related to the container are invalidated.
Otherwise, only the last iterator is invalidated.