I want to extract data from a csv file but I have to get the number of rows and columns of the table first.
What I have so far is the following:
std::ifstream myfile(filename); // filename is a string and passed in by the constructor
if (myfile.is_open())
{
// First step: Get number of rows and columns of the matrix to initialize it.
// We have to close and re-open the file each time we want to work with it.
int rows = getRows(myfile);
std::ifstream myfile1(filename);
int columns = getColumns(myfile1);
if (rows == columns) // Matrix has to be quadratic.
{
std::ifstream myfile2(filename);
abwicklungsdreieck.set_Matrix(QuantLib::Matrix(rows, columns, 0)); // abwicklungsdreieck is initialised before
//...
}
else
{
std::cout << "\nNumber of rows has to equal number of columns.";
}
}
// [...]
int getRows(std::ifstream &myfile)
{
std::string line;
int rows = 0;
while (std::getline(myfile, line)) // While-loop simply counts rows.
{
rows++;
}
myfile.close();
return rows - 1;
}
int getColumns(std::ifstream &myfile)
{
std::string line;
char delimiter = ';';
size_t pos = 0;
int columns = 0;
while (std::getline(myfile, line) && columns == 0) // Consider first line in the .csv file.
{
line = line + ";";
while ((pos = line.find(delimiter)) != std::string::npos) // Counts columns.
{
line.erase(0, pos + 1);
columns++;
}
}
myfile.close();
return columns - 1;
}
This code is working. However, I have to open the file for three times which I do not like. Is there a way to evade this?
I was thinking about working with tempfiles in getRows() and getColumns() but the copying streams isn't possible since it doesn't make sense as I learned recently.
So, is there another way do that? Or can I for example evade the getline() and the line.erase() methods?
You can read the file line by line, convert each line to a stream, then read the columns on the stream:
std::ifstream myfile(filename);
if(!myfile) return 0;
std::string line;
while(std::getline(myfile, line))
{
std::stringstream ss(line);
std::string column;
while(std::getline(ss, column, ';'))
{
cout << column;
}
cout << "\n";
}
getline(myfile, line) will copy each row in to line.
Convert line to ss stream.
getline(ss, column, ';') will break the line in to columns.
Use std::stoi to convert the string in to integer.
If your matrix is based on std::vector, you can grow the vector one row at a time, so you don't need to know the size in advance.
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <vector>
void readfile(const std::string &filename)
{
std::vector<std::vector<int>> matrix;
std::ifstream myfile(filename);
if(!myfile) return;
std::string buf;
while(std::getline(myfile, buf))
{
int maxrow = matrix.size();
std::stringstream ss(buf);
matrix.resize(maxrow + 1);
cout << "break in to columns:" << buf << "\n";
while(std::getline(ss, buf, ';'))
{
try {
int num = std::stoi(buf);
matrix[maxrow].push_back(num);
}
catch(...) { }
}
}
for(auto &row : matrix) {
for(auto col : row)
cout << col << "|";
cout << "\n";
}
}
Related
I'm just new to C++ and am studying how to read data from csv file.
I want to read the following csv data into vector. Each row is a vector. The file name is path.csv:
0
0 1
0 2 4
0 3 6 7
I use the following function:
vector<vector<int>> read_multi_int(string path) {
vector<vector<int>> user_vec;
ifstream fp(path);
string line;
getline(fp, line);
while (getline(fp, line)) {
vector<int> data_line;
string number;
istringstream readstr(line);
while (getline(readstr, number, ',')) {
//getline(readstr, number, ',');
data_line.push_back(atoi(number.c_str()));
}
user_vec.push_back(data_line);
}
return user_vec;
}
vector<vector<int>> path = read_multi_int("C:/Users/data/paths.csv");
Print funtion:
template <typename T>
void print_multi(T u)
{
for (int i = 0; i < u.size(); ++i) {
if (u[i].size() > 1) {
for (int j = 0; j < u[i].size(); ++j) {
//printf("%d ", u[i][j]);
cout << u[i][j] << " ";
}
printf("\n");
}
}
printf("\n");
}
Then I get
0 0 0
0 1 0
0 2 4
0 3 6 7
Zeros are added at the end of the rows. Is possible to just read the data from the csv file without adding those extra zeros? Thanks!
Based on the output you are seeing and the code with ',' commas, I beleive that your actual input data really looks like this:
A,B,C,D
0,,,
0,1,,
0,2,4,
0,3,6,7
So the main change is to replace atoi with strtol, as atoi will always return 0 on a failure to parse a number, but with strtol we can check if the parse succeeded.
That means that the solution is as follows:
vector<vector<int>> read_multi_int(string path) {
vector<vector<int>> user_vec;
ifstream fp(path);
string line;
getline(fp, line);
while (getline(fp, line)) {
vector<int> data_line;
string number;
istringstream readstr(line);
while (getline(readstr, number, ',')) {
char* temp;
char numberA[30];
int numberI = strtol(number.c_str(), &temp, 10);
if (temp == number || *temp != '\0' ||
((numberI == LONG_MIN || numberI == LONG_MAX) && errno == ERANGE))
{
// Could not convert
}else{
data_line.emplace_back(numberI);
}
}
user_vec.emplace_back(data_line);
}
return user_vec;
}
Then to display your results:
vector<vector<int>> path = read_multi_int("C:/Users/data/paths.csv");
for (const auto& row : path)
{
for (const auto& s : row) std::cout << s << ' ';
std::cout << std::endl;
}
Give the expected output:
0
0 1
0 2 4
0 3 6 7
Already very good, but there is one obvious error and another error in your print function. Please see, how I output the values, with simple range based for loops.
If your source file does not contain a comma (','), but a different delimiter, then you need to call std::getline with this different delimiter, in your case a blank (' '). Please read here about std::getline.
If we then use the following input
Header
0
0 1
0 2 4
0 3 6 7
with the corrected program.
#include <vector>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
using namespace std;
vector<vector<int>> read_multi_int(string path) {
vector<vector<int>> user_vec;
ifstream fp(path);
string line;
getline(fp, line);
while (getline(fp, line)) {
vector<int> data_line;
string number;
istringstream readstr(line);
while (getline(readstr, number, ' ')) {
//getline(readstr, number, ',');
data_line.push_back(atoi(number.c_str()));
}
user_vec.push_back(data_line);
}
return user_vec;
}
int main() {
vector<vector<int>> path = read_multi_int("C:/Users/data/paths.csv");
for (vector<int>& v : path) {
for (int i : v) std::cout << i << ' ';
std::cout << '\n';
}
}
then we receive this as output:
0
0 1
0 2 4
0 3 6 7
Which is correct, but unfortunately different from your shown output.
So, your output routine, or some other code, may also have some problem.
Besides. If there is no comma, then you can take advantage of formatted input functions using the extraction operator >>. This will read your input until the next space and convert it automatically to a number.
Additionally, it is strongly recommended, to initialize all variables during definition. You should do this always.
Modifying your code to use formatted input, initialization, and, maybe, better variable names, then it could look like the below.
#include <vector>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
using namespace std;
vector<vector<int>> multipleLinesWithIntegers(const string& path) {
// Here we will store the resulting 2d vector
vector<vector<int>> result{};
// Open the file
ifstream fp{ path };
// Read header line
string line{};
getline(fp, line);
// Now read all lines with numbers in the file
while (getline(fp, line)) {
// Here we will store all numbers of one line
vector<int> numbers{};
// Put the line into an istringstream for easier extraction
istringstream sline{ line };
int number{};
while (sline >> number) {
numbers.push_back(number);
}
result.push_back(numbers);
}
return result;
}
int main() {
vector<vector<int>> values = multipleLinesWithIntegers("C:/Users/data/paths.csv");
for (const vector<int>& v : values) {
for (const int i : v) std::cout << i << ' ';
std::cout << '\n';
}
}
And, the next step would be to use a some more advanced style:
#include <vector>
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <iterator>
auto multipleLinesWithIntegers(const std::string& path) {
// Here we will store the resulting 2d vector
std::vector<std::vector<int>> result{};
// Open the file and check, if it could be opened
if (std::ifstream fp{ path }; fp) {
// Read header line
if (std::string line{}; getline(fp, line)) {
// Now read all lines with numbers in the file
while (getline(fp, line)) {
// Put the line into an istringstream for easier extraction
std::istringstream sline{ line };
// Get the numbers and add them to the result
result.emplace_back(std::vector(std::istream_iterator<int>(sline), {}));
}
}
else std::cerr << "\n\nError: Could not read header line '" << line << "'\n\n";
}
else std::cerr << "\n\nError: Could not open file '" << path << "'\n\n'";
return result;
}
int main() {
const std::vector<std::vector<int>> values{ multipleLinesWithIntegers("C:/Users/data/paths.csv") };
for (const std::vector<int>& v : values) {
for (const int i : v) std::cout << i << ' ';
std::cout << '\n';
}
}
Edit
You have shown your output routine. That should be changed to:
void printMulti(const std::vector<std::vector<int>>& u)
{
for (int i = 0; i < u.size(); ++i) {
if (u[i].size() > 0) {
for (int j = 0; j < u[i].size(); ++j) {
std::cout << u[i][j] << ' ';
}
std::cout << '\n';
}
}
std::cout << '\n';
}
My project is supposed to basically sum up the value of all ints on a line and print the word in the next line that amount of times. However, something is causing it to skip over the string line where the word but I am getting the summed value right.
Input file:
1,2,3
word
2,3,4
word2
Here is my code:
int main() {
std::ifstream in;
std::ofstream out;
std::string line;
in.open("input.txt");
out.open("output.txt");
while(std::getline(in, line)){
std::stringstream ss(line);
while(ss){
std::string word;
std::string number;
int a = 0;
while(std::getline(ss, number, ',')){
a = a + atoi(number.c_str());}
std::getline(ss, word);
for (int z = 0; z < a; z++){
out << word << ",";}
out << "\n";
}
}
return(0);
}
Output I'm getting:
,,,,,,
,,,,,,,,,
What I should be getting:
word,word,word,word,word,word
word2,word2,word2,word2,word2,word2,word2,word2
Change:
std::getline(ss, word);
To this:
// Gets the next line in input.txt and stores it in the variable 'word'
std::getline(in, word);
This is because during the while loop, ss is emptied (exhausted) by the nested while loop (the 2nd while loop) and has no words when it's contents are inserted into the variable 'word'.
Final code:
#include <iostream>
#include <fstream>
#include <sstream>
int main() {
std::ifstream in;
std::ofstream out;
std::string line;
in.open("input.txt");
out.open("output.txt");
while (std::getline(in, line)) {
std::stringstream ss(line);
while (ss) {
std::string word;
std::string number;
int a = 0;
while (std::getline(ss, number, ',')) {
a = a + atoi(number.c_str());
}
std::getline(in, word); // Changed line
for (int z = 0; z < a; z++) {
out << word << ",";
}
out << "\n";
}
}
return(0);
}
Please can you advise, why the inner loop runs only once?
I'd like to add suffix to each line of input file and then store the result in output file.
thanks
For example:
Input file contains:
AA
AB
AC
Suffix file contains:
_1
_2
Output file should contain:
AA_1
AB_1
AC_1
AA_2
AB_2
AC_2
My result is :
AA_1
AB_1
AC_1
Code:
int main()
{
string line_in{};
string line_suf{};
string line_out{};
ifstream inFile{};
ofstream outFile{"outfile.txt"};
ifstream suffix{};
inFile.open("combined_test.txt");
suffix.open("suffixes.txt");
if (!inFile.is_open() && !suffix.is_open()) {
perror("Error open");
exit(EXIT_FAILURE);
}
while (getline(suffix, line_suf)) {
while (getline(inFile, line_in))
{
line_out = line_in + line_suf;
outFile << line_out << endl;
}
inFile.close();
outFile.close();
}
}
IMHO, a better method is to read the files into vectors, then iterate through the vectors:
std::ifstream word_base_file("combined_test.txt");
std::ifstream suffix_file("suffixes.txt");
//...
std::vector<string> words;
std::vector<string> suffixes;
std::string text;
while (std::getline(word_base_file, text))
{
words.push_back(text);
}
while (std::getline(suffix_file, text))
{
suffixes.push_back(text);
}
//...
const unsigned int quantity_words(words.size());
const unsigned int quantity_suffixes(suffixes.size());
for (unsigned int i = 0u; i < quantity_words; ++i)
{
for (unsigned int j = 0; j < quantity_suffixes; ++j)
{
std::cout << words[i] << suffix[j] << "\n";
}
}
Edit 1: no vectors
If you haven't learned about vectors or like to thrash your storage device you could try this:
std::string word_base;
while (std::getline(inFile, word_base))
{
std::string suffix_text;
while (std::getline(suffixes, suffix_text))
{
std::cout << word_base << suffix_text << "\n";
}
suffixes.clear(); // Clear the EOF condition
suffixes.seekg(0); // Seek to the start of the file (rewind).
}
Remember, after the inner while loop, the suffixes file is at the end; no more reads can occur. Thus the file needs to be positioned at the start before reading. Also, the EOF state needs to be cleared before reading.
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <sstream>
using namespace std;
int main()
{
string line;
ifstream infile ("Input.csv");
vector<string> table;
string word;
if(infile.is_open())
{
getline(infile,line);
istringstream iss(line);
while(!iss.eof())
{
getline(iss,word, ',');
table.push_back(word);
}
}
for(int index=0; index<11; ++index)
{
cout<< "Element" << index << ":" << table.at(index) << endl ;
}
infile.close();
}
In the above program I am reading values from input file and splitting based on comma and finally storing the values into a vector.
when I print vector I am able to view only the first line of input file.
Input file:
CountryCode,Country,ItemCode,Item,ElementGroup,ElementCode,Element,Year,Unit,Value,Flag
100,India,3010,Population - Est. & Proj.,511,511,Total Population - Both sexes,1961,1000,456950,
100,India,3010,Population - Est. & Proj.,511,511,Total Population - Both sexes,1962,1000,466337,
100,India,3010,Population - Est. & Proj.,511,511,Total Population - Both sexes,1963,1000,476025,
100,India,3010,Population - Est. & Proj.,511,511,Total Population - Both sexes,1964,1000,486039,
Output:
Element0:CountryCode
Element1:Country
Element2:ItemCode
Element3:Item
Element4:ElementGroup
Element5:ElementCode
Element6:Element
Element7:Year
Element8:Unit
Element9:Value
Element10:Flag
Problem: Only 1st line is being printed
I suggest rewriting it like this:
int main()
{
string line;
ifstream infile ("Input.csv");
vector<string> table;
string word;
while(getline(infile, line))
{
istringstream iss(line);
while(getline(iss,word, ','))
{
table.push_back(word);
}
}
for(int index=0; index<11; ++index)
{
cout<< "Element" << index << ":" << table.at(index) << endl ;
}
infile.close();
}
The stream will return false from getline and most other operations whenever it is invalid. So if it didn't open, the while loop won't run. And when it reaches EOF, the while loop will stop. Much simpler to read this way I think.
You have several issues, addressed below:
int main()
{
std::string line;
std::ifstream infile ("Input.csv");
std::vector<std::string> table;
while (std::getline(infile, line)) // this is the loop you want to read the file
{
std::istringstream iss(line);
std::string word;
while (std::getline(iss, word, ',')) // there are better ways to do this, but this will work
{
table.push_back(word);
}
}
for(int index=0; index<table.size(); ++index) // loop through the whole size
{
std::cout<< "Element" << index << ":" << table[index] << std::endl ;
}
infile.close();
return 0;
}
Alternatively, you can avoid the use of nested while loops altogether:
struct csv_reader : std::ctype<char>
{
csv_reader() : std::ctype<char>(get_table()) {}
static std::ctype_base::mask const* get_table()
{
static std::vector<std::ctype_base::mask> rc(table_size, std::ctype_base::mask());
rc['\n'] = std::ctype_base::space;
rc[','] = std::ctype_base::space;
return &rc[0];
}
};
int main()
{
std::string line;
std::ifstream infile ("Input.csv");
csv_reader reader;
infile.imbue(std::locale(std::locale(), &reader);
std::vector<std::string> table{std::istream_iterator<std::string>(infile), std::istream_iterator<std::string>()};
// or
//std::vector<std::string> table;
//std::copy(std::istream_iterator<std::string>(infile), std::istream_iterator<std::string>(), std::back_inserter(table));
for(int index=0; index<table.size(); ++index) // loop through the whole size
{
std::cout<< "Element" << index << ":" << table[index] << std::endl ;
}
infile.close();
return 0;
}
You read only one line of the file
if(infile.is_open())
{
getline(infile,line);
istringstream iss(line);
while(!iss.eof())
{
getline(iss,word, ',');
table.push_back(word);
}
}
If you need to read all lines of the file then you can write instead
while (getline(infile,line))
{
istringstream iss(line);
while(!iss.eof())
{
getline(iss,word, ',');
table.push_back(word);
}
}
You only read the first line.
Add a loop with a condition like
while (getline(infile, line)) {
...
}
Your for loop only print the first 11 posts from the array.
You should it to the lenght of the array.
Not sure what the syntax is for c++ for length of the table but that should be the issue.
for(int index=0; index<table.size(); ++index)
{
cout<< "Element" << index << ":" << table.at(index) << endl ;
}
Currently,
I have this string us,muscoy,Muscoy,CA,,34.1541667,-117.3433333.
I need to parse US and CA. I was able to parse US correctly by this:
std::string line;
std::ifstream myfile ("/Users/settingj/Documents/Country-State Parse/worldcitiespop.txt");
while( std::getline( myfile, line ) )
{
while ( myfile.good() )
{
getline (myfile,line);
//std::cout << line << std::endl;
std::cout << "Country:";
for (int i = 0; i < 2/*line.length()*/; i++)
{
std::cout << line[i];
}
}
}
But i'm having an issue parsing to CA.
Heres some code I dug up to find the amount of ',' occurrences in a string but I am having issues saying "parse this string between the 3rd and 4th ',' occurrence.
// Counts the number of ',' occurrences
size_t n = std::count(line.begin(), line.end(), ',');
std::cout << n << std::endl;
You can use boost::split function (or boost::tokenizer) for this purpose. It will split string into vector<string>.
std::string line;
std::vector<std::string> results;
boost::split(results, line, boost::is_any_of(","));
std::string state = results[3];
It's not for a class... Haha... it seems like a class question though...
I have the solution:
int count = 0;
for (int i = 0; i < line.length(); i++)
{
if (line[i] == ',')
count++;
if (count == 3){
std::cout << line[i+1];
if (line[i+1] == ',')
break;
}
}
Just had to think about it more :P
This is STL version, works pretty well for simple comma separated input files.
#include<fstream>
#include <string>
#include <iostream>
#include<vector>
#include<sstream>
std::vector<std::string> getValues(std::istream& str)
{
std::vector<std::string> result;
std::string line;
std::getline(str,line);
std::stringstream lineS(line);
std::string cell;
while(std::getline(lineS,cell,','))
result.push_back(cell);
return result;
}
int main()
{
std::ifstream f("input.txt");
std::string s;
//Create a vector or vector of strings for rows and columns
std::vector< std::vector<std::string> > v;
while(!f.eof())
v.push_back(getValues(f));
for (auto i:v)
{
for(auto j:i)
std::cout<<j<< " ";
std::cout<<std::endl;
}
}