Reading data from multiple .txt files with c++ - c++

I need to extract a series of information from multiple files (64 files) in a directory. Each file consists of several lines like:
Pair of Operators: 0& ins: 0& rmv: 0& weight: 0.124354
Pair of Operators: 1& ins: 1& rmv: 0& weight: 0.00672458
Pair of Operators: 2& ins: 2& rmv: 0& weight: 0.000467531
...
The information is stored in a vector. I wrote a code, however, I receive a segmentation fault error. Here is my code. Could anyone indicate where the problem comes from?
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <dirent.h>
using namespace std;
struct info {
char* name;
vector<vector<double>> weights; // dest-const-weight
};
int main(int c, char* v[]) {
struct dirent* pDirent;
DIR* pDir;
char buffer[50];
strcpy(buffer, v[1]);
if (c < 2) {
printf ("Usage: testprog <dirname>\n");
return 1;
}
pDir = opendir (buffer);
if (pDir == NULL) {
printf ("Cannot open directory '%s'\n", v[1]);
return 1;
}
vector<info> Information;
for (int inst = 0; inst < 64; inst++) {
info temp;
temp.weights.resize(9);
for (int j = 0; j < 9; j++)
temp.weights[j].resize(4);
Information.push_back(temp);
}
int cmp = 0;
while ((pDirent = readdir(pDir)) != NULL) {
if (strcmp(pDirent->d_name, ".") != 0 &&
strcmp(pDirent->d_name, "..") != 0) {
char line[1024];
ifstream file(pDirent->d_name);
Information[cmp].name = pDirent->d_name;
int oper = -1;
int dest = -1;
int ins = -1;
double weight = -1;
for (int l = 0; l < 36; l++) {
file.getline(line, 1024);
// cout<<line<<endl;
sscanf(line,
"Pair of Operators: %d& ins: %d& rmv: %d& weight: %f",
&oper, &dest, &ins, &weight);
// sscanf(line, "%*s %d%*s %d%*s %d%*S %f", &oper, &dest, &ins,
// &weight);
// cout<<oper<<" "<<dest<<" "<<ins<<" "<<weight<<endl;
Information[cmp].weights[dest][ins] = weight;
}
cmp++;
}
}
closedir(pDir);
return 0;
}

I can see two obvious seg faults.
First of all you copy v[1] into buffer. Do you provide any arguments to your program?
Second one I see on the fly is that you try to call readdir on a uninitialized pointer of DIR.
Try DIR* pDir = opendir(...);
You should always print a usage message when the argument count doesnt match!
if (c != 1) {
fprintf(stderr, "USAGE: %s needs exactly one argument!\n" v[0]);
return 0;
}
One more thing I forgot to mention. You can compile your program with the -g flag and try to use a debugger to find these errors.

Related

Why does the solution to a coding challenge seem so much more complex than mine, while mine is simpler? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 12 months ago.
Improve this question
I am following a C++ course which has one challenge in each section. For the most recent challenge, we were to take out three different types of data from a text file, where in one line there are three data types each separated by a tab (only two tabs, you will see).
1\tBox of 64 Pixels\t64 RGB pixels in a decorative box
2\tSense of Humor\tEspecially dry. Imported from England.
3\tBeauty\tInner beauty. No cosmetic surgery required!
4\tBar Code\tUnused. In original packaging.
The first type is the SKU number, the second is the name, and the third is the description. You have to place all this information in a struct.
My solution:
#include <iostream>
#include <cstdio>
#include <cstring>
#include <vector>
using namespace std;
struct CI
{
int sku = 0;
string name = "";
string desc = "";
};
const int MAXSTRING = 1024;
int main()
{
const char *itemsPath = "items.txt";
FILE *items = fopen(itemsPath, "r");
CI changed1;
CI changed2;
CI changed3;
CI changed4;
vector<CI> changed = {changed1, changed2, changed3, changed4};
vector<string> lines = vector<string>();
char buffer[MAXSTRING];
int tab = 0;
int counter = 0;
while (fgets(buffer, MAXSTRING, items))
{
lines.push_back(buffer);
}
for (string &s : lines)
{
for (char &c : s)
{
if (c == '\t')
{
tab++;
continue;
}
if (tab == 0)
{
changed[counter].sku = c - '0';
}
else if (tab == 1)
{
changed[counter].name += c;
}
else if (tab == 2)
{
changed[counter].desc += c;
}
}
tab = 0;
counter++;
if (counter == 4)
{
break;
}
}
for (int i = 0; i < 4; i++)
{
cout << "sku: " << changed[i].sku << ", name: " << changed[i].name << ", desc: " << changed[i].desc;
}
fclose(items);
return 0;
}
The actual solution:
// 08_solution.cpp by Bill Weinman <http://bw.org/>
// updated 2002-07-23
#include <cstdio>
#include <cerrno>
#include <cstdlib>
#include <cstring>
constexpr size_t maxstring = 1024; // size of line buffer
constexpr size_t name_size = 32; // size of name string
constexpr size_t desc_size = 128; // size of description string
constexpr const char *filename = "/Users/billw/Desktop/ExerciseFiles/Chap08/items.txt";
constexpr size_t max_split = 15;
constexpr char tab_char = '\t';
struct Item
{
int sku; // stock keeping unit
char name[name_size]; // item name
char desc[desc_size]; // item description
};
// str_seps(s) -- returns an array where each element
// represents the position of a separator in the string
// first element is a count
size_t *str_seps(const char *s, size_t len)
{
static size_t indicies[max_split + 1];
size_t &count = indicies[0];
if (len < 3)
return indicies;
for (size_t &z : indicies)
z = 0; // zero out the array
for (size_t i = 0; i < len; ++i)
{
if (s[i] == tab_char)
{
++count;
indicies[count] = i;
if (count >= max_split)
break;
}
}
return indicies;
}
int main()
{
char buf[maxstring]; // buffer for reading lines in file
// open the file
FILE *fr = fopen(filename, "r");
if (!fr)
{
const char *errstr = strerror(errno);
printf("cannot open file (%d): %s\n", errno, errstr);
return 1;
}
// read loop
while (fgets(buf, maxstring, fr))
{
size_t len = strnlen(buf, maxstring);
if (len <= 5)
continue;
// trim the newline from the end of the string
if (buf[len - 1] == '\n')
{
buf[len - 1] = 0;
--len;
}
size_t *split3 = str_seps(buf, len);
if (split3[0] < 2)
break;
buf[split3[1]] = buf[split3[2]] = 0; // change separators to terminators
Item current_item;
current_item.sku = atoi(buf);
strncpy(current_item.name, buf + split3[1] + 1, name_size - 1);
strncpy(current_item.desc, buf + split3[2] + 1, desc_size - 1);
printf("sku: %d, name: %s, desc: %s\n", current_item.sku, current_item.name, current_item.desc);
}
return 0;
}
EDITED: To be specific, is my code unsafe, and is the check to see if the file has been opened or not:
if (!fr)
{
const char *errstr = strerror(errno);
printf("cannot open file (%d): %s\n", errno, errstr);
return 1;
}
needed.
One hint on why it looks odd is at the very beginning:
// updated 2002-07-23
#include <cstdio>
#include <cerrno>
#include <cstdlib>
#include <cstring>
So 20 year old code, using only the C library, might not be great C++.
On the other hand, code like:
CI changed1;
CI changed2;
CI changed3;
CI changed4;
vector<CI> changed = {changed1, changed2, changed3, changed4};
is probably also only obvious to the person writing it. :-)
Guess it means the same as vector<CI> changed(4);, but I had to think about it for a while.

How to use mmap for integer input?

I have coded a program that uses mmap as input to fill a integer 2D vector from a .txt file. The code is part of a larger program and will be submitted to a competition. Is there a way to improve the speed using mmap, or by using a different way all together? Here is the code:
#include <fstream>
#include <vector>
#include <algorithm>
#include <cstring>
#include <iostream>
// for mmap:
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
using namespace std;
const char* map_file(const char* fname, size_t& length);
int main()
{
auto start = std::chrono::high_resolution_clock::now();
size_t length;
auto f = map_file("erasmus.in", length);
auto l = f + length;
int i = 0;
bool flag = false;
string lines;
vector<vector<int> > students(10000); //The number of lines is predefined
const char* temp;
while (f && f!=l) {
string element = "";
temp = static_cast<const char*>(memchr(f, '\n', l-f));
for(f = f; f<=temp; f++)
{
if(!isspace(*f))
{
element += *f;
flag = true;
}
if(isspace(*f) && flag == true)
{
flag = false;
int assigned_element = stoi(element);
students[i].push_back(assigned_element);
element = "";
}
}
i++;
temp++;
}
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed1 = finish - start;
FILE* output = fopen("erasmus.out", "w");
for (int i = 0; i < students.size(); i++)
{
for (int j = 0; j < students[i].size(); j++)
{
fprintf(output, "%d ", students[i][j]);
}
fprintf(output, "\n");
}
std::cout << "Elapsed time: " << elapsed1.count() << " s\n";
return 0;
}
void handle_error(const char* msg) {
perror(msg);
exit(255);
}
const char* map_file(const char* directory, size_t& length)
{
int fileDirectory = open(directory, O_RDONLY);
if (fileDirectory == -1)
handle_error("open");
// obtain file size
struct stat sb;
if (fstat(fileDirectory, &sb) == -1)
handle_error("fstat");
length = sb.st_size;
const char* map = static_cast<const char*>(mmap(NULL, length, PROT_READ, MAP_PRIVATE, fileDirectory, 0u));
if (map == MAP_FAILED)
handle_error("mmap");
return map;
}
The file will be executed on a linux system, if this helps to find the optimal answer. At the end of each line of the .txt
there is a space character (' ') and a newline('\n')

How does one locate a pointer error?

I am attempting to create a program to create a Markov chain but I am having pointer problems. When I run the Program I get a segmentation fault.
#include <stdio.h>
#include <cstring>
#include <cstdlib>
struct word;
struct nextword
{
word* sourceword;
word* next = 0;
};
int wordcount;
struct word
{
char* wordstr;
struct word* next = 0;
nextword* followingword = 0;
int nextwordcount = 0;
};
int main()
{
word* firstword = 0;
char * buffer = 0;
long length;
FILE * f = fopen ("alice.txt", "rb");
if (f)
{
fseek (f, 0, SEEK_END);
length = ftell (f);
fseek (f, 0, SEEK_SET);
buffer = (char *)malloc (length);
if (buffer)
{
fread (buffer, 1, length, f);
}
fclose (f);
}
if (buffer)
{
char wordbuffer[500];
int fileindex = 0;
while(fileindex < length-1)
{
int wordindex = 0;
while(buffer[fileindex] != ' ')
{
wordbuffer[wordindex] = buffer[fileindex];
wordindex++;
fileindex++;
}
if(wordindex != 0)
{
wordbuffer[wordindex] = '\0';
word* newword = (word*)malloc(sizeof(word));
char* newwordstr = (char*)malloc((strlen(wordbuffer)+1)*sizeof(char));
strcpy(newword->wordstr, newwordstr);
if(!firstword)
{
firstword = newword;
}
else
{
word* testword = firstword;
while(!testword->next)
{
testword = (testword->next);
}
testword->next = newword;
printf(newword->wordstr);
}
}
return 0;
}
}
else
{
return 1;
}
}
I attempted to remove the file reading part and replace it with a hard coded string, but the problem remained.
You might want to read about STL and use a list. Or use a C list, see a couple of examples,
Adding node in front of linklist
How to pop element from tail in linked list?
Trying to make linkedlist in C
Several problems. Fixed some. compiles.
I have annotated the code with places where you need to fix bounds checking, and the big problem was likely the strcpy to the struct word->wordstr uninitialized char*,
#include <stdio.h>
#include <cstring>
#include <cstdlib>
struct word;
struct nextword
{
word* sourceword;
word* next = 0;
};
int wordcount;
struct word
{
char* wordstr; //what do you think this pointer points to?
struct word* next = 0;
nextword* followingword = 0;
int nextwordcount = 0;
};
int main()
{
FILE* fh = NULL;
word* firstword = 0;
char* buffer = 0;
char* fname = "alice.txt";
long length = 0; //you did not initialize length
if ( (fh = fopen ("alice.txt", "rb")) )
{
//why not use fstat to get file size?
//why not use mmap to read file?
fseek (fh, 0, SEEK_END);
length = ftell (fh); //ok, length set here
fseek (fh, 0, SEEK_SET);
if( (buffer = (char *)malloc (length)) )
{
fread (buffer, 1, length, fh);
}
fclose (fh);
}
else
{
printf("error: cannot open %s",fname);
exit(1);
}
printf("read %s, %ld\n",fname,length);
if (!buffer)
{
printf("error: cannot open %s",fname);
exit(1);
//use exit, to return from main() //return 1;
}
//already checked buffer
{
int fileindex = 0;
//put wordbuffer after fileindex, avoids stackoverflow overwrite
char wordbuffer[500]; //500 bytes on stack, initialize?
memset(wordbuffer,0,sizeof(wordbuffer));
while(fileindex < length-1)
{
int wordindex = 0;
//several errors in this line, check for null terminator,
//check for newline, tab, basically any whitespace
//while(buffer[fileindex] != ' ')
while( buffer[fileindex] && buffer[fileindex] != ' ' )
{
wordbuffer[wordindex] = buffer[fileindex];
wordindex++;
fileindex++;
//here is another error, do not overflow your stack based buffer
if( wordindex>sizeof(buffer)-1 ) break; //do not overflow buffer
}
wordbuffer[wordindex] = '\0'; //terminate wordbuffer
//since you chose wordindex signed, you want it > 0
if(wordindex > 0)
{
//use a constructor
word* newword = (word*)malloc(sizeof(word));
//use a constructor
//or just use strdup, since it is just a cstring
char* newwordstr = strdup(wordbuffer);
//no, just set pointer to the above allocated string
//strcpy(newword->wordstr, newwordstr);
newword->wordstr = newwordstr;
if(!firstword)
{
firstword = newword;
}
else
{
word* testword = firstword;
while(!testword->next)
{
testword = (testword->next);
}
testword->next = newword;
printf(newword->wordstr);
}
}
return 0;
}
}
exit(0); //done
}
This compiles and runs without error, you need to look up linked list handling. You should implement a linked list, and then add word elements to list.

Segmentation fault (core dumped), storing char * to string vector of struct

#include <iostream>
#include <fstream>
#include <string.h>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <semaphore.h>
#include <sys/shm.h>
#include <sys/sem.h>
#include <sys/ipc.h>
#include <vector>
#include <sstream>
#define SHMSIZE 1024
using namespace std;
namespace patch
{
template < typename T > std::string to_string( const T& n )
{
std::ostringstream stm ;
stm << n ;
return stm.str() ;
}
}
struct process
{
int r;
string name;
vector<string> lines;
};
int main(int argc, char * argv[])
{
int firstRun = 1; //Skipping First Line of Assign-1.ip.
int quantum = 0; //For taking input of quantum.
int count = 0; //For number of processes.
int pchtoint;
string c;
char * pch; //For tokenization.
string reading_file; //Reading a line from file.
char * readarr; //Converting "reading_file" to readarr for tokenization.
process * p;
//=== Quantum Input ===//
cout<<"Enter Quantum size [1-1000]: ";
cin>>quantum;
while(quantum < 1 || quantum > 1000)
{
cout<<"Wrong input!!! Enter Again [1-1000]: ";
cin>>quantum;
}
//=====================//
//===Filing===//
ifstream read("Assign-2.ip");
if(read.is_open())
{
while(!read.eof())
{
getline(read, reading_file);
readarr = new char[reading_file.size() + 1];
for(int i = 0; i < reading_file.length(); i++)
{
readarr[i] = reading_file[i];
}
if(firstRun > 1)
{
int countingline = 0; //counting the number of lines in a process.
pch = strtok (readarr," ,");
while (pch != NULL)
{
c = pch[1];
pchtoint = atoi(c.c_str());
p[pchtoint-1].r++;
p[pchtoint-1].lines.push_back(pch);
for(int i = 0; i < p[pchtoint-1].lines.size(); i++)
cout<<p[pchtoint-1].name<<"=="<<p[pchtoint-1].lines.at(i)<<endl;
pch = strtok (NULL, " ,");
}
}
else
{
pch = strtok (readarr,",.-");
while (pch != NULL)
{
count++;
pch = strtok (NULL, ",.-");
}
p = new process[count];
string s = "p";
for(int i = 0; i < count; i++)
{
s = s + patch::to_string(i+1);
p[i].name = s;
s = s[0];
}
firstRun++;
}
}
}
else
{
cout<<"Cannot open file!!!"<<endl;
}
read.close();
return 0;
}
Enter Quantum size [1-1000]: 2
p1==p1-l1
p2==p2-l1
p3==p3-l1
p1==p1-l1
p1==p1-l2
p2==p2-l1
p2==p2-l2
p3==p3-l1
p3==p3-l2
p1==p1-l1
p1==p1-l2
p1==p1-l3
p3==p3-l1
p3==p3-l2
p3==p3-l3
p1==p1-l1
p1==p1-l2
p1==p1-l3
p1==p1-l4
Segmentation fault (core dumped)
I am reading data from a cvs file. and storing it in struct that is p here. but I don't know why it is giving segmentation fault. I am compiling it on ubuntu terminal.
The input file contains data:
P1, P2, P3,
p1-l1, p2-l1, p3-l1
p1-l2, p2-l2, p3-l2
p1-l3, , p3-l3
p1-l4, ,

How to get file suffix in c++?

I want to get the suffix(.txt,.png etc.) of a file that I know that exists in some folder.
I know that the file name(prefix) is unique in this folder.
The language is c++.
thanks
Assuming that "suffix" is the filename extension, you can do this:
char * getfilextension(char * fullfilename)
{
int size, index;
size = index = 0;
while(fullfilename[size] != '\0') {
if(fullfilename[size] == '.') {
index = size;
}
size ++;
}
if(size && index) {
return fullfilename + index;
}
return NULL;
}
It's C code, but I believe that can easily ported to C++(maybe no changes).
getfilextension("foo.png"); /* output -> .png */
I hope this help you.
UPDATE:
You will need scan all files of directory and compare each file without extension if is equal to your target.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <limits.h>
#include <dirent.h>
#include <string.h>
//.....
char * substr(char * string, int start, int end)
{
char * p = &string[start];
char * buf = malloc(strlen(p) + 1);
char * ptr = buf;
if(!buf) return NULL;
while(*p != '\0' && start < end) {
*ptr ++ = *p++;
start ++;
}
*ptr++ = '\0';
return buf;
}
char * getfilenamewithoutextension(char * fullfilename)
{
int i, size;
i = size = 0;
while(fullfilename[i] != '\0') {
if(fullfilename[i] == '.') {
size = i;
}
i ++;
}
return substr(fullfilename, 0, size);
}
char * getfilextension(char * fullfilename)
{
int size, index;
size = index = 0;
while(size ++, fullfilename[size]) {
if(fullfilename[size] == '.') {
index = size;
}
}
if(size && index) {
return fullfilename + index;
}
return NULL;
}
char*FILE_NAME;
int filefilter(const struct dirent * d)
{
return strcmp(getfilenamewithoutextension((char*)d->d_name), FILE_NAME) == 0;
}
and then:
void foo(char * path, char * target) {
FILE_NAME = target;
struct dirent ** namelist;
size_t dirscount;
dirscount = scandir(path, &namelist, filefilter, alphasort);
if(dirscount > 0) {
int c;
for(c = 0; c < dirscount; c++) {
printf("Found %s filename,the extension is %s.\n", target, getfilextension(namelist[c]->d_name));
free(namelist[c]);
}
free(namelist);
} else {
printf("No files found on %s\n", path);
}
}
and main code:
int main(int argc, char * argv[])
{
foo(".", "a"); /* The .(dot) scan the current path */
}
For a directory with this files:
a.c a.c~ a.out
a.o makefile test.cs
The output is:
Found a filename,the extension is .c.
Found a filename,the extension is .c~.
Found a filename,the extension is .o.
Found a filename,the extension is .out.
Note: the scandir() function is part of GNU extensions/GNU library,if you don't have this function available on your compiler,tell me that I will write an alias for that or use this implementation(don't forget to read the license).
If you're using Windows, use PathFindExtension.
There's no standard functionality to list directory contents in c++. So if you know allowed extensions in your app you can iterate through and find if file exists.
Other options are use OS specific API or use something like Boost. You can also use "ls | grep *filename" or "dir" command dump and parse the output.