I have been working with a program that will read through multiple text files, record the number of words in them, and write to a file all of the words and the frequency of them. However, I have encounter a segmentation fault somewhere in my code. I have tried using tools such as Valgrind to help me debug it, however it only points to where I say int i = 0 in the main loop. I apologize for posting a large portion of my code but I have spent hours trying to find where the bug is and cannot seem to find it for the life of me. The issues began when I started passing a structure in pthread_exit().
#include <iostream>
#include <fstream>
#include <string>
#include <pthread.h>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iterator>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <cstdio>
using namespace std;
// Create a structure that we can store information in
typedef struct info{
int words;
string dictionary[500000];
} info;
// Counts the number of words in the text file so we know how big to make our array
int countWord(char *arg){
char words[25000];
int count = 0;
ifstream check;
check.open(arg);
while(!check.eof()){
check>>words;
count++;
}
cout<<"Word Count: "<< count << '\n';
check.close();
return count;
}
// Checks to see if the word exists in our dictionary or not
int findWord(string array[], string target, int wordCount){
for(int i = 0; i < wordCount; ++i){
if(array[i] == target){
return 1;
}
}
return 0;
}
// Checks to see how many times a word is repeated
int checkWord(string array[], string target, int wordCount){
int number = 0;
for(int i = 0; i < wordCount; i++){
if(array[i] == target){
number++;
}
}
return number;
}
void *threads(void *arg){
info information;
char *fileName = (char *)arg;
ifstream myfile (fileName);
string line;
string fullText[15000];
string dictionary[500000];
int wordCount = countWord(fileName);
int i = 0;
int find;
int check;
int x = 0;
int checkingStart = 0;
// Opens and reads the file word by word removing any symbols that we dislike
if (myfile.is_open()){
while(myfile >> line){
transform(line.begin(), line.end(), line.begin(), ::tolower);
line.erase(remove(line.begin(), line.end(), ','), line.end());
fullText[i] = line;
i++;
}
}
else cout << "Unable to Open the File";
myfile.close();
// Goes through and adds all the words to our dictionary
for(i = 0; i < wordCount; ++i){
find = findWord(dictionary, fullText[i], wordCount);
if(find == 0){
dictionary[x] = {fullText[i]};
++x;
checkingStart = 1;
}
}
// Sets each section of dictionary equal to the one in the structure
for(i = 0; i < wordCount; ++i){
information.dictionary[i] = dictionary[i];
}
// Sets words equal to word count and then passes the structure information out of the thread
information.words = wordCount;
pthread_exit(&information);
return NULL;
}
int main(){
int i = 0;
int x = 0;
int y = 0;
int z = 0;
int a = 0;
int b = 0;
int add = 0;
int currentSize = 0;
int checkingStart = 0;
int wordCount;
int find;
string fullDictionary[500000];
string dict[500000];
ofstream writeFile;
info information;
char *fileName;
char *fileList[2];
pthread_t threadCount[2];
int frequency[500000];
int check;
fileList[0] = "text1";
fileList[1] = "text2";
// Creates a loop that creates and joins threads for each text file
for(a = 0; a < 1; ++a){
fileName = fileList[a];
pthread_create(&threadCount[a], NULL, threads, &fileName);
pthread_join(threadCount[a], (void **)&information);
wordCount = information.words;
// Sets each part of dict equal to the same slot on info.dict
for(b = 0; b < wordCount; ++b){
dict[b] = information.dictionary[b];
}
// Adds to a complete list of all the text files added together
for(y = 0, z = currentSize; z < wordCount; ++z, ++y){
fullDictionary[z] = dict[y];
}
currentSize = (currentSize + wordCount);
}
// Goes through and adds all the words to our dictionary
for(i = 0; i < wordCount; ++i){
find = findWord(dict, fullDictionary[i], currentSize);
if(find == 0){
dict[x] = {fullDictionary[i]};
cout << "Added the Word: " << fullDictionary[i] << "\n";
add = 1;
checkingStart = 1;
}
// Checks the number of times each word appears in the text file
if(checkingStart == 1){
check = checkWord(fullDictionary, dict[x], wordCount);
frequency[x] = {check};
}
// Checks to see if it needs to move to the next open dictionary spot
if(add == 1){
++x;
add = 0;
}
}
return 0;
}
These were the changes that were needed to get the program working.
1) One issue seems to be that the size of the variables in the function threads. Looks like every thread that is spawned has some default limit . You could read up on pthread_attr_setstacksize. but the simplest solution was to reduce the size of the strings in thread.So the size of the variables are why it's gives a segmentation fault as soon as the threads function is called.
As already mention in the comments above usage of vector/maps classes will help reduce the need for large local variables.
2) The return variable needs to be a non-local variable else the return value does not make it back successfully.
3) just noticed the main loop ( variable a ) is running only once . Also once the thread is launched(pthread_create) the loop is waiting for the join . This will result in serialization of the threads. The create can be done first and then the join can be in called in a separate loop after that.
Changes are given below ..
In function - threads
info *information;
//changed to pointer
// info information;
char *fileName = (char *)arg;
ifstream myfile (fileName);
string line;
string fullText[1500];
string dictionary[5000];
// reduced size
//string fullText[15000];
//string dictionary[500000];
.....
information = new info; // create an instance
........
// change to pointer
information->dictionary[i] = dictionary[i];
}
// Sets words equal to word count and then passes the structure information out of the thread
information->words = wordCount;
pthread_exit(information); // return pointer
in function - main
info *information; // change to pointer
....
for(a = 0; a < 2; ++a){ // loop to 2
.....
pthread_create(&threadCount[a], NULL, threads, (void *)fileName); // changed file name
// pthread_create(&threadCount[a], NULL, threads, &fileName);
wordCount = information->words; // changed for pointer
...
dict[b] = information->dictionary[b] // changed for pointer
After the edits you should be able to run to debug the rest of the functionality.
Related
For this project, we are supposed to use an input and output file to better organize our data. Howvever, for whatever reason, every time I rube the program it says
"File couldn't open. Terminating.
Process finished with exit code 1"
I have tried several different methods to try and get the program to open the file (using different commands, changing filename, etc), however each time it gives me the above statement.
Here is the code for my program (please note there is much more code than this. However, this is the only area that uses the input file, so I am confident that the error is residing somewhere withing here):
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <iomanip>
#include <limits>
#ifdef _MSC_VER // Memory leak check
#define _CRTDBG_MAP_ALLOC
#include <crtdbg.h>
#define VS_MEM_CHECK _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
#else
#define VS_MEM_CHECK
#endif
using namespace std;
const int NUM_GRADES = 5;
int main(int argc, char* argv[]) {
VS_MEM_CHECK // Enable memory leak check
ifstream inputFile; // Reading Input file
inputFile.open(argv[1]);
if (inputFile.is_open()) { // Checks if file opened succesfully
cout << "Input File opened successfully.\n";
} else {
cout << "File couldn't open. Terminating.\n";
return 1;
}
int numStudents;
int numExams;
inputFile >> numStudents >> numExams;
inputFile.ignore(std::numeric_limits<int>::max(), '\n');
string *arrayNames = new string[numStudents]; // Intializes Students Array
double *arrayTotalGrade = new double[numStudents]; // Initializes Total Score Array
double *arrayAverages = new double[numExams]; // Initializes Average Score Array
int **arrayScores = new int *[numStudents]; // Initializes Scores Array
for (int i = 0; i < numStudents; ++i) {
arrayScores[i] = new int[numExams];
}
int **arrayGradeCount = new int *[numExams]; // Initializes Grade Count Array
for (int i = 0; i < numExams; ++i) {
arrayGradeCount[i] = new int[NUM_GRADES];
for (int j = 0; j < NUM_GRADES; ++j) {
arrayGradeCount[i][j] = 0;
}
}
for (int i = 0; i < numStudents; ++i) {
string line;
string name;
getline(inputFile, line);
size_t p = 0;
while (!isdigit(line[p])) ++p; // line[p] is the location of the first digit on the line
name = line.substr(0, p - 1); // Gets name from file, p-1 removes an extra whitespace.
arrayNames[i] = name;
line = line.substr(p); // Isolates scores on line
istringstream iss(line); // Puts line (now with only scores' values) into an istringstream
for (int j = 0; j < numExams; ++j) // Puts scores from istringstream into arrayScores onto row 'i'
{
int scores;
iss >> scores;
arrayScores[i][j] = scores;
}
}
inputFile.close();
}
The following C++ program takes two text files, stop_words.txt, and story.txt. It then removes all the stop word occurrences in the story.txt file. For instance,
Monkey is a common name that may refer to groups or species of mammals, in part, the simians of infraorder L. The term is applied descriptively to groups of primates, such as families of new world monkeys and old world monkeys. Many monkey species are tree-dwelling (arboreal), although there are species that live primarily on the ground, such as baboons. Most species are also active during the day (diurnal). Monkeys are generally considered to be intelligent, especially the old world monkeys of Catarrhini.
the text above is story.txt, and the stop_words.txt file is given below:
is
are
be
When I run my code, it doesn't delete all the stop words and keeps some of them. The code also creates a file called stop_words_counter.txt which should display the number of stop word occurrences like so:
is 2
are 4
b 1
But my output file shows the following:
is 1
are 4
be 1
I would be very grateful for some help regarding this code! I have posted it below for your reference.
#include <iostream>
#include <string>
#include <fstream>
using namespace std;
const int MAX_NUM_STOPWORDS = 100;
struct Stop_word
{
string word; // stop word
int count; // removal count
};
int stops[100];
string ReadLineFromStory(string story_filename )
{
string x = "";
string b;
ifstream fin;
fin.open(story_filename);
while(getline(fin, b))
{
x += b;
}
return x;
}
void ReadStopWordFromFile(string stop_word_filename, Stop_word words[], int &num_words)
{
ifstream fin;
fin.open(stop_word_filename);
string a;
int i = 0;
if (fin.fail())
{
cout << "Failed to open "<< stop_word_filename << endl;
exit(1);
}
words[num_words].count = 0;
while (fin >> words[num_words].word)
{
++num_words;
}
fin.close();
}
void WriteStopWordCountToFile(string wordcount_filename, Stop_word words[], int num_words)
{
ofstream fout;
fout.open(wordcount_filename);
for (int i = 0; i < 1; i++)
{
fout << words[i].word << " "<< stops[i] + 1 << endl;
}
for (int i = 1; i < num_words; i++)
{
fout << words[i].word << " "<< stops[i] << endl;
}
fout.close();
}
int RemoveWordFromLine(string &line, string word)
{
int length = line.length();
int counter = 0;
int wl = word.length();
for(int i=0; i < length; i++)
{
int x = 0;
if(line[i] == word[0] && (i==0 || (i != 0 && line[i-1]==' ')))
{
for(int j = 1 ; j < wl; j++)
if (line[i+j] != word[j])
{
x = 1;
break;
}
if(x == 0 && (i + wl == length || (i + wl != length && line[i+wl] == ' ')))
{
for(int k = i + wl; k < length; k++)
line[k -wl] =line[k];
length -= wl;
counter++;
}
}
}
line[length] = 0;
char newl[1000] = {0};
for(int i = 0; i < length; i++)
newl[i] = line[i];
line.assign(newl);
return counter;
}
int RemoveAllStopwordsFromLine(string &line, Stop_word words[], int num_words)
{
int counter[100];
int final = 0;
for(int i = 1; i <= num_words; i++)
{
counter[i] = RemoveWordFromLine(line, words[i].word);
final += counter[i];
stops[i] = counter[i];
}
return final;
}
int main()
{
Stop_word stopwords[MAX_NUM_STOPWORDS]; // an array of struct Stop_word
int num_words = 0, total = 0;
// read in two filenames from user input
string a, b, c;
cin >> a >> b;
// read stop words from stopword file and
// store them in an array of struct Stop_word
ReadStopWordFromFile(a, stopwords, num_words);
// open text file
c = ReadLineFromStory(b);
// open cleaned text file
ofstream fout;
fout.open("story_cleaned.txt");
// read in each line from text file, remove stop words,
// and write to output cleaned text file
total = RemoveAllStopwordsFromLine(c, stopwords, num_words) + 1 ;
fout << c;
// close text file and cleaned text file
fout.close();
// write removal count of stop words to files
WriteStopWordCountToFile("stop_words_count.txt", stopwords, num_words);
// output to screen total number of words removed
cout << "Number of stop words removed = " << total << endl;
return 0;
}
There is one major bug in your code.
in function RemoveAllStopwordsFromLine
you are using the wrong array indices. In C++ the first element in an array has the index 0. Also you must compare with "less" than the size.
for (int i = 1; i <= num_words; i++)
So the first stop word "is", will never be checked and counted.
Please modify to
for (int i = 0; i < num_words; i++)
But then you need also to remove your patch in function WriteStopWordCountToFile . You made a special case for element 0. That is wrong.
Please remove
for (int i = 0; i < 1; i++)
{
fout << words[i].word << " " << stops[i] + 1 << endl;
}
and start the next for with 0. And remove the "+" while calculating the total.
Because you are using C-Style arrays, magic numbers and ultra complex code, I will show you a modern C++ solution.
In C++ you have many useful algorithms. Some are specifically designed to address your requirments. So, please use them. Try to get away from C and migrate to C++.
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
#include <iterator>
#include <algorithm>
#include <regex>
#include <sstream>
// The filenames. Whatever you want
const std::string storyFileName{ "r:\\story.txt" };
const std::string stopWordFileName{ "r:\\stop_words.txt" };
const std::string stopWordsCountFilename{ "r:\\stop_words_count.txt" };
const std::string storyCleanedFileName{ "r:\\story_cleaned.txt" };
// Becuase of the simplicity of the task, put everything in main
int main() {
// Open all 4 needed files
std::ifstream storyFile(storyFileName);
std::ifstream stopWordFile(stopWordFileName);
std::ofstream stopWordsCountFile(stopWordsCountFilename);
std::ofstream storyCleanedFile(storyCleanedFileName);
// Check, if the files could be opened
if (storyFile && stopWordFile && stopWordsCountFile && storyCleanedFile) {
// 1. Read the complete sourcefile with the story into a std::string
std::string story( std::istreambuf_iterator<char>(storyFile), {} );
// 2. Read all "stop words" into a std::vector of std::strings
std::vector stopWords(std::istream_iterator<std::string>(stopWordFile), {});
// 3. Count the occurences of the "stop words" and write them into the destination file
std::for_each(stopWords.begin(), stopWords.end(), [&story,&stopWordsCountFile](std::string& sw) {
std::regex re{sw}; // One of the "stop words"
stopWordsCountFile << sw << " --> " << // Write count to output
std::distance(std::sregex_token_iterator(story.begin(), story.end(), re, 1), {}) << "\n";});
// 4. Replace "stop words" in story and write new story into file
std::ostringstream wordsToReplace; // Build a list of all stop words, followed by an option white space
std::copy(stopWords.begin(), stopWords.end(), std::ostream_iterator<std::string>(wordsToReplace, "\\s?|"));
storyCleanedFile << std::regex_replace(story,std::regex(wordsToReplace.str()), "");
}
else {
// In case that any of the files could not be opened.
std::cerr << "\n*** Error: Could not open one of the files\n";
}
return 0;
}
Please try to study and understand this code. This is a very simple solution.
I'm making a program in C++ which counts NGS read alignments against a reference annotation. Basically the program reads both the annotation and alignment file into memory, iterates through the annotation, binary searches the alignment file for a probable location, upon finding this location linear searches a frame that is around that probable location.
Typically I want to keep this frame somewhat large (10000 alignments), so I had the idea to split the frame up and throw parts of it into separate threads.
Everything compiles and runs, but it doesn't look like my multithreading is working as intended because my comp is using one core for the job. Would anyone be kind enough to help me figure this out where I implemented the threading wrong.
https://sourceforge.net/projects/fast-count/?source=directory
#include <iostream>
#include <cstdlib>
#include <vector>
#include <string>
#include <thread>
#include <sstream>
#include <fstream>
#include <math.h>
#include "api/BamReader.h"
using namespace std;
using namespace BamTools;
int hit_count = 0;
struct bam_headers{
string chr;
int start;
};
struct thread_data{
int thread_id;
int total_thread;
int start_gtf;
int stop_gtf;
};
struct gtf_headers{
string chr;
string source;
string feature;
string score;
string strand;
string frame;
string annotation;
int start;
int end;
};
void process(int* start_holder, int size, int gtf_start, int gtf_stop){
//threaded counter process
for (int t = 0; t < size; t++){
if((start_holder[t] >= gtf_start) && (start_holder[t] <= gtf_stop)){
hit_count++;
}
}
}
vector <string> find_index(vector <vector <bam_headers> > bams){
//define vector for bam_index to chromosome
vector <string> compute_holder;
for (int bam_idx = 0; bam_idx < bams.size();bam_idx++){
compute_holder.push_back(bams[bam_idx][0].chr);
}
return compute_holder;
}
vector <gtf_headers> load_gtf(char* filename){
//define matrix to memory holding gtf annotations by assoc. header
vector<gtf_headers> push_matrix;
gtf_headers holder;
ifstream gtf_file(filename);
string line;
cout << "Loading GTF to memory" << "\n";
if (gtf_file.is_open()){
int sub_count = 0;
string transfer_hold[8];
while(getline(gtf_file,line)){
//iterate through file
istringstream iss(line);
string token;
//iterate through line, and tokenize by tab delimitor
while(getline(iss,token,'\t')){
if (sub_count == 8){
//assign to hold struct, and push to vector
holder.chr = transfer_hold[0];
holder.source = transfer_hold[1];
holder.feature = transfer_hold[2];
holder.start = atoi(transfer_hold[3].c_str());
holder.end = atoi(transfer_hold[4].c_str());
holder.score = transfer_hold[5];
holder.strand = transfer_hold[6];
holder.frame = transfer_hold[7];
holder.annotation = token;
push_matrix.push_back(holder);
sub_count = 0;
} else {
//temporarily hold tokens
transfer_hold[sub_count] = token;
++sub_count;
}
}
}
cout << "GTF successfully loaded to memory" << "\n";
gtf_file.close();
return(push_matrix);
}else{
cout << "GTF unsuccessfully loaded to memory. Check path to file, and annotation format. Exiting" << "\n";
exit(-1);
}
}
vector <vector <bam_headers>> load_bam(char* filename){
//parse individual bam file to chromosome bins
vector <vector <bam_headers> > push_matrix;
vector <bam_headers> iter_chr;
int iter_refid = -1;
bam_headers bam_holder;
BamReader reader;
BamAlignment al;
const vector<RefData>& references = reader.GetReferenceData();
cout << "Loading " << filename << " to memory" << "\n";
if (reader.Open(filename)) {
while (reader.GetNextAlignmentCore(al)) {
if (al.IsMapped()){
//bam file must be sorted by chr. otherwise the lookup will segfault
if(al.RefID != iter_refid){
//check if chr. position has advanced in the bam file, if true, push empty vector
iter_refid++;
push_matrix.push_back(iter_chr);
}else{
//if chr. position hasn't advanced push to current index in 2d vector
bam_holder.chr = references[al.RefID].RefName;
bam_holder.start = al.Position;
push_matrix.at(iter_refid).push_back(bam_holder);
}
}
}
reader.Close();
cout << "Successfully loaded " << filename << " to memory" << "\n";
return(push_matrix);
}else{
cout << "Could not open input BAM file. Exiting." << endl;
exit(-1);
}
}
short int find_bin(const string & gtf_chr, const vector <string> mapping){
//determines which chr. bin the gtf line is associated with
int bin_compare = -1;
for (int i = 0; i < mapping.size(); i++){
if(gtf_chr == mapping[i]){
bin_compare = i;
}
}
return(bin_compare);
}
int find_frame(gtf_headers gtf_matrix, vector <bam_headers> bam_file_bin){
//binary search to find alignment index with greater and less than gtf position
int bin_size = bam_file_bin.size();
int high_end = bin_size;
int low_end = 0;
int binary_i = bin_size / 2;
int repeat = 0;
int frame_start;
bool found = false;
while (found != true){
if ((bam_file_bin[binary_i].start >= gtf_matrix.start) && (bam_file_bin[binary_i].start <= gtf_matrix.end)){
frame_start = binary_i;
found = true;
}else{
if(repeat != binary_i){
if(bam_file_bin[binary_i].start > gtf_matrix.end){
if(repeat != binary_i){
repeat = binary_i;
high_end = binary_i;
binary_i = ((high_end - low_end) / 2) + low_end;
}
}else{
if(repeat != binary_i){
repeat = binary_i;
low_end = binary_i;
binary_i = ((high_end - low_end) / 2) + low_end;
}
}
}else{
frame_start = low_end;
found = true;
}
}
}
return(frame_start);
}
vector <int > define_frame(int frame_size, int frame_start, int bam_matrix){
//define the frame for the search
vector <int> push_ints;
push_ints.push_back(frame_start - (frame_size / 2));
push_ints.push_back(frame_start + (frame_size / 2));
if(push_ints[0] < 0){
push_ints[0] = 0;
push_ints[1] = frame_size;
if(push_ints[1] > bam_matrix){
push_ints[1] = frame_size;
}
}
if(push_ints[1] > bam_matrix){
push_ints[1] = bam_matrix;
push_ints[0] = bam_matrix - (frame_size / 2);
if(push_ints[0] < 0){
push_ints[0] = 0;
}
}
return(push_ints);
}
void thread_handler(int nthread, vector <int> frame, vector <bam_headers> bam_matrix, gtf_headers gtf_record){
int thread_divide = frame[1]-frame[0];//frame_size / nthread;
int thread_remain = (frame[1]-frame[0]) % nthread;
int* start_holder = new int[thread_divide];
for(int i = 0; i < nthread; i++){
if (i < nthread - 1){
for (int frame_index = 0; frame_index < thread_divide; frame_index++){
start_holder[frame_index] = bam_matrix[frame[0]+frame_index].start;
}
frame[0] = frame[0] + thread_divide;
thread first(process, start_holder,thread_divide,gtf_record.start,gtf_record.end);
first.join();
}else{
for (int frame_index = 0; frame_index < thread_divide + thread_remain; frame_index++){
start_holder[frame_index] = bam_matrix[frame[0]+frame_index].start;
}
thread last(process, start_holder,thread_divide + thread_remain,gtf_record.start,gtf_record.end);
last.join();
}
}
}
int main (int argc, char *argv[])
{
// usage
// ./count threads frame_size gtf_file files
//define matrix to memory holding gtf annotations by assoc. header
vector <gtf_headers> gtf_matrix = load_gtf(argv[3]);
//load bam, perform counts
for(int i = 4;i < argc;i++){
//iterate through filenames in argv, define matrix to memory holding bam alignments chr and bp position
vector <vector <bam_headers> > bam_matrix = load_bam(argv[i]);
//map chromosome to bam matrix index
vector <string> index_mapping = find_index(bam_matrix);
//iterate through gtf matrix, find corresponding bins for chr, set search frames, and count
for(int gtf_i = 0; gtf_i < gtf_i < gtf_matrix.size();gtf_i++){ //gtf_i < gtf_matrix.size()
hit_count = 0;
//find corresponding bins for gtf chr
short int bin_compare = find_bin(gtf_matrix[gtf_i].chr,index_mapping);
if(bin_compare != -1){
//find start of search frame
int frame_start = find_frame(gtf_matrix[gtf_i], bam_matrix[bin_compare]);
//get up lower bounds of search frame;
vector <int> full_frame = define_frame(atoi(argv[2]),frame_start,bam_matrix[bin_compare].size());
//create c array of bam positional data for the frame, and post to thread process
thread_handler(atoi(argv[1]),full_frame,bam_matrix[bin_compare],gtf_matrix[gtf_i]);
}
//counts displayed in STOUT
cout << gtf_matrix[gtf_i].chr << "\t" << gtf_matrix[gtf_i].source << "\t" << gtf_matrix[gtf_i].feature << "\t" << gtf_matrix[gtf_i].start << "\t" << gtf_matrix[gtf_i].end << "\t" << gtf_matrix[gtf_i].score << "\t" << gtf_matrix[gtf_i].strand << "\t" << gtf_matrix[gtf_i].frame << "\t" << gtf_matrix[gtf_i].annotation << "\t" << hit_count << "\n";
}
}
}
The answer to your question is very simple:
thread last(process, start_holder,thread_divide + thread_remain,gtf_record.start,gtf_record.end);
last.join();
Here, the parent task creates a new thread, and ... immediately waits for the thread to finish. That's what join() does, it waits for the thread to terminate.
So, your code starts a new thread, and immediately waits for it to finish, before doing anything else, like starting the next thread.
You need to rewrite thread_handler() to instantiate all std::thread instances, and then after instantiating all of them, call join() on each one, to wait for all of them to finish.
The typical approach is to precreate a std::vector of all thread instances, using std::thread's default constructor, then loop over them to initialize each one, then loop over them again, calling join() on each one.
Currently I am getting an runtime "assertation error"
Here is the error:
I'm reading words from a text file into dynamically allocated arrays.
this block of code is where I am filling the new arrays.
I know the problem is being caused by this block of code and something about my logic is off just can't see what it is.
//fill new arrays
for( int y = 0; y < new_numwords; y++)
{
for( int i = 0; i < NUM_WORDS; i++)
{
if (!strcmp(SentenceArry[i], EMPTY[0]) == 0)
{
New_SentenceArry[y] = SentenceArry[i];
New_WordCount[y] = WordCount[i];
y++;
}
}
}
Also how would I pass this dynamically allocated 2D array to a function? (the code really needs to be cleaned up as a whole)
char** SentenceArry = new char*[NUM_WORDS]; //declare pointer for the sentence
for( int i = 0; i < NUM_WORDS; i++)
{
SentenceArry[i] = new char[WORD_LENGTH];
}
Here is the full extent of the code.. help would be much appreciated!
Here is what is being read in:
and the current output (the output is how it's suppose to be ):
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <fstream>
#include <cstring>
#include <cctype>
#include <iomanip>
using std::setw;
using std::left;
using std::cout;
using std::cin;
using std::endl;
using std::ifstream;
int main()
{
const int NUM_WORDS = 17;//constant for the elements of arrays
const int WORD_LENGTH = 50;//constant for the length of the cstrings (NEED TO GIVE THE VALUE ZERO STILL!)
short word_entry = 0; //declare counter
short new_numwords= 0; //declare new word count
char EMPTY[1][4]; //NULL ARRAY
EMPTY[0][0] = '\0';//define it as null
char** SentenceArry = new char*[NUM_WORDS]; //declare pointer for the sentence
for( int i = 0; i < NUM_WORDS; i++)
{
SentenceArry[i] = new char[WORD_LENGTH];
}
int WordCount[NUM_WORDS];//declare integer array for the word counter
for(int i = 0; i < NUM_WORDS; i++)//fill int array
{
WordCount[i] = 1;
}
int New_WordCount[NUM_WORDS] = {0};
ifstream read_text("DataFile.txt"); //read in our text file
if (read_text.is_open()) //check if the the file was opened
{
read_text >> SentenceArry[word_entry];
//REMOVE PUNCTUATION BEFORE BEING READ INTO THE ARRAY
while (!read_text.eof())
{
word_entry++; //increment counter
read_text >> SentenceArry[word_entry]; //read in single words of the text file into the array SentenceArry
char* ptr_ch;//declare our pointer that will find chars
ptr_ch = strstr( SentenceArry[word_entry], ",");//look for "," within the array
if (ptr_ch != NULL)//if true replace it with a null character
{
strncpy( ptr_ch, "\0" , 1);
}//end if
else
{
ptr_ch = strstr( SentenceArry[word_entry], ".");//look for "." within the array
if (ptr_ch != NULL)//if true replace it with a null character
{
strncpy( ptr_ch, "\0" , 1);
}//end if
}//end else
} //end while
}//end if
else
{
cout << "The file could not be opened!" << endl;//display error message if file doesn't open
}//end else
read_text.close(); //close the text file after eof
//WORD COUNT NESTED FOR LOOP
for(int y = 0; y < NUM_WORDS; y++)
{
for(int i = y+1; i < NUM_WORDS; i++)
{
if (strcmp(SentenceArry[y], EMPTY[0]) == 0)//check if the arrays match
{
y++;
}
else
{
if (strcmp(SentenceArry[y], SentenceArry[i]) == 0)//check if the arrays match
{
WordCount[y]++;
strncpy(SentenceArry[i], "\0" , 3);
}//end if
}//end if
}//end for
}//end for
//find how many arrays still contain chars
for(int i = 0; i < NUM_WORDS; i++)
{
if (!strcmp(SentenceArry[i], EMPTY[0]) == 0)
{
new_numwords++;
}
}
//new dynamic array
char** New_SentenceArry = new char*[new_numwords]; //declare pointer for the sentence
for( int i = 0; i < new_numwords; i++)
{
New_SentenceArry[i] = new char[new_numwords];
}
//fill new arrays
for( int y = 0; y < new_numwords; y++)
{
for( int i = 0; i < NUM_WORDS; i++)
{
if (!strcmp(SentenceArry[i], EMPTY[0]) == 0)
{
New_SentenceArry[y] = SentenceArry[i];
New_WordCount[y] = WordCount[i];
y++;
}
}
}
//DISPLAY REPORT
cout << left << setw(15) << "Words" << left << setw(9) << "Frequency" << endl;
for(int i = 0; i < new_numwords; i++) //compare i to the array constant NUM_WORDS
{
cout << left << setw(15) << New_SentenceArry[i] << left << setw(9) << New_WordCount[i] << endl; //display the contents of the array SentenceArry
}
//DEALLOCATION
for( int i = 0; i < NUM_WORDS; i++)//deallocate the words inside the arrays
{
delete [] SentenceArry[i];
}
for(int i = 0; i < new_numwords; i++)
{
delete [] New_SentenceArry[i];
}
delete [] SentenceArry; //deallocate the memory allocation made for the array SentenceArry
delete [] New_SentenceArry;//deallocate the memory allocation made for the array New_SentenceArry
}//end main
There are several issues with the code, not withstanding that this could be written using C++, not C with a sprinkling of C++ I/O..
Issue 1:
Since you're using c-style strings, any copying of string data will require function calls such as strcpy(), strncpy(), etc. You failed in following this advice in this code:
for( int y = 0; y < new_numwords; y++)
{
for( int i = 0; i < NUM_WORDS; i++)
{
if (!strcmp(SentenceArry[i], EMPTY[0]) == 0)
{
New_SentenceArry[y] = SentenceArry[i]; // This is wrong
New_WordCount[y] = WordCount[i];
y++;
}
}
}
You should be using strcpy(), not = to copy strings.
strcpy(New_SentenceArry[y], SentenceArry[i]);
Issue 2:
You should allocate WORD_LENGTH for both the original and new arrays. The length of the strings is independent of the number of strings.
char** New_SentenceArry = new char*[new_numwords]; //declare pointer for the sentence
for( int i = 0; i < new_numwords; i++)
{
New_SentenceArry[i] = new char[new_numwords];
}
This should be:
char** New_SentenceArry = new char*[new_numwords]; //declare pointer for the sentence
for( int i = 0; i < new_numwords; i++)
{
New_SentenceArry[i] = new char[WORD_LENGTH];
}
Issue 3:
Your loops do not check to see if the index is going out of bounds of your arrays.
It seems that you coded your program in accordance to the data that you're currently using, instead of writing code regardless of what the data will be. If you have limited yourself to 17 words, where is the check to see if the index goes above 16? Nowhere.
For example:
while (!read_text.eof() )
Should be:
while (!read_text.eof() && word_entry < NUM_WORDS)
Issue 4:
You don't process the first string found correctly:
read_text >> SentenceArry[word_entry]; // Here you read in the first word
while (!read_text.eof() )
{
word_entry++; //increment counter
read_text >> SentenceArry[word_entry]; // What about the first word you read in?
Summary:
Even with these changes, I can't guarantee that the program won't crash. Even it it doesn't crash with these changes, I can't guarantee it will work 100% of the time -- a guarantee would require further analysis.
The proper C++ solution, given what this assignment was about, is to use a std::map<std::string, int> to keep the word frequency. The map would automatically store similar words in one entry (given that you remove the junk from the word), and would bump up the count to 1 automatically, when the entry is inserted into the map.
Something like this:
#include <string>
#include <map>
#include <algorithm>
typedef std::map<std::string, int> StringMap;
using namespace std;
bool isCharacterGarbage(char ch)
{ return ch == ',' || ch == '.'; }
int main()
{
StringMap sentenceMap;
//...
std::string temp;
read_text >> temp;
temp.erase(std::remove_if(temp.begin(), temp.end(), isCharacterGarbage),temp.end());
sentenceMap[temp]++;
//...
}
That code alone does everything your original code did -- keep track of the strings, bumps up the word count, removes the junk characters from the word before being processed, etc. But best of all, no manual memory management. No calls to new[], delete[], nothing. The code just "works". That is effectively 5 lines of code that you would just need to write a "read" loop around.
I won't go through every detail, you can do that for yourself since the code is small, and there are vast amounts of resources available explaining std::map, remove_if(), etc.
Then printing out is merely going through the map and printing each entry (string and count). If you add the printing, that may be 4 lines of extra code. So in all, practically all of the assignment is done with effectively 10 or so lines of code.
Remove below code.
for(int i = 0; i < new_numwords; i++)
{
delete [] New_SentenceArry[i];
}
I am a high school student programming as a hobby. I make free stuff and I am working on a game using opengl. I need to save and load data but when met with difficulty I made the following to test my methods.
The save file 'shiptest' is correct but when I open the second file 'shipout' which is created with the save data from 'shiptest' only the first line is there. At first I thought that my array wasn't loading any new data and the clear function wasn't getting rid of the first elements. I corrected this assumption by overwriting those lines after saving the data and observing that the saved lines were loaded after all. My new assumption is that the getline func is only getting the first line each time it's called; but i do not know how to fix this.
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
unsigned short int shipPart;
float editShip[256][3];//part ID, x relative, y relative, r,g,b
float activeShip[256][3];
void CLEAR(bool edit)
{
for (int n = 0; n < 256; n++)
{
if (edit)
editShip[n][0] = -1;
else
activeShip[n][0] = -1;
}
}
void saveEdit(std::string name)
{
std::ofstream out;
out.open ("ship" + name + ".txt", std::ofstream::out);
for (int n = 0; n < 256; n++)
{
for (int i = 0; i < 3; i++)
{
if (editShip[n][0] == -1)
break;
out << editShip[n][i] << " ";
}
out << "\n";
}
out.close();
}
void load(std::string name, bool edit)
{
CLEAR(edit);
std::ifstream in;
in.open ("ship" + name + ".txt", std::ifstream::in);
std::string line, buf;
std::stringstream ss;
int i;
for (int n = 0; n < 3; n++)
{
getline(in, line);
ss << line;
i=0;
while (ss >> buf)
{
if (edit)
editShip[n][i] = atof(buf.c_str());
else
activeShip[n][i] = atof(buf.c_str());
i++;
}
}
in.close();
}
int main()
{
for (int n = 0; n < 256; n++)
{
editShip[n][0] = -1;
activeShip[n][0] = -1;
}
editShip[0][0] = 5;
editShip[0][1] = .11;
editShip[0][2] = .22;
editShip[1][0] = 4;
editShip[1][1] = .33;
editShip[1][2] = .44;
editShip[2][0] = 3;
editShip[2][1] = .55;
editShip[2][2] = .66;
saveEdit("test");
editShip[0][0] = 5000;
editShip[0][1] = 8978;
editShip[0][2] = 8888;
load("test",1);
saveEdit("out");
std::cout << "Hello world!" << std::endl;
return 0;
}
In load(), you keep appending more lines to your stringstream ss but its eof flag is probably remaining set from the previous time through the loop, so even though there's more to read from it, eof is already set so it won't continue providing data via operator>>(). If you simply call ss.clear() at the top of the for() loop, you'll start with an empty stringstream on each loop, and I think you'll get what you want.
In your load() function:
for (int n = 0; n < 3; n++)
{
ss.clear(); //< Clear ss here before you use it!
getline(in, line);
ss << line;
i=0;
while (ss >> buf)
{
if (edit)
editShip[n][i] = atof(buf.c_str());
else
activeShip[n][i] = atof(buf.c_str());
i++;
}
}
Getline() was working just fine. Just clear the stringstream before you use it and you're good to go. Ran this code on my computer and it works as desired.
EDIT: Ack! Just saw that phonetagger said the same thing while I was making my answer. He deserves the +1's not me.