How to print words using reverse_iterator in STL - c++

I have to read two text files and then compare words from second file with the first one. Then , I have to display KnownWords which are same words from both files and the remaining words which are not same are UnknownWords. Next Step is, I have to display most frequent known words in DisplayMostFreqKnownWords() and unknown words in DisplayMostFreqUnknownWords() functions. I have successfully completed DisplayMostFreqKnownWords() and so far Output is alright. I copied the same code from DisplayMostFreqKnownWords() to DisplayMostFreqUnknownWords() but in this is function it is not showing anything in the output. I dont know what is wrong. Can someone figure this one out.
Output is:
Displaying most frequent known words
Word Count
the 19
a 14
of 11
artificial 11
that 10
to 7
signal 7
and 7
in 6
they 5
Displaying most frequent unknown words
Word Count
Header file:
typedef map<string, vector<int> > WordMap;
typedef WordMap::iterator WordMapIter;
class WordStats
{
public:
WordStats();
void ReadDictionary();
void DisplayDictionary();
void ReadTxtFile();
void DisplayKnownWordStats();
void DisplayUnknownWordStats();
void DisplayMostFreqKnownWords();
void DisplayMostFreqUnknownWords();
private:
WordMap KnownWords;
WordMap UnknownWords;
WordMapIter Paragraph;
set<string> Dictionary;
char Filename[256];
}
My program:
// Displays 10 most frequent words in KnownWords
void WordStats::DisplayMostFreqKnownWords(){
int count;
multimap<int,string > displayFreqWords;// new map with int as key
(multimap because key could occur more than once)
multimap<int,string >::reverse_iterator rit = displayFreqWords.rbegin();
for (Paragraph = KnownWords.begin(); Paragraph != KnownWords.end();
++Paragraph){ // iterate map again
string word = (*Paragraph).first;
int cnt = (*Paragraph).second.size();
displayFreqWords.insert(pair<int,string>(cnt,word));
}
// multimap<int,string>::iterator rit; // iterator for new map
cout <<" Word Count\n";
for(; count<=10 && rit!=displayFreqWords.rend(); rit++, ++count){
string word = (*rit).second;
int cnt = (*rit).first;
cout << setw(15) << word << setw(10) << cnt << endl;
}
}
// Displays 10 most frequent words in UnknownWords
void WordStats::DisplayMostFreqUnknownWords(){
int count;
multimap<int,string > displayFreqUnknownWords;
multimap<int,string >::reverse_iterator rrit =
displayFreqUnknownWords.rbegin();
for (Paragraph = UnknownWords.begin(); Paragraph !=
UnknownWords.end(); ++Paragraph){
string word = (*Paragraph).first;
int cnt = (*Paragraph).second.size();
displayFreqUnknownWords.insert(pair<int,string>(cnt,word));
}
// multimap<int,string>::iterator rit; // iterator for new map
cout <<" Word Count\n";
for(; count<=10 && rrit!=displayFreqUnknownWords.rend(); rrit++, ++count){
string wrd = (*rrit).second;
int ccnt = (*rrit).first;
cout << setw(15) << wrd << setw(10) << ccnt << endl;
}
}

Here's a way to express what I think is your use case. I have used c++17 tuple expansion.
I have used unordered_map to deduce which words are known or unknown, and two multimaps to determine known and unknown word frequency.
Hope it's helpful.
#include <sstream>
#include <tuple>
#include <string>
#include <unordered_map>
#include <algorithm>
#include <iterator>
#include <map>
#include <iostream>
#include <iomanip>
#include <fstream>
// Set this to 1 to run a static test
#define TESTING 0
#if TESTING
using input_type = std::istringstream;
std::tuple<input_type, input_type> open_inputs() {
return {
std::istringstream("the big black cat sat on the grey mat"),
std::istringstream("the gold small cat lay on the purple mat")
};
}
#else
using input_type = std::ifstream;
std::tuple<input_type, input_type> open_inputs() {
return {
std::ifstream("left_file.txt"),
std::ifstream("right_file.txt"),
};
}
#endif
struct Counts {
int left_count = 0, right_count = 0;
int total() const {
return left_count + right_count;
}
bool is_known() const {
return left_count && right_count;
}
};
template<class F>
void for_each_word_in_file(std::istream &is, F f) {
std::for_each(std::istream_iterator<std::string>(is),
std::istream_iterator<std::string>(),
f);
}
int main() {
// open files
auto[left, right] = open_inputs();
auto known_words = std::unordered_map<std::string, Counts>();
// count words in each file
for_each_word_in_file(left, [&known_words](auto &&word) {
++known_words[word].left_count;
});
for_each_word_in_file(right, [&known_words](auto &&word) {
++known_words[word].right_count;
});
// map counts to words, in descending order, allowing multiple entries of the same count
std::multimap<int, std::string, std::greater<>> known_ordered, unknown_ordered;
// iterate all words seen, putting into appropriate map
for (auto&&[word, counts] : known_words) {
(counts.is_known() ? known_ordered : unknown_ordered)
.emplace(counts.total(), word);
}
// emit results
std::cout << "Known words by frequency\n";
for (auto&&[freq, word] : known_ordered) {
std::cout << std::setw(15) << word << " " << freq << '\n';
}
std::cout << "\nUmknown words by frequency\n";
for (auto&&[freq, word] : unknown_ordered) {
std::cout << std::setw(15) << word << " " << freq << '\n';
}
}

Related

c++ stack overflow due to recursive function how can I improve the data handling

I'm tackling a exercise which is designed to cause exactly this problem, of overloading the memory. Pretty much I'm loading various file sizes from 1,000 to 5 million lines of entries like this in a txt file (1 line = 1 entry):
SHFIv,aiSdG
PlgNB,bPHoP
ZHWJU,gfwgC
UAygL,Vqvhi
BlyzX,LLbCo
jbvrT,Utblj
...
pretty much every entry has 2 values separated by comma, in my code, I separate these values and try to find another matching value, there are always only 2 exactly matching values and each time 1 value is found the other one with which it is paired points to another pair, and so on until the final one gets found.
For example SHFIv,aiSdG would point to aiSdG,YDUVo.
I know my code is not very efficient, partly due to using recursion, but I could'nt figure out a better way to do the job, so any suggestions on how to possibly improve it to handle larger inputs would be greatly appriciated
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <map>
#include <unordered_map>
#include <stdio.h>
#include <vector>
#include <iterator>
#include <utility>
#include <functional>
#include <algorithm>
using namespace std;
template<typename T>
void search_bricks_backwards(string resume, vector<T>& vec, vector<string>& vec2) {
int index = 0;
for (const auto& pair : vec) {
//cout << "iteration " << index << endl;
if (pair.second == resume) {
vec2.insert(vec2.begin(), resume);
cout << "found " << resume << " and " << pair.second << endl;
search_bricks_backwards(pair.first, vec, vec2);
}
if (index + 1 == vec.size()) {
cout << "end of backward search, exitting..." << endl;
}
index++;
}
}
template<typename T>
void search_bricks(string start, vector<T>& vec, vector<string>& vec2) {
int index = 0;
for (const auto& pair : vec) {
//cout << "iteration " << index << endl;
if (pair.first == start) {
vec2.push_back(start);
cout << "found " << start << " and " << pair.first << endl;
search_bricks(pair.second, vec, vec2);
}
if (index + 1 == vec.size()) {
//search_bricks_backwards(start, vec, vec2);
// this also gets called on every recursion rather than just once
// as I originally intended when the forward iteration gets finished
}
index++;
}
}
template<typename T> // printing function
void printVectorElements(vector<T>& vec)
{
for (auto i = 0; i < vec.size(); ++i) {
cout << "(" << vec.at(i).first << ","
<< vec.at(i).second << ")" << endl ;
}
cout << endl;
}
vector<string> split(string s, string delimiter) { // filtering function
size_t pos_start = 0, pos_end, delim_len = delimiter.length();
string token;
vector<string> res;
while ((pos_end = s.find(delimiter, pos_start)) != string::npos) {
token = s.substr(pos_start, pos_end - pos_start);
pos_start = pos_end + delim_len;
res.push_back(token);
}
res.push_back(s.substr(pos_start));
return res;
}
int main()
{
vector<pair<string, string>> bricks;
vector<string> sorted_bricks;
ifstream inFile;
inFile.open("input-pairs-5K.txt"); // transferring data from .txt to a string
stringstream strStream;
strStream << inFile.rdbuf();
string str = strStream.str();
istringstream iss(str);
for (string line; getline(iss, line); )
// filtering data from string and dividing on ","
{
string delimiter = ",";
string s = line;
vector<string> v = split(s, delimiter);
string s1 = v.at(0);
string s2 = v.at(1);
bricks.push_back(make_pair(s1, s2));
}
search_bricks(bricks[0].second, bricks, sorted_bricks);
//printVectorElements(bricks);
//for (auto i = sorted_bricks.begin(); i != sorted_bricks.end(); ++i)
//cout << *i << " "; // this is just to check if vectors have data
}
Here is link to the 1k test data that works for me (only for the search bricks without backwards searching since it triggers on every recursion) again thanks for any suggestions on how to improve or get rid of the recursion. I don't code in c++ often and don't really know how else to tackle this.
Although implementing non-recursive version of your algorithm is canonical solution, if you really need to solve the problem without code modification, you can increase the stack size by modifying compiler option. ~100Mb will be usually sufficient.
In MSVC : /STACK:commit 104857600
In gcc : --stack, 104857600

How do I remove repeated words from a string and only show it once with their wordcount

Basically, I have to show each word with their count but repeated words show up again in my program.
How do I remove them by using loops or should I use 2d arrays to store both the word and count?
#include <iostream>
#include <stdio.h>
#include <iomanip>
#include <cstring>
#include <conio.h>
#include <time.h>
using namespace std;
char* getstring();
void xyz(char*);
void tokenizing(char*);
int main()
{
char* pa = getstring();
xyz(pa);
tokenizing(pa);
_getch();
}
char* getstring()
{
static char pa[100];
cout << "Enter a paragraph: " << endl;
cin.getline(pa, 1000, '#');
return pa;
}
void xyz(char* pa)
{
cout << pa << endl;
}
void tokenizing(char* pa)
{
char sepa[] = " ,.\n\t";
char* token;
char* nexttoken;
int size = strlen(pa);
token = strtok_s(pa, sepa, &nexttoken);
while (token != NULL) {
int wordcount = 0;
if (token != NULL) {
int sizex = strlen(token);
//char** fin;
int j;
for (int i = 0; i <= size; i++) {
for (j = 0; j < sizex; j++) {
if (pa[i + j] != token[j]) {
break;
}
}
if (j == sizex) {
wordcount++;
}
}
//for (int w = 0; w < size; w++)
//fin[w] = token;
//cout << fin[w];
cout << token;
cout << " " << wordcount << "\n";
}
token = strtok_s(NULL, sepa, &nexttoken);
}
}
This is the output I get:
I want to show, for example, the word "i" once with its count of 5, and then not show it again.
First of all, since you are using c++, I would recommend you to split text in c++ way(some examples are here), and store every word in map or unordered_map. Example of my realization you can find here
But if you don't want to rewrite your code, you can simply add a variable that will indicate whether a copy of the word was found before or after the word position. If a copy was not found in front, then print your word
This post gives an example to save each word from your 'strtok' function into a vector of string. Then, use string.compare to have each word compared with word[0]. Those indexes match with word[0] are marked in an int array 'used'. The count of match equals to the number marks in the array used ('nused'). Those words of marked are then removed from the vector, and the remaining carries on to the next comparing process. The program ends when no word remained.
You may write a word comparing function to replace 'str.compare(str2)', if you prefer not to use std::vector and std::string.
#include <iostream>
#include <string>
#include <vector>
#include<iomanip>
#include<cstring>
using namespace std;
char* getstring();
void xyz(char*);
void tokenizing(char*);
int main()
{
char* pa = getstring();
xyz(pa);
tokenizing(pa);
}
char* getstring()
{
static char pa[100] = "this is a test and is a test and is test.";
return pa;
}
void xyz(char* pa)
{
cout << pa << endl;
}
void tokenizing(char* pa)
{
char sepa[] = " ,.\n\t";
char* token;
char* nexttoken;
std::vector<std::string> word;
int used[64];
std::string tok;
int nword = 0, nsize, nused;
int size = strlen(pa);
token = strtok_s(pa, sepa, &nexttoken);
while (token)
{
word.push_back(token);
++nword;
token = strtok_s(NULL, sepa, &nexttoken);
}
for (int i = 0; i<nword; i++) std::cout << word[i] << std::endl;
std::cout << "total " << nword << " words.\n" << std::endl;
nsize = nword;
while (nsize > 0)
{
nused = 0;
tok = word[0] ;
used[nused++] = 0;
for (int i=1; i<nsize; i++)
{
if ( tok.compare(word[i]) == 0 )
{
used[nused++] = i; }
}
std::cout << tok << " : " << nused << std::endl;
for (int i=nused-1; i>=0; --i)
{
for (int j=used[i]; j<(nsize+i-nused); j++) word[j] = word[j+1];
}
nsize -= nused;
}
}
Notice that the removal of used words has to do in backward order. If you do it in sequential order, the marked indexes in the 'used' array will need to be changed. A running test:
$ ./a.out
this is a test and is a test and is test.
this
is
a
test
and
is
a
test
and
is
test
total 11 words.
this : 1
is : 3
a : 2
test : 3
and : 2
I read your last comment.
But I am very sorry, I do not know C. So, I will answer in C++.
But anyway, I will answer with the C++ standard approach. That is usually only 10 lines of code . . .
#include <iostream>
#include <algorithm>
#include <map>
#include <string>
#include <regex>
// Regex Helpers
// Regex to find a word
static const std::regex reWord{ R"(\w+)" };
// Result of search for one word in the string
static std::smatch smWord;
int main() {
std::cout << "\nPlease enter text: \n";
if (std::string line; std::getline(std::cin, line)) {
// Words and its appearance count
std::map<std::string, int> words{};
// Count the words
for (std::string s{ line }; std::regex_search(s, smWord, reWord); s = smWord.suffix())
words[smWord[0]]++;
// Show result
for (const auto& [word, count] : words) std::cout << word << "\t\t--> " << count << '\n';
}
return 0;
}

How to compare two text files and find the similarities between then?

i have loaded both of my files into an array and im trying to compare both of the files to get the comparisons inside the file. However when I run my code I don't receive an output.
This is the contents of both files.
file1
tdogicatzhpigu
file2
dog
pig
cat
rat
fox
cow
So when it does a comparison between the words from search1.txt and the words from text1.txt. I want to find the occurence of each word from search1.txt in text1.txt
What I want to eventually output is whether it has been found the index of the location inside the array.
e.g
"dog". Found, location 1.
Here is my code
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
int main()
{
ifstream file1("text1.txt");
if (file1.is_open())
{
string myArray[1];
for (int i = 0; i < 1; i++)
{
file1 >> myArray[i];
any further help would be greatly appreciated. Thanks in advance.
I believe the goal is to search the text in file1 for each word in file2.
You can't use equality for the two strings, as they aren't equal. You'll need to use the std::string::find method:
std::string target_string;
std::getline(file1, target_string);
std::string keyword;
while (getline(file2, keyword))
{
const std::string::size_type position = target_string.find(keyword);
std::cout << "string " << keyword << " ";
if (position == std::string::npos)
{
std::cout << "not found.\n";
}
else
{
std::cout << "found at position " << position << "\n";
}
}
Edit 1:
An implemented example:
#include <iostream>
#include <string>
using std::cout;
using std::string;
using std::endl;
int main()
{
const std::string target_string = "tdogicatzhpigu";
const std::string key_list[] =
{
"dog",
"pig",
"cat",
"rat",
"fox",
"cow",
};
static const unsigned int key_quantity =
sizeof(key_list) / sizeof(key_list[0]);
for (unsigned int i = 0; i < key_quantity; ++i)
{
const std::string::size_type position = target_string.find(key_list[i]);
std::cout << "string " << key_list[i] << " ";
if (position == std::string::npos)
{
std::cout << "not found.\n";
}
else
{
std::cout << "found at position " << position << "\n";
}
}
return 0;
}

C++ reading sentences

string a = MwZwXxZwDwJrBxHrHxMrGrJrGwHxMrFrZrZrDrKwZxLrZrFwZxErMrXxArZw;
Assume i have this data in my string . I want to record how many M , Z , X , D , J (including those capital letters i didn't mentions ) in in string how can do it ? My friends say use vector can do it but i does not really know how to use vector is there any alternative way to do it .
I tried using for loops to do and find the M , and reset the pointer to 0 to continue find the next capital value , but not sure is there any easier way to do it .
first I'll show you a 'easier' way to me.
#include <iostream>
#include <map>
using namespace std;
int main(int argc, const char * argv[]) {
string str = "MwZwXxZwDwJrBxHrHxMrGrJrGwHxMrFrZrZrDrKwZxLrZrFwZxErMrXxArZw";
map<char,int> map;
for (int i=0; i<str.length(); i++) {
char ch = str[i];
if (isupper(ch)) {
map[ch] ++;
}
}
for (auto item : map) {
cout<<item.first<<':'<<item.second<<endl;
}
return 0;
}
you'll only need to use 1 loop to solve your problem.
the 'isupper(int _c)' is a function from the standard library, it can tell you wether a character is a capital letter.
the 'map' is a data structure from the standard library too, it can do key-value storage for you.
this program outputs this:
A:1
B:1
D:2
E:1
F:2
G:2
H:3
J:2
K:1
L:1
M:4
X:2
Z:8
is this what you want?
Use regex.
using namespace std;
// regex_search example
#include <iostream>
#include <string>
#include <regex>
int main ()
{
std::string s ("MwZwXxZwDwJrBxHrHxMrGrJrGwHxMrFrZrZrDrKwZxLrZrFwZxErMrXxArZw;");
std::smatch m;
std::regex e ("[A-Z\s]+");
map<string,int> map;
std::cout << "Target sequence: " << s << std::endl;
std::cout << "Regular expression: [A-Z\s]+" << std::endl;
std::cout << "The following matches and submatches were found:" << std::endl;
while (std::regex_search (s,m,e)) {
for (auto x:m)
{
//cout << x << " ";
map[x.str()] ++;
}
//cout << std::endl;
s = m.suffix().str();
}
for (auto item : map) {
cout<<item.first<<':'<<item.second<<endl;
}
return 0;
}
The most direct translation of "loop through the string and count the uppercase letters" into C++ I can think of:
#include <iostream>
#include <map>
#include <cctype>
int main()
{
string a = "MwZwXxZwDwJrBxHrHxMrGrJrGwHxMrFrZrZrDrKwZxLrZrFwZxErMrXxArZw";
std::map<char, int> count;
// Loop through the string...
for (auto c: a)
{
// ... and count the uppercase letters.
if (std::isupper(c))
{
count[c] += 1;
}
}
// Show the result.
for (auto it: count)
{
std::cout << it.first << ": " << it.second << std::endl;
}
}

How to save unknown size structure (for later retrieval)

My plan is to store a couple dozen rows with 2 items per row and both items will have a different data type. Not sure if this is the right approach and have heard about using vectors but I can't find any samples that will take in 2 items with different types with many rows (an unknown amount of rows) similar to what I'm trying to do here. The following doesn't compile
#include <iostream>
#include <string>
#include <sstream>
using namespace std;
struct movies_t {
string title;
int year;
} myRecNo[];
void printmovie (movies_t movie);
int main ()
{
string mystr;
for (int i=0; i < 2; i++)
{
switch (i)
{
case 1:
myRecNo[i].title = "2001 A Space Odyssey";
myRecNo[i].year = 1968;
cout << "Auto entry is:\n ";
printmovie (myRecNo[i]);
break;
case 2:
myRecNo[i].title = "2002 A Space Odyssey";
myRecNo[i].year = 1978;
cout << "Auto entry is:\n ";
printmovie (myRecNo[i]);
break;
}
}
return 0;
}
void printmovie (movies_t movie)
{
cout << movie.title;
cout << " (" << movie.year << ")\n";
}
This is the error I get:
Test1.obj||error LNK2019: unresolved external symbol "struct movies_t * myRecNo" (?myRecNo##3PAUmovies_t##A) referenced in function _main|
There are a couple of bad practices in your code, if you are just asking for a way to modify the program so that it will compile and work, see the following:
Declare struct and create struct variables in your main function.
struct movies_t
{
string title;
int year;
};
then, in your main function, movies_t myRecNo[2];
Arrays start at index 0, not 1. so your switch should be
switch (i)
{
case 0:
myRecNo[i].title = "2001 A Space Odyssey";
myRecNo[i].year = 1968;
cout << "Auto entry is:\n ";
printmovie(myRecNo[i]);
break;
case 1:
myRecNo[i].title = "2002 A Space Odyssey";
myRecNo[i].year = 1978;
cout << "Auto entry is:\n ";
printmovie(myRecNo[i]);
break;
}
// the rest of the code..
After you modify these, your code should work.
However, for a better data structure to save an array of paired values, you can use std::vector<std::pair<string, int>> myReg to save your data.
the following code should be much much better, remember to #include <vector>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
void printmovie(std::vector<std::pair<std::string, int>>);
int main()
{
std::vector<std::pair<std::string, int>> myReg;
myReg.push_back({ "2001 A Space Odyssey", 1968 });
myReg.push_back({ "2002 A Space Odyssey", 1978 }); // <- if your compiler is not using c++11 standard or above, please change this line to myReg.push_back(std::pair<std::string, int>("name of the movie", int)); to use to older version of Initializer
printmovie(myReg);
return 0;
}
void printmovie(std::vector<std::pair<std::string, int>> movie)
{
for (auto itor = movie.begin(); itor != movie.end(); ++itor)
{
//first is the first data in the pair, which is the title
//second is the second data in the pair, which is the year
std::cout << (*itor).first << " (" << (*itor).second << ")\n";
}
}
Thanks everyone & #Zhou.
Zhou's code above might work on a newer version of the compiler but I'm using Code::Blocks IDE with MS Visual C++ 2010 compiler.
Here is the vector method that worked:
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
void printmovie(std::vector<std::pair<std::string, int>>);
int main()
{
std::vector<std::pair<std::string, int>> myReg;
myReg.push_back(std::pair<std::string, int>("title of the movie", 1968));
myReg.push_back(std::pair<std::string, int>("title of the movie2", 1978));
//myReg.push_back({ "2001 A Space Odyssey", 1968 });
//myReg.push_back({ "2002 A Space Odyssey", 1978 });
printmovie(myReg);
//or to print a single element (the 2nd row) thanks #zhou
std::cout << myReg[1].first << " " << myReg[1].second << std::endl;
return 0;
}
void printmovie(std::vector<std::pair<std::string, int>> movie)
{
for (auto itor = movie.begin(); itor != movie.end(); ++itor)
{
//first is the first data in the pair, which is the title
//second is the second data in the pair, which is the year
std::cout << (*itor).first << " (" << (*itor).second << ")\n";
}
}