The issue I'm having with this code stems from the last block of code for the get_words_beginning_s function.
/*
Name: xx
Date: xx
Purpose:Read text from file, count number of words, unique words, word frequency, & number of words that begin with letter 's'
*/
#include <iostream>
#include <fstream>
#include <string>
#include <set>
using namespace std;
multiset<string> display_and_load_words(string filename);
set<string> get_unique_words(multiset<string>& words);
set<string> get_words_beginning_s(multiset<string>& words);
int main() {
cout << "The Word Counter program\n\n";
string filename = "dickens.txt";
cout << "FILE TEXT: ";
//display_text(filename);
auto words = display_and_load_words(filename);
cout << "WORD COUNT: " << words.size() << endl << endl;
auto unique_words = get_unique_words(words);
auto words_beginning_s = get_words_beginning_s(words);
cout << unique_words.size() << " UNIQUE WORDS: ";
for (string word : unique_words) {
cout << word << ' ';
}
cout << endl << endl;
cout << "COUNT PER WORD: ";
for (string word : unique_words) {
cout << word << '=' << words.count(word) << ' ';
}
cout << endl << endl;
cout << "WORDS THAT BEGIN WITH 'S': ";
for (string word : words_beginning_s) {
cout << word << ' ';
}
cout << endl << endl;
}
multiset<string> display_and_load_words(string filename) {
multiset<string> words;
ifstream infile(filename);
if (infile) {
string word;
while (infile >> word) {
cout << word << ' ';
string new_word = "";
for (char c : word) {
if (c == '.' || c == ',') {
continue; // remove punctuation
}
else if (isupper(c)) {
new_word += tolower(c); // convert to lowercase
}
else {
new_word += c;
}
}
words.insert(new_word); // add word
}
cout << endl << endl;
infile.close();
}
return words;
}
set<string> get_unique_words(multiset<string>& words) {
set<string> unique_words;
for (string word : words) {
auto search = unique_words.find(word);
if (search == unique_words.end()) {
unique_words.insert(word);
}
}
return unique_words;
}
set<string> get_words_beginning_s(multiset<string>& words) {
set<string> words_beginning_s;
for (string word : words) {
auto search = words_beginning_s.find(word);
if (search == words_beginning_s.end()) {
for (int i = 0; i < words_beginning_s.size(); ++i) {
if (words_beginning_s[0] == 's') {
words_beginning_s.insert(word);
}
}
}
}
return words_beginning_s;
}
If working with set/multiset, how does one compare positional values within each separate word itself, rather the entire words? Example string in text file - "John goes to the store": Whereas normally a simple for loop can be used with the initial position to compare values and count number of times it appears (something like)-
for (int i = 0; i < words_beginning_s.length(); ++i) {
if (words_beginning_s[0] == 's') {
++s_word;
}
This does not work when using set/multiset. Pretty new to this, so sorry if this question seems dumb.
You can use the multisets member function lower_bound to get iterators to a range and then create a set from that range.
Example:
#include <iostream>
#include <set>
#include <string>
std::set<std::string> get_words_beginning_s(const std::multiset<std::string>& words) {
// create a set from the iterators you get from lower_bound("s") and lower_bound("t"):
return {words.lower_bound("s"), words.lower_bound("t")};
}
int main() {
std::multiset<std::string> words{
"foo", "slayer", "bar", "sepultura", "tesseract", "skinny puppy", "yello"
};
for(const std::string& word : get_words_beginning_s(words)) {
std::cout << word << '\n';
}
}
Output:
sepultura
skinny puppy
slayer
Related
This is making no sense to me. I do something to the code and it works only the first time. I then test it again and it goes back to not including the last element in the vector. I have no idea what I am doing wrong. Please help.
cout << "Enter a sentence: " << endl;
getline(cin, sentence);
for (auto x : sentence) // stores individual words in the vector
{
if (x == ' ')
{
myString.push_back(word);
cout << word << endl;
word = " ";
}
else
{
word = word + x;
}
}
for (auto elem : myString)
{
cout << elem << endl;
}
You are scanning the sentence one letter at a time, appending each letter to word until you encounter a space, and only then are you inserting the word into the vector. So, if the sentence does not end with a space, the last word won't be inserted into the vector.
There are a few different ways you can fix that:
check if word is not empty after the loop exits, and if not then insert it into the vector:
cout << "Enter a sentence: " << endl;
getline(cin, sentence);
for (auto x : sentence)
{
if (isspace(static_cast<unsigned char>(x))
{
if (!word.empty())
{
myString.push_back(word);
word = "";
}
}
else
{
word += x;
}
}
if (!word.empty())
{
myString.push_back(word);
}
for (const auto &elem : myString)
{
cout << elem << endl;
}
Scan for word boundaries yourself, such as with string::find_first_(not_)of():
cout << "Enter a sentence: " << endl;
getline(cin, sentence);
const char* wspace = " \f\n\r\t\v";
size_t start = 0, end;
while ((start = sentence.find_first_not_of(wspace, start)) != string::npos)
{
end = sentence.find_first_of(wspace, start + 1));
if (end == string::npos)
{
myString.push_back(sentence.substr(start));
break;
}
myString.push_back(sentence.substr(start, end-start));
start = end + 1;
}
for (const auto &elem : myString)
{
cout << elem << endl;
}
put the sentence into a std::istringstream and then use operator>> to extract complete whitespace-separate words from it. Let the standard library do all the heavy parsing for you:
cout << "Enter a sentence: " << endl;
getline(cin, sentence);
istringstream iss(sentence);
while (iss >> word)
{
myString.push_back(word);
}
for (const auto &elem : myString)
{
cout << elem << endl;
}
If there is no space after the last word, you are not adding it to your vector.
This code should say if a word is present in a sentence or not. When I insert the sentence and the word where I declare the strings(for exemple: string s = "the cat is on the table" string p = "table" the program says that the word is in the sentence) the code works but, with the getline, the for cycle never begin and it always says that the word isn't in the sentence.
Please help I dont know what to do
#include <iostream>
#include <string>
using namespace std;
int main () {
string s;
string p;
string word;
bool found = false;
int sl = s.length();
int beg = 0;
int pl = p.length();
cout << "sentence: ";
getline(cin, s);
cout << "word: ";
getline(cin, p);
for(int a = 0; a<sl; a++)
{
if(s[a]== ' ')
{
word = s.substr(beg, a-beg);
if (word== p)
{
found = true;
break;
}
beg = a+1;
}
}
if (found== true)
{
cout <<"word " << p << " is in a sentence " << s;
}
else
{
word = s.substr(beg);
if (word== p)
{
found = true;
}
if(found == true)
{
cout <<"the word " << p << " is in the sentence " << s;
}
else
{
cout <<"the word " << p << " isn't in the sentence " << s;
}
}
}
after taking the input strings then use length() to find the length, otherwise you are not taking the actual size of the strings.
getline(cin, s);
getline(cin, p);
int sl = s.length();
int pl = p.length();
For splitting the words after taking the input string by getline() you can use stringstream which is a builtin c++ function, like :
#include <sstream>
#include <iostream>
using namespace std;
int main(){
string arr;
getline(cin, arr);
stringstream ss(arr);
string word;
while(ss >> word){
// your desired strings are in `word` one by one
cout << word << "\n";
}
}
Another thing is that you can declare the strings like string s, p, word;
I'm writing a wordcount function that should be able to read elements from stdin into a string. Then evaluate the string and return the number of words, number of lines, size of the string, and the number of unique words.
My issue is when it comes to adding words to the unique set. When I write it to add elements to a set, it would count the whitespace as part of the word then push entirely into my set.
Example:
Input:
this is
is
a test
test
Output
a
test
is test this
line is 4
Words = 7
size is 27
Unique is 6
It counts 7 words in total and 6 unique. I tried debugging it by printing bits of the code as i go so i can keep track of where I went wrong. I can only conclude that the issue lies within my if loops. How can I get past this, I've been stuck for some time now.
Here is my code:
#include<iostream>
#include<string>
#include<set>
using std::string;
using std::set;
using std::cin;
using std::cout;
set<string> UNIQUE;
size_t sfind(const string s) //will take string a count words, add to set
{
string a;
int linecount = 0;
int state = 0; //0 represents reading whitespace/tab, 1 = reading letter
int count = 0; //word count
for(size_t i =0; i < s.length(); i++) {
a+=s[i]; //add to new string to add to set
if(state ==0) { //start at whitespace
if(state != ' ' && state != '\t') { //we didnt read whitespace
count++;
state =1;
}
}
else if(s[i]== ' ' || s[i] == '\t' || s[i] == '\n') {
state = 0;
UNIQUE.insert(a); //add to UNIQUE words
a.clear(); // clear and reset the string
}
if (s[i] == '\n') {
linecount++;
}
}
for(set<string>::iterator i = UNIQUE.begin(); i!= UNIQUE.end(); i++) {
cout << *i;
}
cout << '\n';
cout << "line is " << linecount << '\n';
return count;
}
int main()
{
char c;
string s;
while(fread(&c,1,1,stdin)) {
s+=c; //read element add to string
}
cout << "Words = " << sfind(s) << '\n';
cout << "size is " << s.length() << '\n';
cout << "Unique is "<< UNIQUE.size() << '\n';
return 0;
}
Also I will be using
fread(&c,1,1,stdin)
because i will be using it later on with a larger wordcount function.
Rather than writing code trying to parse the string on spaces, use std::istringstream to do the parsing.
Here is an example:
#include <string>
#include <iostream>
#include <sstream>
#include <set>
int main()
{
std::set<std::string> stringSet;
std::string line;
while (std::getline(std::cin, line))
{
std::istringstream oneline(line);
std::string word;
while (oneline >> word)
{
std::cout << word << "\n";
stringSet.insert(word);
}
}
std::cout << "\n\nThere are " << stringSet.size() << " unique words";
}
Live Example
How do I read the new line character? I am trying to do a character count, but the new line gets in the way. I tried doing if (text[i] == ' ' && text[i] == '\n') but that didn't work. Here is my repl.it session.
I am trying to read this from file.txt:
i like cats
dogs are also cool
so are orangutans
This is my code:
#include <iostream>
#include <fstream>
using namespace std;
int main()
{
ifstream input;
input.open("file.txt");
int numOfWords = 0;
while (true)
{
string text;
getline(input, text);
for(int i = 0; i < text.length(); i++)
{
if (text[i] == ' ')
{
numOfWords++;
}
}
if (input.fail())
{
break;
}
}
cout << "Number of words: " << numOfWords+1 << endl;
input.close();
}
Your question is asking how to count characters, but your code is counting words instead. std::getline() swallows line breaks. You don't need to worry about them if you want to count words. In fact, you can use operator>> to greatly simplify your counting in that case, eg:
int main()
{
ifstream input("file.txt");
int numOfWords = 0;
string word;
while (input >> word)
++numOfWords;
cout << "Number of words: " << numOfWords << endl;
return 0;
}
If you really want to count characters instead of words, use std::ifstream::get() to read the file 1 character at a time, eg:
int main()
{
ifstream input("file.txt");
int numOfChars = 0;
int numOfWords = 0;
bool isInSpace = true;
char ch;
while (input.get(ch))
{
++numOfChars;
if (std::isspace(ch, input.getloc())) {
isInSpace = true;
}
else if (isInSpace) {
isInSpace = false;
++numOfWords;
}
}
cout << "Number of chars: " << numOfChars << endl;
cout << "Number of words: " << numOfWords << endl;
return 0;
}
I'm implementing a function where the user can search for a word in a vector. The only problem is, my search function is only finding certain words and I'm not sure why.
ifstream in("testdata.txt");
string word1;
vector<string> individual_words;
while (in >> word1)
{
individual_words.push_back(word1);
}
Inside the file testdata.txt is:
Hello how are you
Good are you well?
Snazzy piece of toast
Here is the code where I compare the two words.
string search_word;
while (cin >> search_word)
{
for (int f=0; f < individual_words.size(); f ++)
{
cout << "individual words: " << individual_words[f] <<endl;
cout << "search word: " << search_word;
if (search_word == individual_words[f])
{
cout << " FOUND THE SAME WORD\n!";
break;
}
}
}
For some reason it's only catching certain words in a .txt file and I'm not exactly sure why. I've looked at it and it looks like it ignores the first word and it ignores every last word on each sentence.
Your vector will have duplicates, so it will only find the first occurrences of the words "are" and "you" before your loop breaks. Logically, there is nothing else wrong in this section of code, though it would be better written as:
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>
#include <sstream>
#include <vector>
using namespace std;
int main()
{
// simplified for demonstration purposes
string test = "Hello how are you\nGood are you well?\nSnazzy piece of toast";
istringstream iss(test);
vector<string> words;
copy(istream_iterator<string>(iss), istream_iterator<string>(), back_inserter(words));
string search_word;
while (cin >> search_word)
{
// this works, but is unnecessary
/*for (int f=0; f < words.size(); f ++)
{
cout << "individual words: " << words[f] <<endl;
cout << "search word: " << search_word;
if (search_word == words[f])
{
cout << " FOUND THE SAME WORD\n!";
break;
}
}*/
// this is a better approach
vector<string>::iterator it = find(words.begin(), words.end(), search_word);
if (it != words.end())
{
cout << "Found the word: " << *it << endl;
}
else
{
cout << "Not found!" << endl;
}
}
return 0;
}