I have a long string variable and I want to search it for specific words and trim the text around those words.
Say i have the following text :
"This amazing new wearable audio solution features a working speaker embedded into the front of the shirt and can play music or sound effects appropriate for any situation. It's just like starring in your own movie"
and the words : "solution" , "movie".
I want to extract from the big string (like Google does on its results page):
"...new wearable audio solution features a working speaker embedded..."
and
"...just like starring in your own movie"
for that i'm using the code :
// For each search word, locate it in desc and try to copy a window of up to
// 80 characters on either side of the match into mini.
for (std::vector<string>::iterator it = words.begin(); it != words.end(); ++it)
{
// Position of the first occurrence of the current word in desc.
int loc1 = (int)desc.find( *it, 0 );
if( loc1 != string::npos )
{
// Walk backwards from the match. NOTE(review): desc.at() throws
// std::out_of_range once loc1-i is no longer a valid index, which is the
// failure reported below this snippet.
while(desc.at(loc1-i) && i<=80)
{
i++;
from=loc1-i;
if(i==80) fromdots=true;
}
i=0;
// Walk forwards from the end of the matched word in the same way.
while(desc.at(loc1+(int)(*it).size()+i) && i<=80)
{
i++;
to=loc1+(int)(*it).size()+i;
if(i==80) todots=true;
}
// Copy the window [from, to] into mini. NOTE(review): the "..." markers are
// appended inside the loop, i.e. once per copied character.
for(int i=from;i<=to;i++)
{
if(fromdots) mini+="...";
mini+=desc.at(i);
if(todots) mini+="...";
}
}
but desc.at(loc1-i) causes OutOfRange exception... I don't know how to check if that position exists without causing an exception !
Help please!
This is an excellent exercise in taking advantage of what the STL has to offer. You simply open a reference and cherry-pick algorithms and classes for your solution!
#include <iostream> // algorithm,string,list,cctype,functional,boost/assign.hpp
using namespace std;
struct remove_from {
remove_from(string& text) : text(text) { }
void operator()(const string& str) {
typedef string::iterator striter;
striter e(search(text.begin(), text.end(), str.begin(), str.end()));
while( e != text.end() ) {
striter b = e;
advance(e, str.length());
e = find_if(e, text.end(), not1(ptr_fun<int,int>(isspace)));
text.erase(b, e);
e = search(text.begin(), text.end(), str.begin(), str.end());
}
}
private:
string& text;
};
int main(int argc, char* argv[])
{
list<string> toremove = boost::assign::list_of("solution")("movie");
string text("This amazing new wearable ...");
for_each(toremove.begin(), toremove.end(), remove_from(text));
cout << text << endl;
return 0;
}
You can just check desc.size() - if it's less than the index you're looking up + 1 then you'll get an exception
The problem is that you start iterating at the first word, then try and check the word before it, hence the OutOfRange Exception.
Your first if could be:
if( loc1 != string::npos && loc1 != 0)
Related
I'm a C# programmer that recently wanted to delve into something lower level so last week started learning C++ but have stumbled on something I thought would be fairly simple.
I enter the following string into my program:
"this is a test this test" and would expect the wordStructList to contain a list of 4 words, with occurrences of "test" and "this" set to 2. When debugging however, the string comparison (I've tried .compare and ==) always seems to increasing the value of occurrences no matter whether the comparison is true.
e.g. currentName = "is"
word = "this"
but occurrences is still been incremented.
#include "stdafx.h"
using std::string;
using std::vector;
using std::find;
using std::distance;
// A distinct word paired with the number of times it has been counted.
struct Word
{
    std::string name;   // text of the word
    int occurrences;    // running count for this word
};
struct find_word : std::unary_function<Word, bool>
{
string name;
find_word(string name):name(name) { }
bool operator()(Word const& w) const
{
return w.name == name;
}
};
// Builds the Word record for a name seen for the first time (count of 1).
Word GetWordStruct(string name)
{
    Word result;
    result.occurrences = 1;
    result.name = name;
    return result;
}
// Reads one line from standard input, splits it on single spaces and builds
// wordStructList with one Word entry per distinct word.
// NOTE(review): the standard signature is `char* argv[]`; this declares
// `char argv[]`.
int main(int argc, char argv[])
{
string s;
string delimiter = " ";
vector<string> wordStringList;
getline(std::cin, s);
// Peel words off the front of s until nothing is left.
do
{
wordStringList.push_back(s.substr(0, s.find(delimiter)));
s.erase(0, s.find(delimiter) + delimiter.length());
// No delimiter left: what remains is the last word.
if (s.find(delimiter) == -1)
{
wordStringList.push_back(s);
s = "";
}
} while (s != "");
vector<Word> wordStructList;
for (int i = 0; i < wordStringList.size(); i++)
{
Word newWord;
// Has this word already been recorded?
vector<Word>::iterator it = find_if(wordStructList.begin(), wordStructList.end(), find_word(wordStringList[i]));
if (it == wordStructList.end())
wordStructList.push_back(GetWordStruct(wordStringList[i]));
else
{
string word(wordStringList[i]);
for (vector<Word>::size_type j = 0; j != wordStructList.size(); ++j)
{
string currentName = wordStructList[j].name;
// NOTE(review): the ';' ends the if statement here, so the increment on
// the next line runs for every j regardless of the comparison.
if(currentName.compare(word) == 0);
wordStructList[j].occurrences++;
}
}
}
return 0;
}
I hope the question makes sense. Anyone shed any light on this? I'm also open to any tips about how to make the code more sensible/readable. Thanks
The problem is the semicolon after this if statement:
if(currentName.compare(word) == 0);
The semicolon terminates the statement, so the next line
wordStructList[j].occurrences++;
is not part of the if statement any more and will always be executed.
Alright, guys ...
Here's my set that has all the letters. I'm defining a word as consisting of consecutive letters from the set.
// The characters that count as letters; a word is a run of consecutive
// characters from this set.
const char LETTERS_ARR[] = {"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"};
// sizeof(LETTERS_ARR) includes the string literal's terminating '\0'; stop one
// element short so that NUL is not registered as a letter.
const std::set<char> LETTERS_SET(LETTERS_ARR, LETTERS_ARR + sizeof(LETTERS_ARR)/sizeof(char) - 1);
I was hoping that this function would take in a string representing a sentence and return a vector of strings that are the individual words in the sentence.
// Intended to split sentence S into its words, where a word is a run of
// consecutive characters found in LETTERS_SET.
std::vector<std::string> get_sntnc_wrds(std::string S) {
std::vector<std::string> retvec;
std::string::iterator it = S.begin();
while (it != S.end()) {
if (LETTERS_SET.count(*it) == 1) {
// Start a new word with the current letter; k counts the extra characters
// appended to it.
std::string str(1,*it);
int k(0);
// NOTE(review): `== 1` sits inside the argument of count(), so the
// character is compared with 1 and count() is called on that bool.
while (((it+k+1) != S.end()) && (LETTERS_SET.count(*(it+k+1) == 1))) {
str.push_back(*(it + (++k)));
}
retvec.push_back(str);
// NOTE(review): advances by k only, which leaves `it` on the word's last
// character.
it += k;
}
else {
++it;
}
}
return retvec;
}
For instance, the following call should return a vector of the strings "Yo", "dawg", etc.
// Example call: the result is expected to hold "Yo", "dawg", and so on.
std::string mystring("Yo, dawg, I heard you life functions, so we put a function inside your function so you can derive while you derive.");
std::vector<std::string> mystringvec = get_sntnc_wrds(mystring);
But everything isn't going as planned. I tried running my code and it was putting the entire sentence into the first and only element of the vector. My function is very messy code and perhaps you can help me come up with a simpler version. I don't expect you to be able to trace my thought process in my pitiful attempt at writing that function.
Try this instead:
#include <vector>
#include <cctype>
#include <string>
#include <algorithm>
// true if the argument is whitespace, false otherwise
bool space(char c)
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF); a negative plain char must be converted first.
    return std::isspace(static_cast<unsigned char>(c)) != 0;
}
// false if the argument is whitespace, true otherwise
bool not_space(char c)
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF); a negative plain char must be converted first.
    return std::isspace(static_cast<unsigned char>(c)) == 0;
}
vector<string> split(const string& str)
{
typedef string::const_iterator iter;
vector<string> ret;
iter i = str.begin();
while (i != str.end())
{
// ignore leading blanks
i = find_if(i, str.end(), not_space);
// find end of next word
iter j = find_if(i, str.end(), space);
// copy the characters in [i, j)
if (i != str.end())
ret.push_back(string(i, j));
i = j;
}
return ret;
}
The split function will return a vector of strings, each element containing one word.
This code is taken from the Accelerated C++ book, so it's not mine, but it works. There are other superb examples of using containers and algorithms for solving every-day problems in this book. I could even get a one-liner to show the contents of a file at the output console. Highly recommended.
It's just a bracketing issue. My advice is to (almost) never put in more brackets than are necessary; it only confuses things.
while (it+k+1 != S.end() && LETTERS_SET.count(*(it+k+1)) == 1) {
Your code compares the character with 1 not the return value of count.
Also although count does return an integer in this context I would simplify further and treat the return as a boolean
while (it+k+1 != S.end() && LETTERS_SET.count(*(it+k+1))) {
You should use a string stream with std::copy, like so:
#include <iostream>
#include <string>
#include <sstream>
#include <algorithm>
#include <iterator>
#include <vector>
// Splits a sample sentence on whitespace and prints one word per line.
int main() {
    std::string sentence = "And I feel fine...";
    std::istringstream iss(sentence);
    std::vector<std::string> split;

    // Formatted extraction yields one whitespace-delimited word at a time.
    std::string word;
    while (iss >> word)
    {
        split.push_back(word);
    }

    // This is to print the vector
    for (const std::string& item : split)
    {
        std::cout << item << "\n";
    }
}
I would use another more simple approach based on member functions of class std::string. For example
const char LETTERS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
std::string s( "This12 34is 56a78 test." );
std::vector<std::string> v;
for ( std::string::size_type first = s.find_first_of( LETTERS, 0 );
first != std::string::npos;
first = s.find_first_of( LETTERS, first ) )
{
std::string::size_type last = s.find_first_not_of( LETTERS, first );
v.push_back(
std::string( s, first, last == std::string::npos ? std::string::npos : last - first ) );
first = last;
}
for ( const std::string &s : v ) std::cout << s << ' ';
std::cout << std::endl;
Here you make two mistakes, which I have corrected in the following code.
First, it should be
while (((it+k+1) != S.end()) && (LETTERS_SET.count(*(it+k+1)) == 1))
and, it should move to next by
it += (k+1);
and the code is
// Splits sentence S into words, where a word is a maximal run of characters
// contained in LETTERS_SET. Everything else acts as a separator.
std::vector<std::string> get_sntnc_wrds(std::string S) {
    std::vector<std::string> words;
    const std::string::iterator stop = S.end();
    std::string::iterator cur = S.begin();
    while (cur != stop) {
        // Separator: step over it.
        if (LETTERS_SET.count(*cur) != 1) {
            ++cur;
            continue;
        }
        // Letter: extend the run as far as it goes, then store it.
        std::string::iterator word_end = cur + 1;
        while (word_end != stop && LETTERS_SET.count(*word_end) == 1) {
            ++word_end;
        }
        words.push_back(std::string(cur, word_end));
        cur = word_end;
    }
    return words;
}
The output has been tested.
I am currently trying to count the number of words in a file. After this, I plan to make it count the words between two words in the file. For example. My file may contain. "Hello my name is James". I want to count the words, so 5. And then I would like to count the number of words between "Hello" and "James", so the answer would be 3. I am having trouble with accomplishing both tasks.
Mainly due to not being exactly sure how to structure my code.
Any help on here would be greatly appreciated. The code I am currently using is using spaces to count the words.
Here is my code:
readwords.cpp
// Reads from wordfile and appends non-whitespace characters to nextword.
// NOTE(review): the return statement sits inside the loop body, so the
// function returns after the first character read; nothing is returned when
// get() yields EOF straight away.
string ReadWords::getNextWord()
{
bool pWord = false;
char c;
while((c = wordfile.get()) !=EOF)
{
if (!(isspace(c)))
{
nextword.append(1, c);
}
return nextword;
}
}
// Returns true while wordfile has not reported end-of-file.
// NOTE(review): eof() presumably only becomes true after a read has run into
// the end of the stream — confirm how callers rely on this.
bool ReadWords::isNextWord()
{
if(!wordfile.eof())
{
return true;
}
else
{
return false;
}
}
main.cpp
// Opens hamlet.txt through ReadWords, counts how many times getNextWord()
// can be called before isNextWord() turns false, and prints that count.
main()
{
int count = 0;
ReadWords rw("hamlet.txt");
while(rw.isNextWord()){
rw.getNextWord();
count++;
}
cout << count;
rw.close();
}
What it does at the moment is counts the number of characters. I'm sure its just a simple fix and something silly that I'm missing. But I've been trying for long enough to go searching for some help.
Any help is greatly appreciated. :)
Rather than parse the file character-by-character, you can simply use istream::operator>>() to read whitespace-separated words. >> returns the stream, which evaluates to true as a bool when the stream can still be read from.
// Read whitespace-separated words from wordfile into `words`; the loop ends
// when the stream no longer converts to true.
vector<string> words;
string word;
while (wordfile >> word)
words.push_back(word);
There is a common formulation of this using the <iterator> and <algorithm> utilities, which is more verbose, but can be composed with other iterator algorithms:
// Same read expressed with iterators: `input` walks the words in wordfile and
// the default-constructed `end` marks end-of-stream.
istream_iterator<string> input(wordfile), end;
copy(input, end, back_inserter(words));
Then you have the number of words and can do with them whatever you like:
words.size()
If you want to find "Hello" and "James", use find() from the <algorithm> header to get iterators to their positions:
// Find "Hello" anywhere in 'words'; find() yields words.end() if it is absent.
const auto hello = find(words.begin(), words.end(), "Hello");
// Find "James" anywhere after 'hello' in 'words', again words.end() if absent.
const auto james = find(hello, words.end(), "James");
If they’re not in the vector, find() will return words.end(); ignoring error checking for the purpose of illustration, you can count the number of words between them by taking their difference, adjusting for the inclusion of "Hello" in the range:
const auto count = james - (hello + 1);
You can use operator-() here because std::vector::iterator is a “random-access iterator”. More generally, you could use std::distance() from <iterator>:
const auto count = distance(hello, james) - 1;
Which has the advantage of being more descriptive of what you’re actually doing. Also, for future reference, this kind of code:
// Illustration of the verbose form: branches on x only to return the
// matching boolean literal.
bool f() {
if (x) {
return true;
} else {
return false;
}
}
Can be simplified to just:
// Illustration of the short form: x is converted to bool by the return.
bool f() {
return x;
}
Since x is already being converted to bool for the if.
To count:
// Count the whitespace-separated words in hamlet.txt: every successful
// extraction increments count; the loop body itself is empty.
std::ifstream infile("hamlet.txt");
std::size_t count = 0;
for (std::string word; infile >> word; ++count) { }
To count only between start and stop:
// Count the words that lie strictly between the first "Hello" and the next
// "James" in hamlet.txt (3 for "Hello my name is James").
std::ifstream infile("hamlet.txt");
std::size_t count = 0;
bool active = false;   // true once the start marker has been read
for (std::string word; infile >> word; )
{
    if (!active)
    {
        // The start marker only opens the range; it is not counted itself.
        active = (word == "Hello");
        continue;
    }
    if (word == "James") break;   // the end marker closes the range
    ++count;
}
I think "return nextword;" should instead be "else return nextword;" or else you are returning from the function getNextWord every time, no matter what the char is.
// Appends characters read from wordfile to nextword until a whitespace
// character or the end of the input is reached, then returns nextword.
// NOTE(review): nextword is not reset here; presumably it is a member that
// the caller clears between words — confirm.
string ReadWords::getNextWord()
{
    // get() returns an int so that EOF can be told apart from every valid
    // character; a char would truncate it.
    int c;
    while ((c = wordfile.get()) != EOF)
    {
        if (!isspace(c))
        {
            nextword.append(1, static_cast<char>(c));
        }
        else return nextword; // a whitespace character ends the word
    }
    // End of input: return what was collected instead of falling off the end
    // of a value-returning function.
    return nextword;
}
To count all words:
// Print the number of whitespace-separated words in hamlet.txt: the distance
// between a stream iterator on f and the end-of-stream iterator.
std::ifstream f("hamlet.txt");
std::cout << std::distance (std::istream_iterator<std::string>(f),
std::istream_iterator<std::string>()) << '\n';
To count between two words:
// Count the words between "Hello" and "James" using stream iterators.
// NOTE(review): istream_iterator is single-pass, and std::find works on a
// copy of `it` while consuming the shared stream, so the result depends on
// this exact statement order — verify before restructuring.
std::ifstream f("hamlet.txt");
std::istream_iterator<std::string> it(f), end;
int count = 0;
while (std::find(it, end, "Hello") != end)
while (++it != end && *it != "James")
++count;
std::cout << count;
Try this:
below the line
nextword.append(1, c);
add
continue;
I'm a beginner to C++, so please be understanding...
I want to search for a string (needle) within a file (haystack), by reading each line separately, then searching for the needle in that line. However, ideally for a more robust code I would like to be able to just read individual words on the line, so that if there are larger (i.e. multiple) white-space gaps betweeen words they are ignored when searching for the needle. (e.g perhaps using the >> operator??) That is, the needle string should not have to exactly match the size of the space between words in the file.
so for example, if I have a needle:
"The quick brown fox jumps over the lazy dog"
in the file this might be written (on a particular line) as:
... "The quick brown fox jumps over the lazy dog" ...
Is there an efficient way to do this?
Currently I include the necessary number of spaces in my needle string but I would like to improve the code, if possible.
My code currently looks something like the following (within a method in a class):
double var1, var2;
// A std::string grows as needed; extracting with >> into a fixed char[5]
// can write past the array when the skipped word is longer.
std::string skip;
std::fstream haystack ("filename");
std::string needle = "This is a string, and var1 =";
std::string line;
// find() returns size_type; storing it in an int truncates the value before
// it is compared with npos.
std::string::size_type pos;
bool found = false;
// Search for needle
while ( !found && getline (haystack,line) ) {
    pos = line.find(needle); // find position of needle in current line
    if (pos != std::string::npos) { // current line contains needle
        std::stringstream lineStream(line);
        // Continue reading right after the needle.
        lineStream.seekg (pos + needle.length());
        lineStream >> var1;
        lineStream >> skip; // word between the two values
        lineStream >> var2;
        found = true;
    }
}
(Just for clarity, after finding the string (needle) I want to store the next word on that line or in some cases store the next word, then skip a word and store the following word, for example:
With a file:
... ...
... This is a string, and var1 = 111 and 777 ...
... ...
I want to extract var1 = 111; var2 = 777; )
Thanks in advance for any help!
This will work, although I think there's a shorter solution:
// Searches `ins` for `str`, ignoring extra spaces in `ins`.
//
// Returns the index in `ins` of the LAST character of the first match, or
// std::string::npos when there is no match or `str` is empty. Any space in
// `ins` that does not match the next needle character is skipped, so runs of
// spaces in the haystack match a single space in the needle.
std::size_t myfind(std::string ins, std::string str) {
    // An empty needle has no last matched character to report.
    if (str.empty())
        return std::string::npos;

    // Try every start position, so that a failed partial match cannot hide a
    // match beginning inside it (e.g. "aab" in "aaab").
    for (std::string::size_type start = 0; start < ins.size(); ++start) {
        std::string::size_type matched = 0;   // needle characters matched so far
        for (std::string::size_type i = start; i < ins.size(); ++i) {
            if (ins[i] == str[matched]) {
                if (++matched == str.size())
                    return i;
            }
            else if (ins[i] != ' ') {
                break;   // real mismatch: give up on this start position
            }
            // otherwise: surplus space in the haystack, skip it
        }
    }
    return std::string::npos;
}
// use:
myfind("foo The quick brown fox jumps over the lazy dog bar", "The quick brown fox");
You can find all sequences of white space characters in the line string, and replace them with a single white space. This way you would be able to replace multiple spaces in the needle as well, and the rest of your search algorithm would continue working unchanged.
Here is a way to remove duplicates using STL:
#include <iostream>
#include <algorithm>
#include <string>
#include <iterator>
using namespace std;
// Stateful predicate: returns true for a space that directly follows another
// space, and false for every other character.
struct DupSpaceDetector {
    bool wasSpace;   // whether the previously seen character was a space

    DupSpaceDetector() : wasSpace(false) {}

    bool operator()(int c) {
        const bool isSpace = (c == ' ');
        const bool isDuplicate = isSpace && wasSpace;
        wasSpace = isSpace;   // remember for the next character
        return isDuplicate;
    }
};
int main() {
string source("The quick brown fox jumps over the lazy dog");
string destination;
DupSpaceDetector detector;
remove_copy_if(
source.begin()
, source.end()
, back_inserter(destination)
, detector
);
cerr << destination << endl;
return 0;
}
To solve your problem you should strip extra spaces from the needle and the haystack line. std::unique is defined to do this. Normally it is used after sorting the range, but in this case all we really want to do is remove duplicate spaces.
// Binary predicate for std::unique / std::unique_copy: two neighbouring
// characters count as duplicates when both are whitespace.
struct dup_space
{
    bool operator()( char lhs, char rhs ) const
    {
        // std::isspace is only defined for values representable as unsigned
        // char (or EOF), so convert before classifying.
        return std::isspace( static_cast<unsigned char>( lhs ) )
            && std::isspace( static_cast<unsigned char>( rhs ) );
    }
};
// Appends `in` to `out` with every run of whitespace reduced to its first
// character.
void despacer( const std::string& in, std::string& out )
{
    // Output is appended, so leave room for what `out` already holds.
    out.reserve( out.size() + in.size() );
    // std::back_inserter deduces the container type; writing
    // std::back_insert_iterator( out ) without template arguments only
    // compiles from C++17 on.
    std::unique_copy( in.begin(), in.end(),
                      std::back_inserter( out ),
                      dup_space()
                    );
}
You should use it like this:
// Looks for `needle` in every line of `haystack`, comparing both with runs of
// whitespace collapsed so that the amount of spacing does not matter.
// The stream is taken by reference because streams cannot be copied.
void find( const std::string& needle, std::istream& haystack )
{
    std::string real_needle;
    despacer( needle, real_needle );
    std::string line;
    std::string real_line;
    // Testing the getline() result stops at the first failed read; looping on
    // good() would process one extra, empty line at the end of the input.
    while( std::getline( haystack, line ) )
    {
        real_line.clear();
        despacer( line, real_line );
        auto ret = real_line.find( real_needle );
        if( ret != std::string::npos )
        {
            // found it
            // do something creative
        }
    }
}
#include <iostream>
#include <vector>
using namespace std;
// Recursive attempt at printing the words of str in reverse order: each call
// collects the first word into v1 and recurses on the rest of the string.
void RevStr (char *str)
{
if(*str !=0)
{
// Characters of the first word of the remaining input.
vector<char> v1;
while((*str != ' ')&&(*str !=0))
v1.push_back(*str++);
// trying to not add space in the last word of string
if(*str !=0)
{
v1.push_back(' ');
str++;
}
RevStr(str);
// NOTE(review): this prints the single character str now points at; the
// word collected in v1 is never printed.
cout<<*str;
}
}
// Runs RevStr on a sample sentence and ends the output line.
// NOTE(review): a string literal is passed where RevStr takes a non-const
// char*.
int main()
{
RevStr("hello world!");
cout<<endl;
}
I want to change the order of words in the string for example " how are you" => "you are how"
I am having some problem, its not printing correctly (print only w), please help me and tell me what i did wrong. However i know that I should not call "cout<<*str;
" since i am inserting the "array of char" in stack (recurssion) but i dont know what i need to do.
C++ makes it simple:
#include <algorithm>
#include <iterator>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
// Returns the whitespace-separated words of `text` in reverse order, joined
// by single spaces with no leading or trailing space.
std::string reverse(std::string const& text)
{
    // Split on whitespace; formatted extraction skips runs of blanks.
    std::stringstream inStream(text);
    std::vector<std::string> words;
    std::copy(std::istream_iterator<std::string>(inStream), std::istream_iterator<std::string>(), std::back_inserter(words));

    // Join back to front. The separator goes between words only; an
    // ostream_iterator with a " " delimiter would also write one after the
    // last word.
    std::string result;
    for (std::vector<std::string>::const_reverse_iterator word = words.rbegin(); word != words.rend(); ++word)
    {
        if (!result.empty())
            result += ' ';
        result += *word;
    }
    return result;
}
int main()
{
std::cout << reverse("Hello World") << "\n";
}
A common approach to do this is to reverse the entire string first, then for each word, reverse the letters in the word. So no recursion is necessary. You might find it easier to give this a try (yes, I know this isn't exactly an answer to your question :) ).
Use cout << str, not cout << *str to print a string. There's an operator<< overload for char *. But maybe that's not what you're trying to do; I can't quite follow your logic, in any event.
You're losing the "hello" part.
The algorithm you seem to go for does this:
each call to RevStr isolates the first word in the string it is passed as a parameter
calls RevStr with the remaining of the string
prints the word it isolated at step 1 as the stack unwinds
Basically, you should be printing the v1 data.
I would strongly advise using some of the functionality exposed via std::string as a place to start.
One way you might do this would look like this:
// Returns the space-separated pieces of s in reverse order, joined by single
// spaces. Pieces are cut at every ' ' character, so two adjacent spaces
// produce an empty piece.
std::string ReverseString(std::string s)
{
    std::stack<std::string> pieces;
    const std::string::size_type length = s.size();

    // Cut s at each space and push the pieces; the stack hands them back in
    // reverse order later.
    for (std::string::size_type begin = 0; begin < length; )
    {
        std::string::size_type end = s.find(' ', begin);
        if (end == std::string::npos)
        {
            end = length;   // the last piece runs to the end of the string
        }
        pieces.push(s.substr(begin, end - begin));
        begin = end + 1;
    }

    // Pop the pieces, putting a separator in front of all but the first.
    std::string result;
    bool first = true;
    while (!pieces.empty())
    {
        if (!first)
        {
            result.append(" ");
        }
        result.append(pieces.top());
        pieces.pop();
        first = false;
    }
    return result;
}
It's by no means the best or fastest way to achieve this; there are many other ways you could do it (Jerry Coffin mentions using std::vector with an iterator, for example), but as you have the power of C++ there, to me it would make sense to use it.
I've done it this way so you could use a different delimiter if you wanted to.
In case you're interested, you can now use this with:
// Reverses the word order of a sample sentence and prints the result.
int main(int argc, char** argv)
{
    const std::string original("In Soviet Russia String Format You");
    std::cout << ReverseString(original) << std::endl;
}
given that its a char*, this reverses it inplace (ie, doesn't require more memory proportional to the incoming 'str'). This avoids converting it to a std::string ( not that its a bad idea to, just because it's a char* to start with.)
// Reverses the order of the space-separated words of str in place: the whole
// buffer is reversed first, then each word is reversed back.
void reverse_words(char* str)
{
    char* const end = str + strlen(str);
    std::reverse(str, end);

    char* word = str;   // start of the word currently being scanned
    for (char* p = str; p != end; ++p)
    {
        if (*p != ' ')
        {
            continue;
        }
        std::reverse(word, p);
        word = p + 1;
    }
    // The last word is closed by the end of the string, not by a space.
    std::reverse(word, end);
}
// Prints the space-separated words of text in reverse order, each followed by
// a space, and ends the line.
void Reverse(const string& text)
{
    list<string> reversed;   // words collected so far, newest first
    string current;          // word being assembled

    for (const char ch : text)
    {
        if (ch != ' ')
        {
            current += ch;
            continue;
        }
        // A space closes the current word (which may be empty).
        reversed.push_front(current);
        current.clear();
    }
    if (!current.empty())
    {
        reversed.push_front(current);
    }

    for (const string& word : reversed)
    {
        cout << word << " ";
    }
    cout << endl;
}
// Exchanges the two characters that c1 and c2 point at.
void swap(char* c1, char* c2) {
    const char second = *c2;
    *c2 = *c1;
    *c1 = second;
}
// Reverses the characters in the inclusive range [s, e] in place.
// Does nothing when either pointer is null.
void reverse(char* s, char* e) {
    if (!s || !e)
        return;
    // Swap the outermost pair and move both ends inwards until they meet.
    for (; s < e; ++s, --e)
        swap(s, e);
}
// Reverses the order of the space-separated words of line in place: the whole
// string is reversed first, then each word is reversed back.
// A null pointer or an empty string is left untouched.
void reverse_words(char* line) {
    // The empty-string check keeps line + strlen(line) - 1 from pointing
    // before the buffer.
    if (line == NULL || *line == '\0')
        return;
    reverse(line, line + strlen(line) - 1);
    char* s = line;
    while (*s != '\0') {
        // Find the delimiter (space or terminator) that ends the word at s.
        char* e = s;
        while (*e != ' ' && *e != '\0') ++e;
        if (e != s)
            reverse(s, e - 1);   // reverse() takes an inclusive range
        // Stop on the terminator; stepping over it would read past the end
        // of the buffer.
        if (*e == '\0')
            break;
        s = e + 1;
    }
}