Find occurrences of all substrings in a given string

Find occurrences of all substrings in a given string - c++

I use a simple string function strstr to find the first occurrence of a string in some text. I used the following code to count the number of unique words in a text.
for (int i = 0; i < 24; i++)
{
if (strstr(text, ops[i]))
{
op++;
}
}
But I want to find the occurrence of all the sub strings in the program. How can I do this?

strstr() is for the C-style string, if you are really using C++, std::string and its member function would be much more convenient.
#include <string>
#include <iostream>
using namespace std;
int main()
{
string s("hello hello");
int count = 0;
size_t nPos = s.find("hello", 0); // first occurrence
while(nPos != string::npos)
{
count++;
nPos = s.find("hello", nPos + 1);
}
cout << count;
};

You can use one of the std::string find methods which would be easier (and safer), but if you really need to use strstr:
int _tmain(int argc, _TCHAR* argv[])
{
const char test[] = "this test is a test";
const char subStr[] = "test";
const char* pCurrent = strstr( test, subStr );
while( pCurrent != NULL )
{
std::cout << "found" << std::endl;
pCurrent++;
pCurrent = strstr( pCurrent, subStr );
}
return 0;
}
This just increments the point where the last sub string was found. Note that you should do the normal string length, NULL and safety checks.

Related

How to split a string by another string in Arduino?

I have a character array like below:
char array[] = "AAAA... A1... 3. B1.";
How can I split this array by the string "..." in Arduino? I have tried:
ptr = strtok(array, "...");
and the output is the following:
AAAA,
A1,
3,
B1
But I actually want output to be
AAAA,
A1,
3.B1.
How to get this output?
edit:
My full code is this:
char array[] = "AAAA... A1... 3. B1.";
char *strings[10];
char *ptr = NULL;`enter code here`
void setup()
{
Serial.begin(9600);
byte index = 0;
ptr = strtok(array, "..."); // takes a list of delimiters
while(ptr != NULL)
{
strings[index] = ptr;
index++;
ptr = strtok(NULL, "..."); // takes a list of delimiters
}
for(int n = 0; n < index; n++)
{
Serial.println(strings[n]);
}
}

The main problem is that strtok does not find a string inside another string. strtok looks for a character in a string. When you give multiple characters to strtok it looks for any of these. Consequently, writing strtok(array, "..."); is exactly the same as writing strtok(array, ".");. That is why you get a split after "3."
There are multiple ways of doing what you want. Below I'll show you an example using strstr. Unlike strtokthe strstr function do find a substring inside a string - just what you are looking for. But.. strstr is not a tokenizer so some extra code is required to print the substrings.
Something like this should do:
int main()
{
char array[] = "AAAA... A1... 3. B1...";
char* ps = array;
char* pf = strstr(ps, "..."); // Find first substring
while(pf)
{
int len = pf - ps; // Number of chars to print
printf("%.*s\n", len, ps);
ps = pf + 3;
pf = strstr(ps, "..."); // Find next substring
}
return 0;
}

You can implement your own split as strtok except the role of the second argument :
#include <stdio.h>
#include <string.h>
char * split(char *str, const char * delim)
{
static char * s;
char * p, * r;
if (str != NULL)
s = str;
p = strstr(s, delim);
if (p == NULL) {
if (*s == 0)
return NULL;
r = s;
s += strlen(s);
return r;
}
r = s;
*p = 0;
s = p + strlen(delim);
return r;
}
int main()
{
char s[] = "AAAA... A1... 3. B1.";
char * p = s;
char * t;
while ((t = split(p, "...")) != NULL) {
printf("'%s'\n", t);
p = NULL;
}
return 0;
}
Compilation and execution:
/tmp % gcc -g -pedantic -Wextra s.c
/tmp % ./a.out
'AAAA'
' A1'
' 3. B1.'
/tmp %
I print between '' to show the return spaces, because I am not sure you want them, so delim is not only ... in that case

Because you tagged this as c++, here is a c++ 'version' of your code:
#include <iostream>
using std::cout;
using std::endl;
#include <vector>
using std::vector;
#include <string>
using std::string;
class T965_t
{
string array;
vector<string> strings;
public:
T965_t() : array("AAAA... A1... 3. B1.")
{
strings.reserve(10);
}
~T965_t() = default;
int operator()() { return setup(); } // functor entry
private: // methods
int setup()
{
cout << endl;
const string pat1 ("... ");
string s1 = array; // working copy
size_t indx = s1.find(pat1, 0); // find first ... pattern
// start search at ---------^
do
{
if (string::npos == indx) // pattern not found
{
strings.push_back (s1); // capture 'remainder' of s1
break; // not found, kick out
}
// else
// extract --------vvvvvvvvvvvvvvvvv
strings.push_back (s1.substr(0, indx)); // capture
// capture to vector
indx += pat1.size(); // i.e. 4
s1.erase(0, indx); // erase previous capture
indx = s1.find(pat1, 0); // find next
} while(true);
for(uint n = 0; n < strings.size(); n++)
cout << strings[n] << "\n";
cout << endl;
return 0;
}
}; // class T965_t
int main(int , char**) { return T965_t()(); } // call functor
With output:
AAAA
A1
3. B1.
Note: I leave changing "3. B1." to "3.B1.", and adding commas at end of each line (except the last) as an exercise for the OP if required.

I looked for a split function and I didn't find one that meets my requirement, so I made one and it works for me so far, of course in the future I will make some improvements, but it got me out of trouble.
But there is also the strtok function and better use that.
https://www.delftstack.com/es/howto/arduino/arduino-strtok/
I have the split function
Arduino code:
void split(String * vecSplit, int dimArray,String content,char separator){
if(content.length()==0)
return;
content = content + separator;
int countVec = 0;
int posSep = 0;
int posInit = 0;
while(countVec<dimArray){
posSep = content.indexOf(separator,posSep);
if(posSep<0){
return;
}
countVec++;
String splitStr = content.substring(posInit,posSep);
posSep = posSep+1;
posInit = posSep;
vecSplit[countVec] = splitStr;
countVec++;
}
}
Llamada a funcion:
smsContent = "APN:4g.entel;DOMAIN:domolin.com;DELAY_GPS:60";
String vecSplit[10];
split(vecSplit,10,smsContent,';');
for(int i = 0;i<10;i++){
Serial.println(vecSplit[i]);
}
String input:
APN:4gentel;DOMAIN:domolin.com;DELAY_GPS:60
Output:
APN:4g.entel
DOMAIN:domolin.com
DELAY_GPS:60
RESET:true
enter image description here

Program to check if one string is contained cyclic in the other one

doing some exrecises for upcoming test. a bit stuck in this one.
"Write a program that asks the user for two strings and checks and prints a message if the second string is contained cyclic in the first string. The cyclic containment means that either the second string appears normally within the first string or the second string appears so that its prefix appears at the end of the first string and the continuation appears at the beginning of the first string".
You can assume that the strings contain only lowercase letters.
String functions are only allowed are : strlen, strcpy, strcmp, strcat
for example:
String A: itisaniceday
String B: sanic
Is a regular occurrence
String A: itisaniceday
String B: dayit
It's a cyclic occurence.
what I did so far:
#include <iostream>
#include <string.h>
using namespace std;
#define Max 128
int isCyclic(char* str1, char* str2);
int main()
{
char* str1 = new char[Max];
char* str2 = new char[Max];
cout << "Please enter two strings:" << endl;
cin >> str1 >> str2;
cout << isCyclic(str1, str2) << endl;
delete[] str1;
delete[] str2;
}
int isCyclic(char* str1, char* str2)
{
int s1 = strlen(str1);
int s2 = strlen(str2);
if (s1!=s2) // if string size is diffrent - they are not contained cyclic
{
return 0;
}
}

You will need two loops, first one over string 1 which is our starting point in string 1 for comparison and second one over string 2 which will be matched to string 1 in a cyclic way. If we reach the end of string 1 and still some characters are left in string 2 then cycle through string 1 starting from index 0.
#include <stdio.h>
#include <iostream>
#include <string.h>
// Should be avoided in general. Use scope resolution instead.
using namespace std;
char* isCyclic(char* s1, char* s2){
int s1_size = strlen(s1);
int s2_size = strlen(s2);
// s1 must contain s2
if(s2_size > s1_size)
return "No Occurence";
for(int i = 0; i < s1_size; i++){
int current = i;
// Boolean to track if we are currently cycling through s1
bool inCycle = false;
int j = 0;
for(; j < s2_size; j++, current++){
// character wise comparision
if(s2[j] != s1[current])
break;
if(! inCycle){
// start from first. Note that we are setting current = -1.
// as we will be incrementing it in the for loop.
if(current == s1_size - 1 && j < s2_size - 1){
current = -1;
inCycle = true;
}
}
}
if(j == s2_size){
if(inCycle)
return "cyclic";
else
return "regular";
}
}
return "No Occurence";
}
int main()
{
printf("Hello World\n");
char* s1 = "itisaniceday";
char* s2 = "dayitis";
cout<<"Occurence Type: "<<isCyclic(s1, s2)<<endl;
return 0;
}

Here is a solution for the second part of the problem (cyclic part). I simply go through all of the characters in the first string and checked if they are the beginning of a cyclic appearance of the second string.
To check that I used % (the modolu operation) if you don't know what it dose then you really need to learn it now.
Also I used bool instead of int because numbers are confusing (and cursed).
#include <iostream>
#include <string.h>
using namespace std;
#define Max 128
bool isCyclic(char* str1, char* str2);
bool isCyclic(char* str1, char* str2,int start);
int main()
{
char* str1 = new char[Max];
char* str2 = new char[Max];
cout << "Please enter two strings:" << endl;
cin >> str1 >> str2;
cout << isCyclic(str1, str2) << endl;
delete[] str1;
delete[] str2;
}
bool isCyclic(char* str1, char* str2) {
for(int i = 0; i < strlen(str1); i++) {
if(str1[i] == str2[0] && isCyclic(str1,str2,i)) {
return true;
}
}
return false;
}
bool isCyclic(char* str1, char* str2,int start)
{
int containingStrLen = strlen(str1);
for(int i = 0; i < strlen(str2); i++) {
if(str1[(start + i)%containingStrLen] != str2[i]) {
return false;
}
}
return true;
}
There are some things missing in this code still:
1) The first part of the problem (it can easily be derived from this code).
2) Some size validation such as making sure that str1 is bigger then str2 before using is cyclic. And that the strings are smaller then Max (I assume).
3) A proper result print.
Good luck in your exam :)

There's a simple trick : if you duplicate the string's prefix at its own end, the problem becomes a straight substring search as a cyclic match would be recomposed at the end. It also handles the corner case where the substring loops back on itself, such as "looploop" inside of "loop".
So here's how you'd do it in broken C-ish dialect:
bool containsCyclic(char const *string, char const *substring) {
std::size_t const stringLen = std::strlen(string);
std::size_t const substringLen = std::strlen(substring);
// Too long a substring wouldn't fit in the string
if(substringLen > 2 * stringLen)
return false;
// Concatenate `string` with its own substring-long prefix
char *const loopedString = new char[stringLen + substringLen + 1];
std::strcpy(loopedString, string);
{ // Partial reimplementation of std::strncpy(loopedString, string, substringLen)
char const *src = string;
char *dest = loopedString + stringLen;
for(std::size_t count = 0; count < substringLen; ++count)
*dest++ = *src++;
*dest = '\0';
}
{ // Partial and naïve reimplementation of std::strstr(loopedString, substring)
for(char const *start = loopedString; start < loopedString + stringLen; ++start) {
// Check if substring is present at this offset
char const *s1 = start;
char const *s2 = substring;
while(*s2 != '\0' && *s1 == *s2)
++s1, ++s2;
if(*s2 == '\0') {
// We found a complete match of substring inside loopedString
delete[] loopedString;
return true;
}
}
}
// No match found
delete[] loopedString;
return false;
}
And just for kicks, here it is in C++:
bool containsCyclicCpp(std::string const &string, std::string const &substring) {
std::string const loopedString = string + string.substr(0, substring.size());
return loopedString.find(substring) != std::string::npos;
}
See it live on Coliru (with tests!)

How to find substring from string?

How do I find a substring from the string path "/user/desktop/abc/post/" using C/C++? I want to check if folder "abc" is present or not in that path.
Path is character pointer char *ptr = "/user/desktop/abc/post/";

Use std::string and find.
std::string str = "/user/desktop/abc/post/";
bool exists = str.find("/abc/") != std::string::npos;

In C, use the strstr() standard library function:
const char *str = "/user/desktop/abc/post/";
const int exists = strstr(str, "/abc/") != NULL;
Take care to not accidentally find a too-short substring (this is what the starting and ending slashes are for).

Example using std::string find method:
#include <iostream>
#include <string>
int main (){
std::string str ("There are two needles in this haystack with needles.");
std::string str2 ("needle");
size_t found = str.find(str2);
if(found!=std::string::npos){
std::cout << "first 'needle' found at: " << found << '\n';
}
return 0;
}
Result:
first 'needle' found at: 14.

Use strstr(const char *s , const char *t)
and include<string.h>
You can write your own function which behaves same as strstr and you can modify according to your requirement also
char * str_str(const char *s, const char *t)
{
int i, j, k;
for (i = 0; s[i] != '\0'; i++)
{
for (j=i, k=0; t[k]!='\0' && s[j]==t[k]; j++, k++);
if (k > 0 && t[k] == '\0')
return (&s[i]);
}
return NULL;
}

As user1511510 has identified, there's an unusual case when abc is at the end of the file name. We need to look for either /abc/ or /abc followed by a string-terminator '\0'. A naive way to do this would be to check if either /abc/ or /abc\0 are substrings:
#include <stdio.h>
#include <string.h>
int main() {
const char *str = "/user/desktop/abc";
const int exists = strstr(str, "/abc/") || strstr(str, "/abc\0");
printf("%d\n",exists);
return 0;
}
but exists will be 1 even if abc is not followed by a null-terminator. This is because the string literal "/abc\0" is equivalent to "/abc". A better approach is to test if /abc is a substring, and then see if the character after this substring (indexed using the pointer returned by strstr()) is either a / or a '\0':
#include <stdio.h>
#include <string.h>
int main() {
const char *str = "/user/desktop/abc", *substr;
const int exists = (substr = strstr(str, "/abc")) && (substr[4] == '\0' || substr[4] == '/');
printf("%d\n",exists);
return 0;
}
This should work in all cases.

If you are utilizing arrays too much then you should include cstring.h because it has too many functions including finding substrings.

Reversing a string, weird output c++

Okay, so I'm trying to reverse a C style string in C++ , and I'm coming upon some weird output. Perhaps someone can shed some light?
Here is my code:
int main(){
char str[] = "string";
int strSize = sizeof(str)/sizeof(char);
char str2[strSize];
int n = strSize-1;
int i =0;
while (&str+n >= &str){
str2[i] = *(str+n);
n--;
i++;
}
int str2size = sizeof(str)/sizeof(char);
int x;
for(x=0;x<str2size;x++){
cout << str2[x];
}
}
The basic idea here is just making a pointer point to the end of the string, and then reading it in backwards into a new array using pointer arithmetic.
In this particular case, I get an output of: " gnirts"
There is an annoying space at the beginning of any output which I'm assuming is the null character? But when I try to get rid of it by decrementing the strSize variable to exclude it, I end up with some other character on the opposite end of the string probably from another memory block.
Any ideas on how to avoid this? PS: (would you guys consider this a good idea of reversing a string?)

A valid string should be terminated by a null character. So you need to keep the null character in its original position (at the end of the string) and only reverse the non-null characters. So you would have something like this:
str2[strSize - 1] = str[strSize - 1]; // Copy the null at the end of the string
int n = strSize - 2; // Start from the penultimate character

There is an algorithm in the Standard Library to reverse a sequence. Why reinvent the wheel?
#include <algorithm>
#include <cstring>
#include <iostream>
int main()
{
char str[] = "string";
std::reverse(str, str + strlen(str)); // use the Standard Library
std::cout << str << '\n';
}

#ildjarn and #Blastfurnace have already given good ideas, but I think I'd take it a step further and use the iterators to construct the reversed string:
std::string input("string");
std::string reversed(input.rbegin(), input.rend());
std::cout << reversed;

I would let the C++ standard library do more of the work...
#include <cstddef>
#include <algorithm>
#include <iterator>
#include <iostream>
int main()
{
typedef std::reverse_iterator<char const*> riter_t;
char const str[] = "string";
std::size_t const strSize = sizeof(str);
char str2[strSize] = { };
std::copy(riter_t(str + strSize - 1), riter_t(str), str2);
std::cout << str2 << '\n';
}

while (&str+n >= &str){
This is nonsense, you want simply
while (n >= 0) {
and
str2[i] = *(str+n);
should be the much more readable
str2[i] = str[n];

Your while loop condition (&str+n >= &str) is equivalent to (n >= 0).
Your *(str+n) is equivalent to str[n] and I prefer the latter.
As HappyPixel said, your should start n at strSize-2, so the first character copied will be the last actual character of str, not the null termination character of str.
Then after you have copied all the regular characters in the loop, you need to add a null termination character at the end of the str2 using str2[strSize-1] = 0;.
Here is fixed, working code that outputs "gnirts":
#include <iostream>
using namespace std;
int main(int argc, char **argv){
char str[] = "string";
int strSize = sizeof(str)/sizeof(char);
char str2[strSize];
int n = strSize-2; // Start at last non-null character
int i = 0;
while (n >= 0){
str2[i] = str[n];
n--;
i++;
}
str2[strSize-1] = 0; // Add the null terminator.
int str2size = sizeof(str)/sizeof(char);
int x;
cout << str2;
}

Count character occurrences in a string in C++

How can I count the number of "_" in a string like "bla_bla_blabla_bla"?

#include <algorithm>
std::string s = "a_b_c";
std::string::difference_type n = std::count(s.begin(), s.end(), '_');

Pseudocode:
count = 0
For each character c in string s
Check if c equals '_'
If yes, increase count
EDIT: C++ example code:
int count_underscores(string s) {
int count = 0;
for (int i = 0; i < s.size(); i++)
if (s[i] == '_') count++;
return count;
}
Note that this is code to use together with std::string, if you're using char*, replace s.size() with strlen(s).
Also note: I can understand you want something "as small as possible", but I'd suggest you to use this solution instead. As you see you can use a function to encapsulate the code for you so you won't have to write out the for loop everytime, but can just use count_underscores("my_string_") in the rest of your code. Using advanced C++ algorithms is certainly possible here, but I think it's overkill.

Old-fashioned solution with appropriately named variables. This gives the code some spirit.
#include <cstdio>
int _(char*__){int ___=0;while(*__)___='_'==*__++?___+1:___;return ___;}int main(){char*__="_la_blba_bla__bla___";printf("The string \"%s\" contains %d _ characters\n",__,_(__));}
Edit: about 8 years later, looking at this answer I'm ashamed I did this (even though I justified it to myself as a snarky poke at a low-effort question). This is toxic and not OK. I'm not removing the post; I'm adding this apology to help shifting the atmosphere on StackOverflow. So OP: I apologize and I hope you got your homework right despite my trolling and that answers like mine did not discourage you from participating on the site.

Using the lambda function to check the character is "_" then the only count will be incremented else not a valid character
std::string s = "a_b_c";
size_t count = std::count_if( s.begin(), s.end(), []( char c ){return c =='_';});
std::cout << "The count of numbers: " << count << std::endl;

#include <boost/range/algorithm/count.hpp>
std::string str = "a_b_c";
int cnt = boost::count(str, '_');

You name it... Lambda version... :)
using namespace boost::lambda;
std::string s = "a_b_c";
std::cout << std::count_if (s.begin(), s.end(), _1 == '_') << std::endl;
You need several includes... I leave you that as an exercise...

Count character occurrences in a string is easy:
#include <bits/stdc++.h>
using namespace std;
int main()
{
string s="Sakib Hossain";
int cou=count(s.begin(),s.end(),'a');
cout<<cou;
}

There are several methods of std::string for searching, but find is probably what you're looking for. If you mean a C-style string, then the equivalent is strchr. However, in either case, you can also use a for loop and check each character—the loop is essentially what these two wrap up.
Once you know how to find the next character given a starting position, you continually advance your search (i.e. use a loop), counting as you go.

I would have done it this way :
#include <iostream>
#include <string>
using namespace std;
int main()
{
int count = 0;
string s("Hello_world");
for (int i = 0; i < s.size(); i++)
{
if (s.at(i) == '_')
count++;
}
cout << endl << count;
cin.ignore();
return 0;
}

You can find out occurrence of '_' in source string by using string functions.
find() function takes 2 arguments , first - string whose occurrences we want to find out and second argument takes starting position.While loop is use to find out occurrence till the end of source string.
example:
string str2 = "_";
string strData = "bla_bla_blabla_bla_";
size_t pos = 0,pos2;
while ((pos = strData.find(str2, pos)) < strData.length())
{
printf("\n%d", pos);
pos += str2.length();
}

The range based for loop comes in handy
int countUnderScores(string str)
{
int count = 0;
for (char c: str)
if (c == '_') count++;
return count;
}
int main()
{
string str = "bla_bla_blabla_bla";
int count = countUnderScores(str);
cout << count << endl;
}

I would have done something like that :)
const char* str = "bla_bla_blabla_bla";
char* p = str;
unsigned int count = 0;
while (*p != '\0')
if (*p++ == '_')
count++;

Try
#include <iostream>
#include <string>
using namespace std;
int WordOccurrenceCount( std::string const & str, std::string const & word )
{
int count(0);
std::string::size_type word_pos( 0 );
while ( word_pos!=std::string::npos )
{
word_pos = str.find(word, word_pos );
if ( word_pos != std::string::npos )
{
++count;
// start next search after this word
word_pos += word.length();
}
}
return count;
}
int main()
{
string sting1="theeee peeeearl is in theeee riveeeer";
string word1="e";
cout<<word1<<" occurs "<<WordOccurrenceCount(sting1,word1)<<" times in ["<<sting1 <<"] \n\n";
return 0;
}

public static void main(String[] args) {
char[] array = "aabsbdcbdgratsbdbcfdgs".toCharArray();
char[][] countArr = new char[array.length][2];
int lastIndex = 0;
for (char c : array) {
int foundIndex = -1;
for (int i = 0; i < lastIndex; i++) {
if (countArr[i][0] == c) {
foundIndex = i;
break;
}
}
if (foundIndex >= 0) {
int a = countArr[foundIndex][1];
countArr[foundIndex][1] = (char) ++a;
} else {
countArr[lastIndex][0] = c;
countArr[lastIndex][1] = '1';
lastIndex++;
}
}
for (int i = 0; i < lastIndex; i++) {
System.out.println(countArr[i][0] + " " + countArr[i][1]);
}
}

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Find occurrences of all substrings in a given string - c++

Related

How to split a string by another string in Arduino?

Program to check if one string is contained cyclic in the other one

How to find substring from string?

Reversing a string, weird output c++

Count character occurrences in a string in C++

Categories

Resources