Get a substring using two char* positionned in a C string - c++

Let long_text, keyword1 and keyword2 be three char* pointers. _keyword1_ and _keyword2_ being two substrings of long_text. Using strstr(long_text, keyword1) I can get a char* which points to the first occurrence of keyword1 in long_text, and using strstr(long_text, keyword2) I can get a char* which points to the first occurrence of keyword2 in long_text. keyword1 and keyword2 do not overlap.
Is there a way to extract a substring from long_text representing the string between keyword1 and keyword2 using the two char* obtained from strstr()?
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
int main(){
char* long_text = "this is keyword1 and this is keyword2 in long_text";
char* keyword1 = "keyword1";
char* keyword2 = "keyword2";
char* k1_start = strstr(long_text, keyword1);
char* k2_start = strstr(long_text, keyword2);
// TODO Be able to print " and this is "
return 0;
}

This is the part you are missing
// Move k1_start to end of keyword1
k1_start += strlen(keyword1);
// Copy text from k1_start to k2_start
char sub_string[32];
int len = k2_start - k1_start;
strncpy(sub_string, k1_start, len);
// Be sure to terminate the string
sub_string[len] = '\0';

Yeah..
This is C-like and uses char * and supporting char array.
#include <stdio.h>
#include <string.h>
int main(void) {
char* long_text = "key1(text)key2";
char* keyword1 = "key1";
char* keyword2 = "key2";
char* k1 = strstr(long_text, keyword1);
char* k2 = strstr(long_text, keyword2);
// from first char of match up to first char of second match
char text[strlen(k1) - strlen(k2)];
int len = (int)strlen(k1);
for (int i = 0;; i++) {
text[i] = *k1; k1++;
if (i == (len - strlen(k2))) {
text[len - strlen(k2)] = '\0';
break;
}
}
char* res;
//We have now only keyword1 + middle part, compare until diff.,
//then remember position and just iterate from to it later.
int j = 0;
for (int i = 0;; i++) {
if (*keyword1 == text[i]) {
j = i;
keyword1++;
} else {
res = &text[++j];
break;
}
}
printf("%s\n", res);
return 0;
}

void look_for_middle(const char *haystack,
const char *needle1, const char *needle2) {
const char *start; /* start of region between keywords */
const char *end; /* end of region between keywords */
const char *pos1; /* match needle1 within haystack */
const char *pos2; /* match needle2 within haystack */
int length; /* length of region between needles */
/* Look for needles in haystack. */
pos1 = strstr(haystack, needle1);
pos2 = strstr(haystack, needle2);
if (pos1 != NULL && pos2 != NULL) {
/* Both needles were found. */
if (pos1 < pos2) {
/* needle1 appears before needle2 */
start = pos1 + strlen(needle1);
end = pos2;
} else {
/* needle2 appears before needle1 */
start = pos2 + strlen(needle2);
end = pos1;
}
length = end - start;
} else {
/* One or more needles were not found. */
start = NULL;
end = NULL;
length = 0;
}
/* Report result. */
if (start != NULL) {
/* Both needles were found. */
if (length < 0) {
printf("Needles overlap\n");
} else {
/*
* In this printf, the "precision" of the string
* is set so that it only prints the portion between
* the needles.
*/
printf("Middle pos %d len %d: %.*s\n",
(int)(start - haystack), length, length, start);
}
} else {
if (pos1 == NULL) {
printf("Needle 1 not found\n");
}
if (pos2 == NULL) {
printf("Needle 2 not found\n");
}
}
}

This function can do your job . .
char* strInner(char *long_text , char *keyword1 , char* keyword2)
{
char * a = strstr(long_text,keyword1);
a+= strlen(keyword1);
if(a==NULL)
{
cout<<"Keyword1 didn't found ! ";
return a ;
}
char * b = strstr(a,keyword2);
if(b==NULL)
{
cout<<"Keyword2 didn't found Or, found before Keyword1 ! ";
return b ;
}
char *inner = (char*)malloc(strlen(a)-strlen(b));
memcpy(inner,a,strlen(a)-strlen(b) );
return inner ;
}

Related

How do I tokenize a string only using <iostream>?

I want to learn how to tokenize a string, like the strtok function only using <iostream>.
I made a program that deletes the spaces but I don't thinks its the same as strtok.
#include <iostream>
int main(){
int i = 0;
char s[100]="fix the car";
while(s[i] != '\0'){
if(s[i] == ' ')
s[i] = s[i-1];
else std::cout << s[i];
i++;
}
return 0;
}
prints: fixthecar
I want the whole strtok function, not just deleting delimiters, heard I have to use pointers, but I don't know how to code it.
The internal implementation of strtok has already been discussed here, you should check that before opening new questions.
The key to the operation of strtok() is preserving the location of the last seperator between seccessive calls (that's why strtok() continues to parse the very original string that is passed to it when it is invoked with a null pointer in successive calls)..
Have a look at this strtok() implementation which has a slightly different functionality than the one provided by strtok()
char *zStrtok(char *str, const char *delim) {
static char *static_str=0; /* var to store last address */
int index=0, strlength=0; /* integers for indexes */
int found = 0; /* check if delim is found */
/* delimiter cannot be NULL
* if no more char left, return NULL as well
*/
if (delim==0 || (str == 0 && static_str == 0))
return 0;
if (str == 0)
str = static_str;
/* get length of string */
while(str[strlength])
strlength++;
/* find the first occurance of delim */
for (index=0;index<strlength;index++)
if (str[index]==delim[0]) {
found=1;
break;
}
/* if delim is not contained in str, return str */
if (!found) {
static_str = 0;
return str;
}
/* check for consecutive delimiters
*if first char is delim, return delim
*/
if (str[0]==delim[0]) {
static_str = (str + 1);
return (char *)delim;
}
/* terminate the string
* this assignmetn requires char[], so str has to
* be char[] rather than *char
*/
str[index] = '\0';
/* save the rest of the string */
if ((str + index + 1)!=0)
static_str = (str + index + 1);
else
static_str = 0;
return str;
}

How to split a string by another string in Arduino?

I have a character array like below:
char array[] = "AAAA... A1... 3. B1.";
How can I split this array by the string "..." in Arduino? I have tried:
ptr = strtok(array, "...");
and the output is the following:
AAAA,
A1,
3,
B1
But I actually want output to be
AAAA,
A1,
3.B1.
How to get this output?
edit:
My full code is this:
char array[] = "AAAA... A1... 3. B1.";
char *strings[10];
char *ptr = NULL;`enter code here`
void setup()
{
Serial.begin(9600);
byte index = 0;
ptr = strtok(array, "..."); // takes a list of delimiters
while(ptr != NULL)
{
strings[index] = ptr;
index++;
ptr = strtok(NULL, "..."); // takes a list of delimiters
}
for(int n = 0; n < index; n++)
{
Serial.println(strings[n]);
}
}
The main problem is that strtok does not find a string inside another string. strtok looks for a character in a string. When you give multiple characters to strtok it looks for any of these. Consequently, writing strtok(array, "..."); is exactly the same as writing strtok(array, ".");. That is why you get a split after "3."
There are multiple ways of doing what you want. Below I'll show you an example using strstr. Unlike strtokthe strstr function do find a substring inside a string - just what you are looking for. But.. strstr is not a tokenizer so some extra code is required to print the substrings.
Something like this should do:
int main()
{
char array[] = "AAAA... A1... 3. B1...";
char* ps = array;
char* pf = strstr(ps, "..."); // Find first substring
while(pf)
{
int len = pf - ps; // Number of chars to print
printf("%.*s\n", len, ps);
ps = pf + 3;
pf = strstr(ps, "..."); // Find next substring
}
return 0;
}
You can implement your own split as strtok except the role of the second argument :
#include <stdio.h>
#include <string.h>
char * split(char *str, const char * delim)
{
static char * s;
char * p, * r;
if (str != NULL)
s = str;
p = strstr(s, delim);
if (p == NULL) {
if (*s == 0)
return NULL;
r = s;
s += strlen(s);
return r;
}
r = s;
*p = 0;
s = p + strlen(delim);
return r;
}
int main()
{
char s[] = "AAAA... A1... 3. B1.";
char * p = s;
char * t;
while ((t = split(p, "...")) != NULL) {
printf("'%s'\n", t);
p = NULL;
}
return 0;
}
Compilation and execution:
/tmp % gcc -g -pedantic -Wextra s.c
/tmp % ./a.out
'AAAA'
' A1'
' 3. B1.'
/tmp %
I print between '' to show the return spaces, because I am not sure you want them, so delim is not only ... in that case
Because you tagged this as c++, here is a c++ 'version' of your code:
#include <iostream>
using std::cout;
using std::endl;
#include <vector>
using std::vector;
#include <string>
using std::string;
class T965_t
{
string array;
vector<string> strings;
public:
T965_t() : array("AAAA... A1... 3. B1.")
{
strings.reserve(10);
}
~T965_t() = default;
int operator()() { return setup(); } // functor entry
private: // methods
int setup()
{
cout << endl;
const string pat1 ("... ");
string s1 = array; // working copy
size_t indx = s1.find(pat1, 0); // find first ... pattern
// start search at ---------^
do
{
if (string::npos == indx) // pattern not found
{
strings.push_back (s1); // capture 'remainder' of s1
break; // not found, kick out
}
// else
// extract --------vvvvvvvvvvvvvvvvv
strings.push_back (s1.substr(0, indx)); // capture
// capture to vector
indx += pat1.size(); // i.e. 4
s1.erase(0, indx); // erase previous capture
indx = s1.find(pat1, 0); // find next
} while(true);
for(uint n = 0; n < strings.size(); n++)
cout << strings[n] << "\n";
cout << endl;
return 0;
}
}; // class T965_t
int main(int , char**) { return T965_t()(); } // call functor
With output:
AAAA
A1
3. B1.
Note: I leave changing "3. B1." to "3.B1.", and adding commas at end of each line (except the last) as an exercise for the OP if required.
I looked for a split function and I didn't find one that meets my requirement, so I made one and it works for me so far, of course in the future I will make some improvements, but it got me out of trouble.
But there is also the strtok function and better use that.
https://www.delftstack.com/es/howto/arduino/arduino-strtok/
I have the split function
Arduino code:
void split(String * vecSplit, int dimArray,String content,char separator){
if(content.length()==0)
return;
content = content + separator;
int countVec = 0;
int posSep = 0;
int posInit = 0;
while(countVec<dimArray){
posSep = content.indexOf(separator,posSep);
if(posSep<0){
return;
}
countVec++;
String splitStr = content.substring(posInit,posSep);
posSep = posSep+1;
posInit = posSep;
vecSplit[countVec] = splitStr;
countVec++;
}
}
Llamada a funcion:
smsContent = "APN:4g.entel;DOMAIN:domolin.com;DELAY_GPS:60";
String vecSplit[10];
split(vecSplit,10,smsContent,';');
for(int i = 0;i<10;i++){
Serial.println(vecSplit[i]);
}
String input:
APN:4gentel;DOMAIN:domolin.com;DELAY_GPS:60
Output:
APN:4g.entel
DOMAIN:domolin.com
DELAY_GPS:60
RESET:true
enter image description here

How to output each word in a char array backwards

I have to make a function which reverses words that have been written in an char array.
char reverse(char m[50]) {
for (int i = 0; i <= m['\0']; i++) {
for (int j = m['\0']-1; j >= m[0]; j--) {
m[i] = m[j];
}
}
}
This is the code I have in mind which would output something like this:
Input: I am new
Output: wen ma I
What I need is:
Input: I am new
Output: I ma wen
Hope you understood what I meant here as I am quite new to programming and really need help with this.
If you want a c++ solution then the following should work:
#include <iostream>
#include <string>
#include <algorithm>
int main()
{
std::string str = "I am new";
auto begin = str.begin();
while (begin != str.end())
{
auto end = std::find(begin, str.end(), ' ');
std::reverse(begin, end);
if (end == str.end())
{
break;
}
begin = end + 1;
}
std::cout << str << "\n";
}
Since not only a space may delimiter words also other whitespaces should be considered.
#include <iostream>
bool IsWhiteSpaceOrEnd(char c) {
switch (c) {
case ' ':
case '\t':
case '\r':
case '\n':
case 0:
return true;
}
return false;
}
void ReverseWord(char* begin, char* end) {
--end;
while (begin < end) {
char temp = *begin;
*begin = *end;
*end = temp;
++begin;
--end;
}
}
void ReverseEachWord(char* str) {
char* begin = str;
while (0 != *begin) {
char* end = begin + 1;
// find next end of word
while (!IsWhiteSpaceOrEnd(*end)) {
++end;
}
// reverse the word between begin and end
ReverseWord(begin, end);
begin = end;
// go forward to the next begin of a word
while ((0 != *begin) && IsWhiteSpaceOrEnd(*begin)) {
++begin;
}
}
}
int main(int argc, char** argv)
{
char* text = strdup("I am new");
ReverseEachWord(text);
return 0;
}
I'd convert the char array input and output to strings, at least for processing. You can convert the result back to char array if you really need that.
string ReverseText(const string& text)
{
string word(""), result("");
for (const auto& character : text)
{
if (!isspace(character))
word += character; // Build the next word to reverse
else
{
std::reverse(word.begin(), word.end()); // Reverse the completed word
result += word + character; // Add your processed word to the result
word = ""; // Clear word variable for next iteration
}
}
std::reverse(word.begin(), word.end()); // Don't forget the last word built
return result + word;
}

Value type const char cannot be used to initialize an entity of type char*

I am having the following problem with my code, though it compiles correctly:
value type const char cannot be used to initialize an entity of type char*
Can someone help me? I can run the code which is weird but I can't create a makefile using this. It's very weird to me.
int SpliString(struct dict_word *entry, const char *str)
{
long word_length,j,k;
int yearIndex;
char *buffer;
char *endOfYears;
char *endOfYear;
char *endOfDefinition;
char *endOfWord = strstr(str, "_#_");
//Sets the first num bytes of the block of memory pointed by ptr
//to the specified value (related as an unsigned char)
memset(entry, 0, sizeof(struct dict_word));
// If '_#_' is not found, it's NULL
if (endOfWord)
{
// Calculating word legth; 'str' points to start of word, 'endofWord' points to '_#_' that is just after word
word_length = endOfWord - str;
// Copying data into the word
strncpy(entry->words, str, word_length);
// 'endOfYears' points to '_#_,' but wee need to find follow '_#_'
// therefore there is added 3 in order to skip curremnt '_#_
endOfYears = strstr(endOfWord+3, "_#_");
if (endOfYears)
{
word_length = endOfYears - (endOfWord+3);
// Skips _#_
buffer = endOfWord+3;
yearIndex = 0;
j = 0;
// Finds next year in the line, it stops if all 10 years is filled
// or end of years string is reached
while(yearIndex<10 && buffer+j<endOfYears)
{
// Stores year in the buffer, with converting 'stirng' to 'int'
entry->year[yearIndex] = atoi(buffer+j);
// check year for negative...
if (entry->year[yearIndex]<=0)
return 0;
// Locating substring; 'j' is current offset from beginning of buffer
endOfYear = strchr(buffer+j, '_');
if (endOfYear)
{
j = endOfYear - buffer;
j++;
yearIndex++;
}
else
{
break;
}
}
//endOfYears points to '_#_' that separatates 'years' and 'definition'
//and there is needed to find '_#_' between 'definition' and 'synonyms'
//therefore it skips '_#_' that separatates 'years' and 'definition',
//+3, because '_#_' has length = 3
endOfDefinition = strstr(endOfYears+3, "_#_");
if (endOfDefinition)
{
word_length = endOfDefinition - (endOfYears+3);
k = 0;
for(j=0; j<word_length; j++)
{
// Skips '_#_'
if (endOfYears[j+3]==',')
{
entry->eng_synonyms[k] = ' ';
k++;
}
else if (endOfYears[j+3]>='a' && endOfYears[j+3]<='z')
{
entry->eng_synonyms[k] = endOfYears[j+3];
k++;
}
else if (endOfYears[j+3]!='_')
{
return 0;
}
}
k = 0;
word_length = (str+strlen(str)) - (endOfDefinition+3);
for(j=0; j<word_length; j++)
{
if (endOfDefinition[j+3]==',')
{
entry->heb_synonyms[k] = ' ';
k++;
}
else if (endOfDefinition[j+3]>='A' && endOfDefinition[j+3]<='Z')
{
entry->heb_synonyms[k] = endOfDefinition[j+3];
k++;
}
else if (endOfDefinition[j+3]!='_')
{
return 0;
}
}
}
// Check for legality
// Check all symbols of 'entry->words'
// calculate length and supress warning
for(j=0;j<(int)strlen(entry->words);j++)
{
if (entry->words[j]<'a' || entry->words[j]>'z')
return 0;
}
return 1;
}
}
return 0;
}
Use
const char *buffer;
const char *endOfWord = strstr(str, "_#_");
Confident OP is compiling in C++.
// C
char *strstr(const char *s1, const char *s2);
// C++
const char* strstr(const char* s1, const char* s2);
char* strstr( char* s1, const char* s2);
See
Compile C app with Visual Studio 2012
How to compile and execute C program on Visual Studio 2012 for Windows 8?

How can I decode HTML entities in C++? [duplicate]

I'm interested in unescaping text for example: \ maps to \ in C. Does anyone know of a good library?
As reference the Wikipedia List of XML and HTML Character Entity References.
For another open source reference in C to decoding these HTML entities you can check out the command line utility uni2ascii/ascii2uni. The relevant files are enttbl.{c,h} for entity lookup and putu8.c which down converts from UTF32 to UTF8.
uni2ascii
I wrote my own unescape code; very simplified, but does the job: pn_util.c
Function Description: Convert special HTML entities back to characters.
Need to do some modifications to fit your requirement.
char* HtmlSpecialChars_Decode(char* encodedHtmlSpecialEntities)
{
int encodedLen = 0;
int escapeArrayLen = 0;
static char decodedHtmlSpecialChars[TITLE_SIZE];
char innerHtmlSpecialEntities[MAX_CONFIG_ITEM_SIZE];
/* This mapping table can be extended if necessary. */
static const struct {
const char* encodedEntity;
const char decodedChar;
} entityToChars[] = {
{"<", '<'},
{">", '>'},
{"&", '&'},
{""", '"'},
{"'", '\''},
};
if(strchr(encodedHtmlSpecialEntities, '&') == NULL)
return encodedHtmlSpecialEntities;
memset(decodedHtmlSpecialChars, '\0', TITLE_SIZE);
memset(innerHtmlSpecialEntities, '\0', MAX_CONFIG_ITEM_SIZE);
escapeArrayLen = sizeof(entityToChars) / sizeof(entityToChars[0]);
strcpy(innerHtmlSpecialEntities, encodedHtmlSpecialEntities);
encodedLen = strlen(innerHtmlSpecialEntities);
for(int i = 0; i < encodedLen; i++)
{
if(innerHtmlSpecialEntities[i] == '&')
{
/* Potential encode char. */
char * tempEntities = innerHtmlSpecialEntities + i;
for(int j = 0; j < escapeArrayLen; j++)
{
if(strncmp(tempEntities, entityToChars[j].encodedEntity, strlen(entityToChars[j].encodedEntity)) == 0)
{
int index = 0;
strncat(decodedHtmlSpecialChars, innerHtmlSpecialEntities, i);
index = strlen(decodedHtmlSpecialChars);
decodedHtmlSpecialChars[index] = entityToChars[j].decodedChar;
if(strlen(tempEntities) > strlen(entityToChars[j].encodedEntity))
{
/* Not to the end, continue */
char temp[MAX_CONFIG_ITEM_SIZE] = {'\0'};
strcpy(temp, tempEntities + strlen(entityToChars[j].encodedEntity));
memset(innerHtmlSpecialEntities, '\0', MAX_CONFIG_ITEM_SIZE);
strcpy(innerHtmlSpecialEntities, temp);
encodedLen = strlen(innerHtmlSpecialEntities);
i = -1;
}
else
encodedLen = 0;
break;
}
}
}
}
if(encodedLen != 0)
strcat(decodedHtmlSpecialChars, innerHtmlSpecialEntities);
return decodedHtmlSpecialChars;
}
QString UNESC(const QString &txt) {
QStringList bld;
static QChar AMP = '&', SCL = ';';
static QMap<QString, QString> dec = {
{"<", "<"}, {">", ">"}
, {"&", "&"}, {""", R"(")"}, {"'", "'"} };
if(!txt.contains(AMP)) { return txt; }
int bgn = 0, pos = 0;
while((pos = txt.indexOf(AMP, pos)) != -1) {
int end = txt.indexOf(SCL, pos)+1;
QString val = dec[txt.mid(pos, end - pos)];
bld << txt.mid(bgn, pos - bgn);
if(val.isEmpty()) {
end = txt.indexOf(AMP, pos+1);
bld << txt.mid(pos, end - pos);
} else {
bld << val;
}// else // if(val.isEmpty())
bgn = end; pos = end;
}// while((pos = txt.indexOf(AMP, pos)) != -1)
return bld.join(QString());
}// UNESC