Remove substring that contains certain pattern and ends with specific char - c++

For example I have a string like following:
"VISITOR_INFO1_LIVE=USv90B-7CzI; LOGIN_INFO=e486e37a395be3f0e3b3237d090a6829c1oAAAB7IjQiOiAiREVMRUdBVEVEIiwgIjciOiAwLCAiMSI6IDEsICIzIjogMjAxMTk0MTMwNiwgIjgiOiA2MDgwMTg0NTEzNjQsICIxMCI6IDIzOTYyMTEyODczNH0=; PREF=f5=30;HSID=AHuJQBOVR0lQoRt_3; APISID=QaParXGsQcEPCzKg/A1smCfYrfMjxvfEPT; YSC=Vm3Amq5loFM";
I want to remove all the patterns that contains *SID (HSID,APISID here) upto ';'. I also want to remove substring "LOGIN_INFO= ....;"
So, output string should be "VISITOR_INFO1_LIVE=L80EDuHCEF8; PREF=f5=30";
Following is the solution I have come up with but I think performance can be improved:
const char *str ="VISITOR_INFO1_LIVE=USv90B-7CzI; LOGIN_INFO=e486e37a395be3f0e3b3237d090a6829c1oAAAB7IjQiOiAiREVMRUdBVEVEIiwgIjciOiAwLCAiMSI6IDEsICIzIjogMjAxMTk0MTMwNiwgIjgiOiA2MDgwMTg0NTEzNjQsICIxMCI6IDIzOTYyMTEyODczNH0=; PREF=f5=30;HSID=AHuJQBOVR0lQoRt_3; APISID=QaParXGsQcEPCzKg/A1smCfYrfMjxvfEPT; YSC=Vm3Amq5loFM";
char *Cookie = NULL;
cout << "original string is:\n" << str << "\n";
int len = strlen(str)+1;
cout << "length of original string is : " << len << "\n";
Cookie = new char[strlen(str)];
strncpy(Cookie,str,len);
char *p1 = strstr(Cookie,"LOGIN_INFO");
char *p2 = NULL;
if(p1){
p2 = strstr(p1,";")+1;
while(*p2 == ' ') p2++;
}
if(p1 && p2)
memmove(p1,p2,strlen(p2)+1);
char *ID = strstr(Cookie,"SID");
while( ID != NULL){
char *start_pos = NULL, *end_pos = NULL;
while((*ID != ';') && (*ID != Cookie[0]) && (*ID != ' ')){
--ID;
}
if(*ID == Cookie[0]) start_pos = ID;
else start_pos = ID+1;
end_pos = strstr(start_pos,";")+1;
while(*end_pos == ' ')
end_pos++;
memmove(start_pos,end_pos,strlen(end_pos)+1);
// }
/*else
std::cout << "does not find substr " << "\n";*/
// cout << "modified string is :" << Cookie << "\n";
ID = strstr(Cookie,"SID");
}
//cout << "final modified string is : " << Cookie << "\n";
char *Cookie_modified = NULL;
const char *pch = strstr(Cookie,"PREF");
if(pch != NULL){
const char *append = "&f2=8000000";
int len = strlen(Cookie) + strlen(append) + 1;
Cookie_modified = new char[len];
strncpy(Cookie_modified,Cookie,len);
Cookie_modified[len-1] = '\0';
char *p = strstr(Cookie_modified,"PREF");
strncpy(p+(strlen(p)),append,strlen(append));
cout << "modified Cookie is : " << Cookie_modified << "\n";
// cout << "length of modified cookie is : " << strlen(Cookie_modified) << "\n";
}
else{
cout << "do not find reference: " << "\n";
const char *append = ";PERF=f2=8000000";
int len = strlen(Cookie) + strlen(append) + 1;
Cookie_modified = new char[len];
Cookie_modified[len-1] = '\0';
strcat(Cookie_modified,Cookie);
strcat(Cookie_modified,append);
cout << "case 2: modified Cookie is: " << Cookie_modified << "\n";
}
delete[] Cookie;
delete[] Cookie_modified;
return 0;
}

Just use std::regex_replace with the following regex:
([^\s]*SID[^;]*;|[^\s]*LOGIN_INFO[^;]*;)
And replace with empty string.
Live Demo

Related

How to parse a const char* from a double / long without the std::string library?

How can I parse a const char* from a double or long?
Mainly because my code is a lot faster when I use a const char*, so i decided to create a small base string class. But my code to parse a double has some bugs.
My code only works partially. Some help would be very appreciated.
I am using macos, g++ & c++17.
Code:
#include <iostream>
class bstring {
public:
const char* characters;
bstring(const char* c = "") { characters = c; }
static bstring parse(const double number, int precision = 100) {
// Convert.
int decimal, sign;
char *buffer;
buffer = ecvt(number, precision, &decimal, &sign);
int n = strlen(buffer);
// Add decimal.
char before[decimal];
strncpy(before, 0 + buffer, decimal);
char after[n - decimal - 1];
strncpy(after, decimal + buffer, n - decimal - 1);
// Remove zero padding.
int removed = 0;
while (true) {
size_t n = sizeof(after) - removed;
size_t index_to_remove = n - 1;
if (after[index_to_remove] == '0') {
for (size_t i = index_to_remove; i < n - 1; ++i) {
after[i] = after[i + 1];
}
removed += 1;
} else { break; }
}
bool is_zero = removed == sizeof(after);
int after_size = sizeof(after)-removed;
char* nafter = (char*)malloc(sizeof(char) * after_size);
// Concat.
char* new__{ new char[strlen(before) + 1 + after_size] };
new__ = strcpy(new__, before);
new__ = strcat(new__, ".");
if (is_zero) {
char a[] = "0";
new__ = strcat(new__, a);
} else {
new__ = strcat(new__, after);
}
// Assign.
bstring s = new__;
delete[] new__; new__ = NULL;
return s;
//
}
};
std::ostream& operator <<(std::ostream &s, bstring x) { return s << x.characters; }
int main() {
std::cout << "Should be " << "-1234.39950" << ": " << bstring::parse(-1234.39950) << std::endl;
std::cout << "Should be " << "-1.0" << ": " << bstring::parse(-1.0) << std::endl;
std::cout << "Should be " <<"0.0" << ": " << bstring::parse(0.0) << std::endl;
std::cout << "Should be " <<"0.3897495" << ": " << bstring::parse(0.3897495) << std::endl;
std::cout << "Should be " <<"1.0" << ": " << bstring::parse(1.0) << std::endl;
std::cout << "Should be " <<"100.00" << ": " << bstring::parse(1000.0) << std::endl;
std::cout << "Should be " <<"10000.000" << ": " << bstring::parse(1000000.0) << std::endl;
std::cout << "Should be " <<"1000000.0000" << ": " << bstring::parse(1000000000.0) << std::endl;
std::cout << "Should be " <<"1000000000.0000" << ": " << bstring::parse(1000000000000.0) << std::endl;
std::cout << "Should be " <<"1000000000000.0000" << ": " << bstring::parse(1000000000000000.0) << std::endl;
}
Edit:
Is this piece of code okay? Or am I doing something wrong by not deleting it / By where I assign the new__ to.
// Concat.
bstring concat(const char* c) {
int n = ::strlen(characters) + ::strlen(c);
if (n == 0) { return bstring(); }
if (::strlen(c) == 0) { return bstring(characters); }
char* new__{ new char[n + 1] };
new__ = strcpy(new__, characters);
new__ = strcat(new__, c);
// const char* n = new__;
// delete[] new__; new__ = NULL;
bstring s = new__;
return s;
}

segfault variable creation c++

So I'm having some troubles with a segfault right now.
The problem appears to be on line 122
file_stream.read(binaryData, fileSizeInt);
The message seems to be unable to create variable object.
I'm using the debugger on CLion, if this is of any help.
I'm fairly new to C++ so please bear with me on this. I've included my source code below, since I have no idea what will and wont be relevant.
#include <fstream>
#include <iostream>
#include <math.h>
using namespace std;
//string dataLocation = "/dev/rdisk2"; // Variable for the mounting point of the SD Card.
string dataLocation = "/Volumes/Untitled/data/smith_data_backup.dat"; // Test location
int sectorSize = 512;
int bytesRead = 0;
string outDir = "/Volumes/Untitled/data/readData/";
char data_stop[10] = "Data_Stop";
char* findHeader(char* inputData) {
int i = 1;
//look for the end of the header
while(true) {
//cout << data[i] << '\n';
if (inputData[i] == '}') {
// found the end of the header
cout << "found the end of the header" << '\n';
break;
}
if (i > 200) { // don't get into an infinite loop here
cout << "something went wrong here" << '\n';
break;
}
i++;
}
// copy the header to a new char array
char* header = new char[i+1];
memcpy(header, &inputData[0], (size_t) (i +1));
return header;
}
int main() {
ifstream file_stream;
file_stream.open(dataLocation);
for( int j = 0; j < 10; j++ ) {
char data[sectorSize]; // figure out a decent size for this.....
string date = "";
string time = "";
string site = "";
string intId = "";
string totalFiles = "";
string fileNo = "";
string fileSize = "";
try {
// read the data file... this will be fun
cout << "Reading the data file" << '\n';
file_stream.read(data, sectorSize);
bytesRead += sectorSize;
if (data[0] == '{') {
char *header = findHeader(data);
cout << header << '\n';
//loop over the header to find the date, time, site ID, Instrument ID, and expected file size.
int commasFound = 0;
for(int i = 1; header[i] != '}', i++;) {
// increment the number of commas that have been found
if (header[i] == ',') {
commasFound ++;
continue;
}
//check for the end f the header
if (header[i] == '}') {
break;
}
//for some reason the first one never gets added
if (i==2) {
date += header[1];
}
// append to appropriate strings based on the number of commas that have been passed in the header
if (commasFound == 0) {
date += header[i];
}
if (commasFound == 1 && time.length() < 6) {
time += header[i];
}
if (commasFound == 2) {
fileNo += header[i];
}
if (commasFound == 6) {
site += header[i];
}
if (commasFound == 8) {
intId += header[i];
}
if (commasFound == 16) {
fileSize += header[i];
}
if (commasFound == 17) {
totalFiles += header[i];
}
//paranoia of infinite loops
if (i > 150) {
break;
}
}
string formattedDate = "20" + date.substr(4,2) + date.substr(2,2) + date.substr(0,2);
cout << formattedDate << " " << time << " " << " " << site << " " << intId << " " << fileSize << " " << totalFiles<< " " << '\n';
// Read in the data size
int fileSizeInt = atoi(fileSize.c_str()) * atoi(fileSize.c_str());
char binaryData[fileSizeInt];
file_stream.read(binaryData, fileSizeInt);
bytesRead += fileSizeInt;
string dateDir = outDir + formattedDate.substr(0,4) + "/" + date.substr(2,2) + "/" + site + "/" + date.substr(0,2) + "/";
string fileName = dateDir + formattedDate + "_" + time + "_" + site + "_" + intId + "_Full_Data.dat";
//cout << fileName << '\n';
//create the directory with the date.
string mkdirCommand = "mkdir -p " + dateDir;
system(mkdirCommand.c_str());
cout << "size " << sizeof(binaryData) << '\n';
//write data to file
try {
cout << "Write the data file " << '\n';
cout << header << " " << strlen(header) << '\n';
ofstream outFile;
outFile.open(fileName.c_str());
outFile.write(header, strlen(header));
outFile.write(binaryData, sizeof(binaryData));
outFile.write(data_stop, sizeof(data_stop));
outFile.close();
} catch (ofstream::failure e ) {
cout << "Unable to write the file " << fileName << '\n';
}
//read till the start of the next sector
int nextSector = (int) ceil(((double) bytesRead)/512.0);
int startOfNextSector = nextSector * 512;
cout << startOfNextSector << '\n';
int bytesToRead = startOfNextSector - bytesRead;
char dump[bytesToRead];
file_stream.read(dump, bytesToRead);
bytesRead += bytesToRead;
// The first block may be empty, quick check to see if the first block needs to be skipped
} else if (j == 0 && data[0] != '{') {
cout << "Skipping the first block" << '\n';
continue;
} else {
//cout << "no start here....." << bytesRead << '\n';
}
}
catch (ifstream::failure& e) {
cout << "Error" << "\n";
break;
}
}
file_stream.close();
return 0;
}

getting bus error : 10 with string append

I have a function that takes two strings and determines if they are the same. I am trying to tokenize the string and combine all of tokens into one string. This is what I have so far and I am getting Bus error :10.
any help appreciated.
#include <iostream>
#include <string>
using namespace std;
bool stringCheck(string s1, string s2){
string strCheck1 = "";
string strCheck2 = "";
char *cstr1 = new char[s1.length()]; // char array with length of string
strcpy(cstr1, s1.c_str()); // copies characters of string to char array
char *cstr2 = new char[s2.length()];
strcpy(cstr2, s2.c_str());
char *p1 = strtok(cstr1, " "); // creates a char array that stores token that
// is delimeted
cout << "p1 " << p1 << endl; ///outputs token that is found
strCheck1.append(p1); // appends token to string
cout << "strCheck1 " << strCheck1 << endl; // outputs string
while(p1 != NULL) // while the token is not a null character
{
cout<<"parsing" << endl;
p1 = strtok(NULL, " "); // continue to parse current string.
cout << "p1 " << p1 << endl;
strCheck1.append(p1);
cout << "str1 " << strCheck1 << endl;
}
char * p2 = strtok(cstr2, " ");
cout << "p2 " << p2 << endl;
strCheck2.append(p2);
cout << "strCheck2 " << strCheck2 << endl;
while(p2 != null){
p2 = strtok(NULL, " ");
strCheck2.append(p2);
cout << "str2 " << strCheck2 << endl;
}
if( strCheck1.compare(strCheck2) != 0)
{
return 0;
}
else return 1;
}
int main(void){
string s1 = "jam yoooo jay";
string s2 = "jam yoooo";
if(stringCheck(s1, s2) == 1){
cout << "strings same"<< endl;;
}
else{
cout << "strings not same" << endl;
}
}
is there a conditional statement I could pair up with
while(p1 != NULL)
I know this is a pretty silly function but just trying to polish up my skills. any help appreciated!
There are some things you must change:
char *cstr1 = new char[s1.length()];
c-string are null-terminated, so you need one more char to store the null character:
char *cstr1 = new char[s1.length() + 1];
(same for cstr2)
strCheck1.append(p1)
p1 cannot be a null pointer (see Assign a nullptr to a std::string is safe? for further details). So you have to check...
if (p1) strCheck1.append(p1);
(same for p2).
cout << p1 << endl
if p1 is a null pointer bad things can happen (see Why does std::cout output disappear completely after NULL is sent to it). So you have to check...
if (p1) { cout << "p1 " << p1 << endl; strCheck1.append(p1); }
(same for p2)
there is a memory leak (cstr1 / cstr2 must be deleted).
At the end it should work.
Probably you should consider other systems to extract tokens (where you haven't to mix std::string and c-string). E.g.:
#include <iostream>
#include <string>
#include <sstream>
int main()
{
std::string text("text-to-tokenize");
std::istringstream iss(text);
std::string token;
while(getline(iss, token, '-'))
std::cout << token << std::endl;
return 0;
}

Invalid null pointer

I am trying to write a program that will parse a string and give each position of word. I cannot figure out why I am getting a
"DEBUG ASSERTION FAILED" Experssion: Invalid null pointer
window when it reaches the last word of the string.
char * pointer_char;
int pos = 0;
std::string str = "This test string will fail at this word..!. ";
int i = 0;
int length = str.length();
char * c = new char [str.size()+1];
std::copy(str.begin(), str.end(), c);
c[str.size()] = '\0';
cout << "Testing string is " << str << endl << endl;
pointer_char = strtok (c," ");
while(pointer_char != NULL)
{
cout << pointer_char << endl;
pointer_char = strtok(NULL, " .!");
string word = pointer_char;
size_t found= str.find(word);
if (found!=string::npos)
cout << "Position of " << word << " found at: " << int(found) << endl;
system("pause");
}
return 0;
The problem is you aren't checking the return value of strtok.
pointer_char = strtok(NULL, " .!");
string word = pointer_char;
You're only testing it at the top of the loop.
pointer_char = strtok(nullptr, " .!");
if (pointer_char == nullptr)
break;

Why this program gives a Segmentation Fault while calling a function?

This is a part of my program while I run this program I get a segmentation fault. I've narrowed it down to the line:
checkBase(ptr1, ptr2)
i'm passing both of these as pointers. and they are declare as char* and its a runtime error not compile time.
file contains
< a href = "http://www.google.com"> www.spam.google.com < /a >
in this case ptr1 = www.google.com and ptr2 = spam.google.com
while(inf){
count++;
getline(inf, line);
//cout << "*******" << count << "*******" << endl <<line << endl;
p = new char[line.length()+1];
strcpy(p, line.c_str());
if(strstr(p, "href")){
ptr = strstr(p, "href");
while(ptr[0]!='\0'){
ptr += 1;
if(ptr[0] == 'w' && ptr[1] == 'w' && ptr[2] == 'w'){
cout << ptr << endl;
ptr = strtok(ptr, "\"");
cout << "add1 " << ptr << endl;
add1 = ptr;
ptr1 = ptr;
ptr = strtok(NULL, "> ");
add2 = ptr;
ptr2 = ptr;
cout << "ptr1: " << ptr1 << endl << "ptr2: " <<ptr2 << endl;
if(add1 == add2)
cout << "There is an exact match at line: " << count << endl << line << endl;
else{
cout << "in else" << endl;
checkBase(ptr1, ptr2); //THIS GIVES A SEGMENTATION FAULT
}
}
}
}
}
void checkBase(char *add1, char *add2){
cout << "here" << endl;
char *base1[1000000], *base2[1000000];
int count1 = 0, count2 = 0;
base1[count1] = strtok(add1, ".");
while(base1[count1] != NULL){
count1++;
base1[count1] = strtok(NULL, ".");
cout << base1[count1] << endl;
}
base2[count2] = strtok(add2, ".");
while(base2[count2] != NULL){
count2++;
base2[count2] = strtok(NULL, ".");
}
cout << base2[count2-1] << endl;
if(((strcmp(base1[count1-1],base2[count2-1])) != 0) && (strcmp(base1[count1-2], base2[count2-2]) != 0)){
//if((strcmp(base1[count1-1], base2[count2-1]) != 0)){
cout << "Bases do not match: " << endl
<< base1[count1-2] << "." << base1[count1-1] << " and "
<< base2[count2-2] << "." << base2[count2-1] << endl;
//}
}
else{
cout << "Bases match: " << endl
<< base1[count1-2] << "." << base1[count1-1] << " and "
<< base2[count2-2] << "." << base2[count2-1] << endl;
}
}
I have no idea why this is giving a segmenation fault.
char *base1[1000000], *base2[1000000];
No doubt this is causing stack overflow. The stack is limited in size, and creating arrays more than a few kb in size is a bad idea. Try allocating them on the heap, for example vector<char *> base1(1000000)
You should also calculate the exact size required and allocate that much, or push_back on the vector.
A couple of problems, beyond the stack overflow already mentioned by #Neil Kirkwell
Those shouldn't be while loops solely conditioned on base1[count1] != NULL; you should also make sure count1 is less than the number of elements in the array.
If either count2 or count1 is 0 or 1 you will be trying to reference index of -1 and -2... not so good.
use strrchr to search backwards and make your life easier
It's wasteful to build those arrays entirely, since you only seem to care about the last two tokens, you only need two pointers in each.
i.e.
char *one_a = NULL, *one_b = NULL, *two_a=NULL, *two_b = NULL;
char *temp = strtok(add1, ".");
while (temp) {
one_b = one_a;
one_a = temp
temp = strtok(NULL, ".");
}
char *temp = strtok(add2, ".");
while (temp) {
two_b = two_a;
two_a = temp
temp = strtok(NULL, ".");
}
//now just compare one_a with two_a and one_b with two_b and you're done.