Split char* at delimiter in freestanding mode c++ - c++

I am trying to write my own operating system. I have followed the tutorials on the OSDev Wiki, and I am now working on writing a console mode, with commands. I need to be able to split a char* into a char**, without all the library functionality (hence freestanding). I have tried iterating through until I meet my delimiter etc, but however I do it, I just get garbage stuck on the end of my first result. What am I doing wrong? This is what I have so far:
static char** splitStr (char* string, char delim) {
char returner[VGA_WIDTH][255];
int loc = 0;
int innerLoc = 0;
for (int i = 0; string[i] != 0x00; i++) {
char c = string[i];
if (c != delim) {
returner[loc][innerLoc] = c;
innerLoc++;
} else {
print ("a string was ");
println (returner[loc]);
innerLoc = 0;
loc++;
}
}
print ("the first string was ");
println (returner[0]);
return (char**)returner;
}
I am asking a question about how to write a specific function in C++ freestanding mode.

void split(const char* str, const char d, char** into)
{
if(str != NULL && into != NULL)
{
int n = 0;
int c = 0;
for(int i = 0; str[c] != '\0'; i++,c++)
{
into[n][i] = str[c];
if(str[c] == d)
{
into[n][i] = '\0';
i = -1;
++n;
}
}
}
}
I'm allocating using calloc to get rid of garbage characters.
EDIT: You should allocate the pointers inside the char** before writing to them.
void allocarr(char** pointers, int bytes, int slots)
{
int i = 0;
while(i <= slots)
{
pointers[i] = (char*)calloc(1, bytes);
++i;
}
}
...
char** sa = (char**)malloc(50*sizeof(char*));
allocarr(sa, 512, 50);
split("Hello;World;", ';', sa);
puts(sa[0]);

Related

C++ custom string split?

I am still a newbie to c++; I was wondering why the code i wrote for a custom string split fuction does not work? (it splits by char and not string) I think there is something wrong with memcpy in the second instance?
char** strsplit(const char *s, const char splitboi)
{
const int LEN = length(s);
int segs = 0;
char *segstore, **out;
for (int chrs=0; chrs<=LEN; chrs++)
{
if(*(s+chrs) != splitboi)
{char* temp = chrs==0 ? (char*)"" : segstore;
segstore = new char[chrs+1];
memcpy(&segstore, &temp, sizeof(char*));
segstore[chrs] = *(s+chrs);}
else if(*(s+chrs) == splitboi)
{char **temp = out;
out = new char* [segs+1];
memcpy(&out, &temp, sizeof(char**)); //something wrong with this
out[segs] = segstore;
segs++;}
}
delete segstore;
cout << out[0] << '\n';
return out;
}

How to fix 'Heap has been corrupted 'error in c++?

When I run the program, I get exception "heap has been corrupted" after completion of the function
I have read that this exception may cause if you are using memory that has been freed, or when you are writing to index which is out of array index. But none of the cases applies here. I have read other answers of some problems but it didn't help much.
`char fileNametoExport[26]="d:\\FOlder1\\part1.ipt";
char WorkingFolderName[260] ="d:\\folder";
int start = rFind(fileNametoExport, '\\');
int finish = rFind(fileNametoExport, '.');
if (start == -1)
start = 0;
char partname[260];
strcpy(partname,substr(fileNametoExport, start, finish));
::AfxMessageBox((LPCTSTR)partname);
char xtfile[260];
char xmltxtfile[260];
strcpy(xtfile, strcat(WorkingFolderName, partname));
strcat(xtfile, "__Default.x_t");
strcpy(xmltxtfile, WorkingFolderName);
strcat(xmltxtfile,"_XT_SE_INV_Default_SOLID_0_Solid1_xt.xmt_txt");`
function rfind() to find occurence of char in char array-
int rFind(char* s, char c)
{
int sz = 0;
char *tmp = s;
while (*tmp != '\0')
{
sz++;
tmp++;
}
for (int i = sz - 1; i >= 0; i--)
{
if (*(s + i) == c)
return i;
}
return -1;
}
function substr() to get substring from position x to y (y exclusive)
char* substr(char* s, const int b, const int f)
{
char *str = new char[f - b];
int t = 0;
for (int i = b; i != f; i++)
{
str[t] = s[i];
t++;
}
str[t] = '\0';
return str;
}
P.S- While giving input I ensure that fileNametoExport always contains '.' and '\'.
Your program do not check lengths of input strings. You can receive a string longer than your buffer and program will fail.
If your program get fileNametoExport = "d:\\somefolder\\somefilewithoutdot" , finish will be -1 and program fail at strcpy(partname,substr(fileNametoExport, start, finish)); .
Program writes after buffer in char* substr(char* s, const int b, const int f) at line
str[t] = '\0';
because t at this point equal f-b , size of str buffer.
Function _ASSERTE( _CrtCheckMemory( ) ); from <crtdbg.h> very useful when searching for bugs like this. Put it around suspicious code and it fails after your bug. It works only in debug.

Am I releasing the memory in the heap correctly?

I'm studying C/C++ and the exercise I'm doing is to create a program which evaluates an arithmetic expression.
To complete the exercise, I need a general purpose function which is able to tokenize a string.
As the size of the string to parse is not known at compile time, I have to allocate dynamically some data in the heap.
After the work is done, the memory in the heap can be released.
My question is simple: I'm releasing the memory correctly? See the questions in the comments.
Tokenize function
char** Tokenize(const char delimiter, const char* string)
{
const char* pString = string;
char** tokens = new char*[strlen(string)];
char* buffer = new char[strlen(string)];
char* pointer = buffer;
int c = 0;
for (int k = 0; k < strlen(string); k++)
{
if (string[k] == delimiter)
{
buffer[k] = '\0';
tokens[c] = pointer;
pointer = buffer + k + 1;
c++;
continue;
}
buffer[k] = string[k];
}
tokens[c] = nullptr;
return tokens;
}
The main function which tests Tokenize function and relases the heap.
int main()
{
char** tokens = Tokenize('.', "192.168.1.1");
char** startTokensPointer = tokens;
char* startBufferPointer = *tokens;
while (*tokens != nullptr)
{
cout << *tokens << endl;
tokens++;
}
delete[] startTokensPointer; //Releases tokens??
delete[] startBufferPointer; //Releases buffer??
system("PAUSE");
}
You are not deallocating buffer correctly. If none of the chars in string is equal to delimiter the code in this if statement :
if (string[k] == delimiter)
will never be executed and c will remain 0. Then this line :
tokens[c] = nullptr;
will set the first element of tokens that is stored in startBufferPointer to nullptr. In that case you are leaking buffer as the pointer to buffer is "forgotten" in main.
tokens is deallocated correctly in all cases.
Yes, there is no memory leak, but why not use a type that makes it guaranteed?
struct Tokens
{
explicit Tokens(size_t len) : tokens(new char*[len]), buffer(new char[len])
{ }
std::unique_ptr<char*[]> tokens;
std::unique_ptr<char[]> buffer;
};
Tokens Tokenize(const char delimiter, const char* string)
{
auto len = strlen(string);
Tokens result(len);
char* pointer = result.buffer.get();
int c = 0;
for (size_t k = 0; k < len; k++)
{
if (string[k] == delimiter)
{
result.buffer[k] = '\0';
result.tokens[c] = pointer;
pointer = result.buffer.get() + k + 1;
c++;
continue;
}
result.buffer[k] = string[k];
}
result.tokens[c] = nullptr;
return result;
}
int main()
{
auto tok = Tokenize('.', "192.168.1.1");
char** tokens = tok.tokens.get();
while (*tokens != nullptr)
{
cout << *tokens << endl;
tokens++;
}
}
Now all the memory is managed automatically and it's almost impossible to leak.

Value type const char cannot be used to initialize an entity of type char*

I am having the following problem with my code, though it compiles correctly:
value type const char cannot be used to initialize an entity of type char*
Can someone help me? I can run the code which is weird but I can't create a makefile using this. It's very weird to me.
int SpliString(struct dict_word *entry, const char *str)
{
long word_length,j,k;
int yearIndex;
char *buffer;
char *endOfYears;
char *endOfYear;
char *endOfDefinition;
char *endOfWord = strstr(str, "_#_");
//Sets the first num bytes of the block of memory pointed by ptr
//to the specified value (related as an unsigned char)
memset(entry, 0, sizeof(struct dict_word));
// If '_#_' is not found, it's NULL
if (endOfWord)
{
// Calculating word legth; 'str' points to start of word, 'endofWord' points to '_#_' that is just after word
word_length = endOfWord - str;
// Copying data into the word
strncpy(entry->words, str, word_length);
// 'endOfYears' points to '_#_,' but wee need to find follow '_#_'
// therefore there is added 3 in order to skip curremnt '_#_
endOfYears = strstr(endOfWord+3, "_#_");
if (endOfYears)
{
word_length = endOfYears - (endOfWord+3);
// Skips _#_
buffer = endOfWord+3;
yearIndex = 0;
j = 0;
// Finds next year in the line, it stops if all 10 years is filled
// or end of years string is reached
while(yearIndex<10 && buffer+j<endOfYears)
{
// Stores year in the buffer, with converting 'stirng' to 'int'
entry->year[yearIndex] = atoi(buffer+j);
// check year for negative...
if (entry->year[yearIndex]<=0)
return 0;
// Locating substring; 'j' is current offset from beginning of buffer
endOfYear = strchr(buffer+j, '_');
if (endOfYear)
{
j = endOfYear - buffer;
j++;
yearIndex++;
}
else
{
break;
}
}
//endOfYears points to '_#_' that separatates 'years' and 'definition'
//and there is needed to find '_#_' between 'definition' and 'synonyms'
//therefore it skips '_#_' that separatates 'years' and 'definition',
//+3, because '_#_' has length = 3
endOfDefinition = strstr(endOfYears+3, "_#_");
if (endOfDefinition)
{
word_length = endOfDefinition - (endOfYears+3);
k = 0;
for(j=0; j<word_length; j++)
{
// Skips '_#_'
if (endOfYears[j+3]==',')
{
entry->eng_synonyms[k] = ' ';
k++;
}
else if (endOfYears[j+3]>='a' && endOfYears[j+3]<='z')
{
entry->eng_synonyms[k] = endOfYears[j+3];
k++;
}
else if (endOfYears[j+3]!='_')
{
return 0;
}
}
k = 0;
word_length = (str+strlen(str)) - (endOfDefinition+3);
for(j=0; j<word_length; j++)
{
if (endOfDefinition[j+3]==',')
{
entry->heb_synonyms[k] = ' ';
k++;
}
else if (endOfDefinition[j+3]>='A' && endOfDefinition[j+3]<='Z')
{
entry->heb_synonyms[k] = endOfDefinition[j+3];
k++;
}
else if (endOfDefinition[j+3]!='_')
{
return 0;
}
}
}
// Check for legality
// Check all symbols of 'entry->words'
// calculate length and supress warning
for(j=0;j<(int)strlen(entry->words);j++)
{
if (entry->words[j]<'a' || entry->words[j]>'z')
return 0;
}
return 1;
}
}
return 0;
}
Use
const char *buffer;
const char *endOfWord = strstr(str, "_#_");
Confident OP is compiling in C++.
// C
char *strstr(const char *s1, const char *s2);
// C++
const char* strstr(const char* s1, const char* s2);
char* strstr( char* s1, const char* s2);
See
Compile C app with Visual Studio 2012
How to compile and execute C program on Visual Studio 2012 for Windows 8?

making a new char* out of two old char*s

I am open to using intermediary C++ code, though C code is the preference.
I have code like the following:
char *fileName1 = "graph";
char *extension1 = ".eps";
I want to create a new char* variable called fileName1WithExtension1 which would correspond to "graph.eps", formed from the two char* variables given above. How can this be done?
If you use C++, have those as std::string strings:
std::string fileName1 = "graph";
std::string extension1 = ".eps";
And then simply
std::string fileName1WithExtension1 = filename1 + extension1;
If you then need to pass that to a C library function expecting a C string, get char pointer with fileName1WithExtension1.c_str()
There really is no reason to muck about with plain C strings in C++ code. It is so error-prone and tedious, that it should be actively avoided.
char *new_string;
new_string = malloc(strlen(fileName1) + strlen(extension1) + 1);
sprintf(new_string, "%s%s", fileName1, extension1);
...
free(new_string)
You can use the asprintf() function
char *buffer;
asprintf (&buffer, "%s%s", fileName1, extension1);
When the buffer variable become useless in your code, you have to free the memory allocated for buffer by asprintf with
free(buffer);
char *joined;
joined = (char*)malloc(strlen(fileName1) + strlen(extension1) + 1);
strcpy(joined, fileName1)
strcat(joined, extension1)
For a small performance increase, if the compiler is smart enough at optimizing, change the last line to
strcpy(joined+strlen(fileName1), extension1)
Even better, store the length of fileName1 in a variable the first time you determine it, and use it in the final strcpy().
If you want to go REALLY low-level, with ugly loops and such, you can do this: (Tested, it compiles and gives the expected and desired results)
char* filename1 = "graph";
char* extension1 = ".eps";
char* filename1WithExtension1 = combine(filename1, extension1);
where:
char* combine(char* str1, char* str2)
{
int str1len = 0, str2len = 0;
while(str1[str1len] != '\0') {
str1len++;
}
while(str2[str2len] != '\0') {
str2len++;
}
int outputlen = str1len + str2len + 1;
char* output = new char[outputlen];
for(int i = 0; i < str1len; i++)
{
output[i] = str1[i];
}
for(int i = str1len; i < outputlen; i++)
{
output[i] = str2[i - str1len];
}
return output;
}
I did a bit of C brush-up for fun, here's an alternative (C90 and C++ compatible code) for joining an array of C strings with separator. It should be very efficient with any decently optimizing compiler, too:
#include <string.h>
#include <stdlib.h>
/* **parts are strings to join, a NULL-terminated array of char*
* sep is separator string, use "" for no separator, must not be NULL
* returns malloc-allocated buffer which must be freed
* if len_out!=NULL, sets *len_out to strlen of result string */
char *astrjoin(int *len_out, const char *sep, char **parts) {
int part_count;
int parts_total_len = 0;
for(part_count = 0; parts[part_count]; ++part_count) {
parts_total_len += strlen(parts[part_count]);
}
if (part_count > 0) {
int malloc_size = (part_count - 1) * strlen(sep) + parts_total_len + 1;
char *result = (char*)malloc(malloc_size);
char *dest = result;
for(;;) {
const char *src;
for(src=*parts; *src; ++src) *dest++ = *src;
if (!*++parts) break;
for(src=sep ; *src; ++src) *dest++ = *src;
}
*dest = 0;
if (len_out) *len_out = malloc_size - 1;
return result;
} else {
if (len_out) *len_out = 0;
return strdup("");
}
}
Example usage:
int main(int argc, char *argv[]) {
/* argv is NULL-terminated array of char pointers */
char *commandline = astrjoin(NULL, " ", argv);
printf("argc: %d\nargv: %s\n", argc, commandline);
free(commandline);
return 0;
}
To call that in context of your question, it could be something like:
char *tmparr[] = { fileName1, exteansion1, NULL };
char *fileName1WithExtension1 = astrjoin(NULL, "", tmparr);
It would be trivial to create a version with sep and/or len_out dropped, or a version supporting "varargs", with signature looking something like:
char *astrjoin_va(int *len_out, const char *sep, ...);
Which would be nicer to call in context of your question:
char *fileName1WithExtension1 = astrjoin_va(NULL, "", fileName1, extension1, NULL);