Equivalent of wstring in C - c++

How can I read and access Unicode characters with standard C. Previously I was using C++ and std::wstring for whole word and 'const wchar_t' for a single characters, which works perfectly( below is example code).
But now I am not allowed to use C++. How can I replace the 'wstring' in C? How can I convert my code to standard C?
...
...
const wchar_t small_char[10]={ L'锕',L'吖',L'啊',L'阿',L'呵',L'嗄',L'埃',L'哀',L'哎'};
std::wstring strIn=L"锕吖哎";
std::wstring s_temp=L"";
int length= strIn.length();
for(int i=0;i<length;i++){
if(strIn[i]==small_char[2]){
s_temp=s_temp+L"ba";
}
else if(strIn[i]==small_char[5]){
s_temp=s_temp+L"pe";
}
else{
s_temp=s_temp+strIn[i];
}
}
...
...

How can I replace the 'wstring' in C? How can I convert my code to standard C?
std::wstring is just a wrapper for wchar_t*. You can use wchar_t directly, you just have to manage the string memory and concatenations manually.
Try this:
...
const wchar_t small_char[10] = { L'锕', L'吖', L'啊', L'阿', L'呵', L'嗄', L'埃', L'哀', L'哎'};
wchar_t *strIn = L"锕吖哎";
int length = wcslen(strIn);
wchar_t *s_temp = (wchar_t*) calloc((length*2)+1, sizeof(wchar_t));
int s_temp_len = 0;
for(int i = 0; i < length; i++)
{
if (strIn[i] == small_char[2])
{
memcpy(&s_temp[s_temp_len], L"ba", 2*sizeof(wchar_t));
s_temp_len += 2;
s_temp[s_temp_len] = L'\0';
}
else if (strIn[i] == small_char[5])
{
memcpy(&s_temp[s_temp_len], L"pe", 2*sizeof(wchar_t));
s_temp_len += 2;
s_temp[s_temp_len] = L'\0';
}
else
{
s_temp[s_temp_len] = strIn[i];
s_temp_len += 1;
s_temp[s_temp_len] = L'\0';
}
}
// use s_temp up to s_temp_len characters as needed...
free(s_temp);
...
If you want something more like std::wstring, you should pre-allocate a small buffer and resize it whenever you are going to exceed its capacity during concatenations. A struct is useful for keeping track of that:
struct my_wstring
{
wchar_t *data;
int length;
int capacity;
};
void wstring_init(struct my_wstring *str)
{
str->data = NULL;
str->length = 0;
str->capacity = 0;
};
void wstring_clear(struct my_wstring *str)
{
free(str->data);
str->data = NULL;
str->length = 0;
str->capacity = 0;
};
// allocate in multiples of 32
const int delta = 32;
void wstring_append_str_len(struct my_wstring *str, const wchar_t *value, int valueLen)
{
if ((!str) || (!value) || (valueLen < 1)) return;
int newLen = str->length + valueLen;
if ((newLen + 1) > str->capacity)
{
// round to next highest multiple of 32
int newCap = ((newLen + 1) + (delta - 1)) & ~delta;
wchar_t *newData = (wchar_t*) realloc(str->data, newCap * sizeof(wchar_t));
if (!newData)
{
// memory allocation error, do something!
return;
}
str->data = newData;
str->capacity = newCap;
}
memcpy(&(str->data[str->length]), value, valueLen * sizeof(wchar_t));
str->length = newLen;
str->data[newLen] = L'\0';
}
void wstring_append_str(struct wstring *str, const wchar_t *value)
{
wstring_append_str_len(str, value, wcslen(value));
}
void wstring_append_chr(struct wstring *str, const wchar_t value)
{
wstring_append_str_len(str, &value, 1);
}
...
const wchar_t small_char[10] = { L'锕', L'吖', L'啊', L'阿', L'呵', L'嗄', L'埃', L'哀', L'哎'};
wchar_t *strIn = L"锕吖哎";
struct my_wstring s_temp;
wstring_init(&s_temp);
int length = wcslen(strIn);
for(int i = 0; i < length; i++)
{
if (strIn[i] == small_char[2])
{
wstring_append_str(&s_temp, L"ba");
}
else if (strIn[i] == small_char[5])
{
wstring_append_str(&s_temp, L"pe");
}
else
{
wstring_append_chr(&s_temp, strIn[i]);
}
}
// use s_temp.data up to s_temp.length characters as needed...
wstring_clear(&s_temp);
...

The equivalent C routines are
== wcscmp or wcsncmp
+= wcscat or wcscat_s
= wcscpy or wcsncpy or wcscpy_s
.size() or length() wcslen
In your case since you are comparing one character at a time, you do not need wcscmp. Make sure all your strings are null terminated otherwise the non _s versions won't work.

Related

I keep getting a segfault every time I run this program, but I cannot understand why

I have a function that takes a user entered string and splits it into individual words using a dynamically allocated two-dimensional array. The words are separated by delimiters used as indicators of where one word ends and another begins.
Here is my code:
int countWords(const char * sentence, char * delims)
{
int wordsInArray = 0;
int count = 0;
while(*(sentence + count) != '\0')
{
if(*(sentence + count) == *delims && *(sentence + count + 1) != *delims)
{
wordsInArray++;
}
if(*(sentence + count + 1) == '\0')
{
wordsInArray++;
}
count++;
}
return wordsInArray;
}
int getLength(const char * sentence)
{
const char *p = sentence;
while(*p != '\0')
{
p++;
}
return p-sentence;
}
char ** getWords(const char * sentence, int & wordcount)
{
char delims[] = " .,\t?!";
int sentenceLength = getLength(sentence);
wordcount = countWords(sentence, delims);
char ** words;
words = new char *[wordcount];
int length = 0;
int count = 0;
for (int a = 0; a < sentenceLength; a++)
{
if(*(sentence + a) != *delims)
{
length++;
}
else if ((*(sentence + a) == *delims && *(sentence + a + 1) != *delims) || *(sentence + a) == '\0')
{
*(words + count) = new char[length+1];
for (int z = 0; z < length; z++)
{
*(*(words + count) + z) = *(sentence + z);
}
length = 0;
count++;
}
}
return words;
}
However, my countWords function is not properly counting the words in the string, and I do not know why.
Try something more like this:
int indexOf(const char * sequence, char ch) {
const char *p = sequence;
while (*p != '\0') {
if (*p == ch) {
return p - sequence;
}
}
return -1;
}
const char* findFirstOf(const char * sequence, const char *chars) {
const char *p = sequence;
while (*p != '\0') {
if (indexOf(chars, *p) != -1) {
return p;
}
}
return NULL;
}
const char* findFirstNotOf(const char * sequence, const char *chars) {
const char *p = sequence;
while (*p != '\0') {
if (indexOf(chars, *p) == -1) {
return p;
}
}
return NULL;
}
int countWords(const char * sequence, char * delims) {
int count = 0;
const char *p = sequence;
do {
p = findFirstNotOf(p, delims);
if (p == NULL) break;
++count;
p = findFirstOf(p, delims);
}
while (p != NULL);
return count;
}
int getLength(const char * sequence) {
const char *p = sequence;
while (*p != '\0') {
++p;
}
return p-sequence;
}
char* dupString(const char * sequence, int length = -1) {
if (length == -1) {
length = getLength(sequence);
}
char *result = new char[length+1];
for (int i = 0; i < length; ++i) {
result[i] = sequence[i];
}
result[length] = '\0';
return result;
}
char** getWords(const char * sequence, int & wordcount) {
const char delims[] = " .,\t?!";
int count = countWords(sequence, delims);
char ** words = new char *[count];
if (count > 0) {
count = 0;
const char *p = sequence;
do {
p = findFirstNotOf(p, delims);
if (p == NULL) break;
const char *q = findFirstOf(p, delims);
if (q == NULL) {
words[count++] = dupString(p);
break;
}
words[count++] = dupString(p, q-p);
p = ++q;
}
while (true);
}
wordcount = count;
return words;
}
That being said, the fact you are using new[] means you are using C++, so you should be using the STL to make life easier:
#include <string>
#include <vector>
std::vector<std::string> getWords(const std::string & sequence) {
const char delims[] = " .,\t?!";
std::vector<std::string> words;
std::string::size_type i = 0;
do {
i = sequence.find_first_not_of(delims, i);
if (i == std::string::npos) break;
std::string::size_type j = sequence.find_first_of(delims, i);
if (j == std::string::npos) {
words.push_back(sequence.substr(i));
break;
}
words.push_back(sequence.substr(i, j-i));
i = ++j;
}
while (true);
return words;
}

Access Violation Pointer Error

I am implementing my version of the basic String class, however I am running into an issue that I have never seen before and have no idea how to properly debug. My code is pasted below. All functions have their header counterparts. My test is simply creating one object using the convert constructor.
A4String obj1("this");
My problem is I get an Access violation reading location exception thrown. My research has indicated that I may be trying to access memory outside of Visual Studio's allotment. I'm having trouble finding where this pointer error exists though. I have placed breakpoints through every step of the convert constructor and subsequent function calls within however my program doesn't throw the exception until it returns to main, seemingly after my program has executed completely.
#include "A4String.h"
A4String::A4String() {
data = new char[5];
data[0] = '\0';
capacity = 5;
}
A4String::~A4String() {
if (capacity != 0)
delete[] data;
}
//Copy Constructor
A4String::A4String(const A4String &right) {
cout << "copy" << endl;
data = new char[right.capacity + 1];
strcpy(data, right.data, capacity);
capacity = right.capacity;
}
//Convert Constructor
A4String::A4String(const char *sptr) {
cout << "convert" << endl;
capacity = (strlen(sptr)) + 1;
data = new char[capacity + 1];
strcpy(sptr, data, capacity);
}
//Assignment
A4String& A4String::operator = (const A4String & right) {
//if (capacity != 0) delete[] data;
data = new char[right.capacity + 1];
strcpy(data, right.data, capacity);
capacity = right.capacity;
return *this;
}
//Equivalence
bool A4String::operator == (const A4String &right) const {
return (strcmp(data, right.data)) == 0;
}
int A4String::length() const {
return capacity;
}
void A4String::addChar(char) {
//Not implemented yet
}
string A4String::toString() {
string str = "";
int i = 0;
while (data[i] != '\0') {
str += data[i];
i++;
}
return str;
}
void A4String::strcpy(const char *source, char* destination, int size)
{
for (int i = 0; i < 20; i++)
destination[i] = '\0';
int index = 0;
while (source[index] != '\0')
{
destination[index] = source[index];
index++;
}
destination[index] = '\0';
}
int A4String::strcmp(char *str1, char *str2)
{
if (*str1 < *str2)
return -1;
if (*str1 > *str2)
return 1;
if (*str1 == '\0')
return 0;
return strcmp(str1 + 1, str2 + 1);
return 0;
}
int A4String::strlen( char *s)
{
char *start;
start = s;
while (*s != 0)
{
++s;
}
return s - start;
}
The problem is your A4String::strcpy, the line
for (int i = 0; i < 20; i++)
destination[i] = '\0';
The destination has less than 20 characters, so it crashes.
Use of the hard code number 20 in the A4String::strcpy is not right. I suggest changing it to size.
void A4String::strcpy(const char *source, char* destination, int size)
{
// for (int i = 0; i < 20; i++)
for (int i = 0; i < size; i++)
destination[i] = '\0';
int index = 0;
// Add an additional check here also.
// while (source[index] != '\0' )
while (source[index] != '\0' && index < size)
{
destination[index] = source[index];
index++;
}
destination[index] = '\0';
}
Disclaimer Fixing the above function may not fix your crashing problem even though the use of 20 is most likely crashing your program. In other words, there might be other problems in your code too.

C++ array of unsigned char arrays

I'm trying to understand how to create & handle an array of unsigned char arrays in C++. Such as:
Array[0] = { new array of unsigned chars }
Array[1] = { new array of unsigned chars }
Array[2] = { new array of unsigned chars }
....and so on
I've written the next code but I have the feeling that I'm doing something wrong. The code works correctly, but I don't know if the way I declare the "buffer" and how I delete the cache is the correct way, or if it can produce a memory leak.
#define MAX_BUFFER 10
unsigned char* cache[MAX_BUFFER];
bool cache_full = false;
void AddToCache(unsigned char *buffer, const size_t buffer_size)
{
if (cache_full == true)
{
return;
}
for (int index = 0; index < MAX_BUFFER; index++)
{
if (cache[index] == NULL)
{
cache[index] = new unsigned char[buffer_size];
memcpy(cache[index], buffer, buffer_size);
}
if (index < MAX_BUFFER - 1)
{
cache_full = true;
}
}
}
void ClearCache()
{
for (int index = 0; index < MAX_BUFFER; index++)
{
if (cache[index] != NULL)
{
delete[] cache[index];
cache[index] = NULL;
}
}
cache_full = false;
}
bool IsCacheFull()
{
return cache_full;
}
This works?
memcpy(cache, buffer, buffer_size);
It shouldn't. That's overwriting all the pointers in cache with the contents of buffer. In context, this should probably be:
memcpy(cache[index], buffer, buffer_size);
Also, you'll be setting cache_full to true, repeatedly, every time you add to the cache. Try:
AddToCache(unsigned char *buffer, const size_t buffer_size)
{
for (int index = 0; index < MAX_BUFFER; index++)
{
if (cache[index] == NULL)
{
cache[index] = new unsigned char[buffer_size];
memcpy(cache[index], buffer, buffer_size);
return(index); // in case you want to find it again
}
}
// if we get here, we didn't find an empty space
cache_full = true;
return -1;
}

Encode/Decode URl In C++

How can i encode a URL in client side and Decode the same in Server side.Is there any Built in apis are available for this purpose.Please anyone suggest a solution.Also i want to know how can i do percentage encoding in C++?
I've found this implementation from dlib quite useful. You don't even need to grab the whole library, just these 4 functions (unhex, hex, encode, decode). And it has a boost license.
You can check out this article and this
Encode:
std::string UriEncode(const std::string & sSrc)
{
const char DEC2HEX[16 + 1] = "0123456789ABCDEF";
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
const int SRC_LEN = sSrc.length();
unsigned char * const pStart = new unsigned char[SRC_LEN * 3];
unsigned char * pEnd = pStart;
const unsigned char * const SRC_END = pSrc + SRC_LEN;
for (; pSrc < SRC_END; ++pSrc)
{
if (SAFE[*pSrc])
*pEnd++ = *pSrc;
else
{
// escape this char
*pEnd++ = '%';
*pEnd++ = DEC2HEX[*pSrc >> 4];
*pEnd++ = DEC2HEX[*pSrc & 0x0F];
}
}
std::string sResult((char *)pStart, (char *)pEnd);
delete [] pStart;
return sResult;
}
Decode:
std::string UriDecode(const std::string & sSrc)
{
// Note from RFC1630: "Sequences which start with a percent
// sign but are not followed by two hexadecimal characters
// (0-9, A-F) are reserved for future extension"
const unsigned char * pSrc = (const unsigned char *)sSrc.c_str();
const int SRC_LEN = sSrc.length();
const unsigned char * const SRC_END = pSrc + SRC_LEN;
// last decodable '%'
const unsigned char * const SRC_LAST_DEC = SRC_END - 2;
char * const pStart = new char[SRC_LEN];
char * pEnd = pStart;
while (pSrc < SRC_LAST_DEC)
{
if (*pSrc == '%')
{
char dec1, dec2;
if (-1 != (dec1 = HEX2DEC[*(pSrc + 1)])
&& -1 != (dec2 = HEX2DEC[*(pSrc + 2)]))
{
*pEnd++ = (dec1 << 4) + dec2;
pSrc += 3;
continue;
}
}
*pEnd++ = *pSrc++;
}
// the last 2- chars
while (pSrc < SRC_END)
*pEnd++ = *pSrc++;
std::string sResult(pStart, pEnd);
delete [] pStart;
return sResult;
}
For Encoding:
You can use "g_uri_escape_string()" function provided glib.h.
https://developer.gnome.org/glib/stable/glib-URI-Functions.html
#include <stdio.h>
#include <stdlib.h>
#include <glib.h>
int main() {
char *uri = "http://www.example.com?hello world";
char *encoded_uri = NULL;
//as per wiki (https://en.wikipedia.org/wiki/Percent-encoding)
char *escape_char_str = "!*'();:#&=+$,/?#[]";
encoded_uri = g_uri_escape_string(uri, escape_char_str, TRUE);
printf("[%s]\n", encoded_uri);
free(encoded_uri);
return 0;
}
compile it with:
gcc encoding_URI.c `pkg-config --cflags --libs glib-2.0`

C++ vector equivalent in C [duplicate]

This question already has answers here:
How to replicate vector in c?
(6 answers)
Closed 2 years ago.
I have a code (C++) that looks like this
vector<int> values[10000];
int i, j;
while (.....) {
scanf("%d%d", &i, &j);
values[i].push_back(j);
values[j].push_back(i);
}
but I want to rewrite this code to C. How can I do this?
I researched the opportunity to make the own stack, but maybe have more lightweight way to rewrite this code, maybe two-dimensional arrays. So far I can not think how this remake, I hope that someone more experienced tell me how to do it :)
Sorry guys, added a more advanced example...
Instead of rolling your own, you may want to try a C container library, e.g. http://code.google.com/p/ccl/
You can use Gena library. It closely resembles stl::vector in pure C89.
You can check it out here:
https://github.com/cher-nov/Gena
Something like this:
#include <stdio.h>
#include <stdlib.h>
typedef struct _darray
{
size_t size;
size_t actual_size;
int *content;
} darray;
void darray_create(darray *d)
{
d->actual_size = d->size = 0;
d->content = NULL;
}
void darray_append(darray *d, int v)
{
if (d->size+1 > d->actual_size)
{
size_t new_size;
if (!d->actual_size)
{
new_size = 1;
}
else
{
new_size = d->actual_size * 2;
}
int *temp = realloc(d->content, sizeof(int) * new_size);
if (!temp)
{
fprintf(stderr, "Failed to extend array (new_size=%zu)\n", new_size);
exit(EXIT_FAILURE);
}
d->actual_size = new_size;
d->content = temp;
}
d->content[d->size] = v;
d->size++;
}
const int* darray_data(darray *d)
{
return d->content;
}
void darray_destroy(darray *d)
{
free(d->content);
d->content = NULL;
d->size = d->actual_size = 0;
}
size_t darray_size(darray *d)
{
return d->size;
}
int main()
{
int i;
darray myarray;
const int *a;
darray_create(&myarray);
for(i = 0; i < 100; i++)
{
darray_append(&myarray, i);
}
a = darray_data(&myarray);
for(i = 0; i < darray_size(&myarray); i++)
{
printf("i=%d, value=%d\n", i, a[i]);
}
darray_destroy(&myarray);
}
You can try something like this:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct vector
{
int len;
int allocated;
int step;
int *data;
};
#define INIT_SIZE 1
void init_vector(struct vector *v)
{
v->len = 0;
v->allocated = 0;
v->step = 2;
v->data = NULL;
}
int append(struct vector *v, int item)
{
if (!v->data)
{
v->data = malloc(INIT_SIZE * sizeof(int));
if (!v->data)
return -1;
v->allocated = INIT_SIZE;
}
else
if (v->len >= v-vallocated)
{
int *tmp = realloc(v->data,
v->allocated * v->step * sizeof(int));
if (!tmp)
return -1;
v->data = tmp;
v->allocated *= v->step;
}
v->data[v->len] = item;
v->len++;
return 0;
}
int delete(struct vector *v, int index)
{
if (index < 0 || index >= v->len)
return -1;
memmove(v->data + index, v->data + index + 1,
(v->len - index - 1) * sizeof(int));
v->len--;
return 0;
}
void print(const struct vector *v)
{
printf("Array:\n");
for (int i = 0; i < v->len; i++)
printf("%d ", v->data[i]);
printf("\n");
}
int main(void)
{
struct vector v;
int rc;
init_vector(&v);
rc = append(&v, 1);
assert(rc == 0);
rc = append(&v, 2);
assert(rc == 0);
rc = append(&v, 3);
assert(rc == 0);
rc = append(&v, 4);
assert(rc == 0);
rc = append(&v, 5);
assert(rc == 0);
print(&v);
rc = delete(&v, 2);
assert(rc == 0);
print(&v);
free(v.data);
return 0;
}
A rough equivalent of a C++ vector would be a resizing C array (to account for more elements than available).
Ergo, the equivalent of an array of vectors would be an array of pointers (an array of arrays wouldn't cut it because of the resizing constraint).
int* values[1000];
You'll need to account for the sizes though, so you could either do that externally or wrap the logic inside a structure.
int sizes[1000];
int noElements[1000];
// all sizes and noElements initially 0
for (int i = 0; i < 10; i++) {
if ( noElements[i] >= sizes[i] )
{
// allocate more memory for values[i];
// copy old contents into the new memory
// update sizes[i]
}
values[i][noElements] = 10;
noElements++;
}
There is no C standard equivalent to the c++ vector, though you could create a struct based off of the vector in c++. The struct would
Resize itself if the array bounds are passed the max size
perform the operations similar to that of a vector
OR
Create a linked list stack struct that simulates that of a c++ vector
I'm affraid you'll have to work with heap memory in 80's fashion in the plain C.
typedef struct tagArrayDesc {
int* arr;
size_t top;
size_t reserved;
} ArrayDesc;
#define EC(NAME, T) size_t ensure_capacity##NAME##(size_t size, \
T** vec, \
size_t reserved) \
{ \
size_t new_reserved; \
new_reserved = reserved; \
if (reserved < size) { \
if (reserved != 0) { \
new_reserved *= 2; \
} else { \
new_reserved = 0x10; \
} \
} \
if (new_reserved < size) { \
new_reserved = (size * 4) / 3; \
} \
if (new_reserved > reserved) { \
*vec = realloc(*vec, sizeof(**vec) * new_reserved); \
memset((*vec) + reserved, 0, sizeof(T) * (new_reserved - reserved)); \
} \
return new_reserved; \
}
EC(_int, int)
EC(_array_desc, ArrayDesc)
int main()
{
ArrayDesc* rows = NULL;
size_t rows_size = 0;
size_t rows_reserved = 0;
while (true) {
int i, j;
scanf("%d%d", &i, &j);
rows_reserved = ensure_capacity_array_desc(i + 1, &rows, rows_reserved);
rows[i].reserved = ensure_capacity_int(j + 1, &rows[i].arr, rows[i].reserved);
rows[i].arr[j] = 42;
}
return 0;
}
You have to work with dynamic memory allocation. It's not hard. Every time when a new item must be inserted just use realloc. Somethink that looks like this:
#include <cstdlib>
typedef struct { } UserType;
int currentSize = 0;
UserType* values;
/// Add new value to values method
void addValue(const UserType& newValue)
{
++currentSize;
values = static_cast<UserType*>(realloc(values, currentSize));
if (values == NULL)
// memory allocation filed, place fix code here
*(values + currentSize) = newValue;
}
Remember, u have to use free for free memory of the values. Also, you may don't free allocated memory if will end work right now.