radix sort array of strings - c++

I am trying to use radix sort to sort file contain social security and date of birth the format looks like this "###-##-####,#######.I have to apply radix sort on each fields according to command line switch. I have a radix sort that is work for int array and i am trying to modify the code for string type array but i am not sure how to accomplish this. I did a quick sort for string type by comparing strings and pivot and that is work fine however for radix sort I am not if I can do this with string type or I have to convert the string to integer. I have tried to use "atoi" to convert to integer but I am not sure how to correctly do this if I have to.
string getMax(string arr[], int n){
string max = arr[0];
for (int i = 1; i < n; i++){
if (arr[i]>max)
max = arr[i];
}
return max;
}
void countSort(string a[], int size, int k){
string *b = NULL; int *c = NULL;
b = new string[size];
c = new int[k];
for (int i = 0; i <k; i++){
c[i] = 0;
//cout << c[i] << "\n";
}
for (int j = 0; j <size; j++){
c[(a[j]/k)%10]++; //a[j] is a string
//cout << c[a[j]] << endl;
}
for (int f = 1; f <10; f++){
c[f] += c[f - 1];
}
for (int r = size - 1; r >= 0; r--){
b[c[(a[r] / k) % 10] - 1] = a[r];
c[(a[r] / k) % 10]--;
}
for (int l = 0; l < size; l++){
a[l] = b[l];
}
}
void radixSort(string b[], int r){
string max = getMax(b, r);
for (int digit = 1; max / digit > 0; digit *= 10){
countSort(b, r, digit);
}
};

I didn't try, but I think you can do radix sort for string.
Calculate the length of the longest string in the array to sort.
Do radix sort just like for integers. Do sorting using each characters in the string.
If a string is shorter than another and there is no character in the "digit", consider its value as -65536 (or a smaller value than any other characters).
UPDATE: I tested my idea and it seems working.
#include <cstdio>
#include <string>
using std::string;
size_t getMax(string arr[], int n){
size_t max = arr[0].size();
for (int i = 1; i < n; i++){
if (arr[i].size()>max)
max = arr[i].size();
}
return max;
}
void countSort(string a[], int size, size_t k){
string *b = NULL; int *c = NULL;
b = new string[size];
c = new int[257];
for (int i = 0; i <257; i++){
c[i] = 0;
//cout << c[i] << "\n";
}
for (int j = 0; j <size; j++){
c[k < a[j].size() ? (int)(unsigned char)a[j][k] + 1 : 0]++; //a[j] is a string
//cout << c[a[j]] << endl;
}
for (int f = 1; f <257; f++){
c[f] += c[f - 1];
}
for (int r = size - 1; r >= 0; r--){
b[c[k < a[r].size() ? (int)(unsigned char)a[r][k] + 1 : 0] - 1] = a[r];
c[k < a[r].size() ? (int)(unsigned char)a[r][k] + 1 : 0]--;
}
for (int l = 0; l < size; l++){
a[l] = b[l];
}
// avold memory leak
delete[] b;
delete[] c;
}
void radixSort(string b[], int r){
size_t max = getMax(b, r);
for (size_t digit = max; digit > 0; digit--){ // size_t is unsigned, so avoid using digit >= 0, which is always true
countSort(b, r, digit - 1);
}
}
int main(void) {
string data[] = {
"aaaba",
"dfjasdlifjai",
"jiifjeogiejogp",
"aabaaaa",
"gsgj",
"gerph",
"aaaaaaa",
"htjltjlrth",
"joasdjfisdjfdo",
"hthe",
"aaaaaba",
"jrykpjl",
"hkoptjltp",
"aaaaaa",
"lprrjt"
};
puts("before sorting:");
for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); i++) {
printf(" %s\n", data[i].c_str());
}
radixSort(data, (int)(sizeof(data) / sizeof(data[0])));
puts("after sorting:");
for (size_t i = 0; i < sizeof(data) / sizeof(data[0]); i++) {
printf(" %s\n", data[i].c_str());
}
return 0;
}

Related

C++ getting segfaults on an array I already assigned values to

I was trying to do a problem on HackerEarth, and I am getting Segmentation Faults for this for loop:
for (int index = 0; index < 18; index++){
cout << arr_list[arr_index][index];
}
Even though I assigned values to arr_list[arr_index][index] in the loop right before (so I'm guessing the values are somehow not being saved, but I don't know how the values aren't being saved).
When I remove this for loop, I don't get any segfaults, and the cout information prints what's expected (the numbers I've inputted, with each digit twice for each cout inside the loop).
#include <iostream>
#include <string>
using namespace std;
void step(int arr_list[1000000][18], int cs, int N){
/**
int freq[100000] = {0};
for (int i = 0; i < N; i++){
int cur_arr[18];
for (int index = 0; index < 18; index++){
cur_arr[index] = arr_list[i][index];
}
if (cs == 4){
freq[cur_arr[0]*100 + cur_arr[1] * 10 + cur_arr[2]] += 1;
} else{
freq[cur_arr[18 - cs*5] * 10000 + cur_arr[18 - cs*5 + 1] * 1000 + cur_arr[18 - cs*5 + 2]*100 + cur_arr[18 - cs*5 + 3] * 10 + cur_arr[18 - cs*5 + 4]] += 1;
}
}
for (int i = 1; i < 100000; i++){
freq[i] += freq[i-1];
}
int new_arr_list[1000000][18];
for (int i = N-1; i >= 0; i--){
int pos;
int cur_arr[18];
for (int index = 0; index < 18; index++){
cur_arr[index] = arr_list[i][index];
}
if (cs == 4){
pos = cur_arr[0]*100 + cur_arr[1] * 10 + cur_arr[2];
} else{
pos = cur_arr[18 - cs*5] * 10000 + cur_arr[18 - cs*5 + 1] * 1000 + cur_arr[18 - cs*5 + 2]*100 + cur_arr[18 - cs*5 + 3] * 10 + cur_arr[18 - cs*5 + 4];
}
for (int index = 0; index < 18; index++){
new_arr_list[freq[pos] - 1][index] = arr_list[i][index];
}
freq[pos] --;
}
for (int i = 0; i < N; i++){
for (int index = 0; index < 18; index++){
arr_list[i][index] = new_arr_list[i][index];
}
}
**/
}
int main()
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
int T;
int arr_index = 0;
cin >> T;
int arr_list[1000000][18];
int max_len = 0;
for (int testcase = 0; testcase < T; testcase ++){
string a;
cin >> a;
int a_len = a.length();
if (a_len > max_len){
max_len = a_len;
}
int arr_entry[18];
for (int i = 0; i < a_len ; i++){
arr_entry[18 - a_len + i] = a[i] - 48;
}
for (int i = 0; i < 18 - a_len; i++){
arr_entry[i] = 0;
}
for (int index = 0; index < 18; index++){
arr_list[arr_index][index] = arr_entry[index];
cout << arr_entry[index];
cout << arr_list[arr_index][index];
}
for (int index = 0; index < 18; index++){
cout << arr_list[arr_index][index];
}
arr_index ++;
}
/**
for (int c = 1; c < 5; c++){
if (max_len > (c-1)*5){
step(arr_list, c, T);
for (int i = 0; i < T; i++){
int is_leading_zero = 1;
for (int j = 0; j < 18; j++){
if (is_leading_zero == 0){
cout << arr_list[i][j];
}else{
if (arr_list[i][j] != 0){
is_leading_zero = 0;
cout << arr_list[i][j];
}
}
}
cout << " ";
}
cout << "\n";
}
}
**/
}
I'm assuming this is a common error, and that I'm missing something simple that gives me segfaults for values I already assigned data to.
Does anyone know why this is happening?
You are allocating 72 MB on the stack:
int main()
{
[...]
int arr_list[1000000][18];
[...]
}
This is probably causing a stack overflow.
On the Microsoft Windows platform, the maximum stack size is, by default, 1 MB. On Linux, it is typically 8 MB.
When allocating such large amounts of memory, I recommend that you instead either use
dynamic memory allocation, or
a global variable, or
a static local variable.
This ensures that the array is not stored on the stack.

Sort Diagonally Two Dimensional Array

Firstly I created my two dimensional array, then I translated it to one dimensional array and I bubble sorted the 1D array, but after I didn't find the pattern to bring it back to 2D array diagonally sorted.
#include<iostream>
#include<iomanip>
const int r = 10;
const int c = 10;
const int lim = r * c;
int A[r][c] = { 0 };
int B[lim];
using namespace std;
void generatearray(int A[][], int r, int c){
srand(time(NULL));
for (int i = 0; i < r; i++)
{
for (int j = 0; j < c; j++)
{
A[i][j] = rand() % lim;
}
}
}
void transformingto1Darray(int A[r][c], int b[lim]){
int p = 0;
for (int m = 0; m < r; m++){
for (int n = 0; n < c; n++){
B[p] = A[m][n];
p++;
}
}
}
void sorting1Darray(int B[][]){
int temp = 0;
for (int k = 0; k < lim - 1; k++){
for (int i = 0; i < lim - 1; i++)
if (B[i] > B[i + 1]){
temp = B[i];
B[i] = B[i + 1];
B[i + 1] = temp;
}
}
}
void sortingdiagonally2Darray(int A[][], int B[]){
int main{
generatearray(A);
transformingto1Darray(A, B);
sorting1Darray(B);
sortingdiagonally2Darray(A, B);
return 0;
}
It's a bit of a wonky solution but it dose work. Because of the way multidimensional indexing works the value in B[i] will be equal to the value in A[0][i].
In your case you want something like this in your sortingdiagonally2Darray function.
for (int i = 0; i > r * c; i++) {
A[0][i] = B[i];
}
This works because under the hood arrays are just pointers. B[x] is syntactic sugar for *(B + x) and A[0][x] will equate to *(*(A + 0) + x) because it's a pointer to a pointer (hence the double star/double brackets).

mtrix chain multiplication print the sequence of the mattrices

I have written code for matrix chain multiplication in dynamic programming in c++.
there is an error in the recursive call for printing the correct parenthesization of the matrices. I am taking input from text file and giving output on a text file. please help..
#include <iostream>
#include <fstream>
#include <limits.h>
using namespace std;
int * MatrixChainOrder(int p[], int n)
{
static int m[100][100];
static int s[100][100];
int j, q;
int min = INT_MAX;
for (int i = 1; i <= n; i++)
m[i][i] = 0;
for (int L = 2; L <= n; L++) {
for (int i = 1; i <= n - L + 1; i++) {
j = i + L - 1;
m[i][j] = min;
for (int k = i; k <= j - 1; k++) {
q = m[i][k] + m[k + 1][j] + p[i - 1] * p[k] * p[j];
if (q < m[i][j]) {
m[i][j] = q;
s[i][j] = k;
}
}
}
}
return (*s);
}
void Print(int *s, int i, int j)
{
ofstream outfile("output.text");
if (i == j)
{
outfile << "a1";
}
else
outfile << "(";
{
Print(*s, i, s[i][j]);
Print(*s, s[i][j] + 1, j);
outfile << ")";
}
outfile.close();
}
int main()
{
int arr[100];
int num, i = 0;
ifstream infile("input.text");
while (infile)
{
infile >> num;
arr[i] = num;
i++;
}
i = i - 1;
infile.close();
Print(MatrixChainOrder(arr, i - 1), 0, i - 1);
return 0;
}
In C++ it is better to use std::vector for arrays. Aside from that, you can't mix pointers and arrays like that because the compiler loses track of array size.
For example this doesn't work:
int x[10][20];
void foo(int *ptr)
{
//the numbers 10 and 20 have not been passed through
}
But you can change it to
int x[10][20];
void foo(int arr[10][20])
{
//the numbers 10 and 20 are available
}
MatrixChainOrder is supposed to return a number, according to this link
int MatrixChainOrder(int s[100][100], int p[], int n)
{
int m[100][100];
for (int i = 0; i < 100; i++) m[i][i] = 0;
for (int i = 0; i < 100; i++) s[i][i] = 0;
int q = 0;
for (int L = 2; L <= n; L++) {
for (int i = 1; i <= n - L + 1; i++) {
int j = i + L - 1;
m[i][j] = INT_MAX;
for (int k = i; k <= j - 1; k++) {
q = m[i][k] + m[k + 1][j] + p[i - 1] * p[k] * p[j];
if (q < m[i][j]) {
m[i][j] = q;
s[i][j] = k;
}
}
}
}
return q;
}
int main()
{
int arr[] = { 40, 20, 30, 10, 30 };
int array_size = sizeof(arr) / sizeof(int);
int n = array_size - 1;
int s[100][100];
int minimum = MatrixChainOrder(s, arr, n);
printf("{ 40, 20, 30, 10, 30 } should result in 26000 : %d\n", minimum);
return 0;
}
Likewise you can change your Print function
void Print(int s[100][100], int i, int j)
{
if (i < 0 || i >= 100 || j < 0 || j >= 100)
{
cout << "array bound error\n";
}
//safely access s[i][j] ...
}

Find a subarray of m*m (2<=m<n) having largest sum; out of an n*n int array(having +ve, -ve, 0s)

I have written a solution for the above problem but can someone please suggest an optimized way.
I have traversed through the array for count(2 to n) where count is finding subarrays of size count*count.
int n = 5; //Size of array, you may take a dynamic array as well
int a[5][5] = {{1,2,3,4,5},{2,4,7,-2,1},{4,3,9,9,1},{5,2,6,8,0},{5,4,3,2,1}};
int max = 0;
int **tempStore, size;
for(int count = 2; count < n; count++)
{
for(int i = 0; i <= (n-count); i++)
{
for(int j = 0; j <= (n-count); j++)
{
int **temp = new int*[count];
for(int i = 0; i < count; ++i) {
temp[i] = new int[count];
}
for(int k = 0; k < count; k++)
{
for(int l = 0; l <count; l++)
{
temp[k][l] = a[i+k][j+l];
}
}
//printing fetched array
int sum = 0;
for(int k = 0; k < count; k++)
{
for(int l = 0; l <count; l++)
{
sum += temp[k][l];
cout<<temp[k][l]<<" ";
}cout<<endl;
}cout<<"Sum = "<<sum<<endl;
if(sum > max)
{
max = sum;
size = count;
tempStore = new int*[count];
for(int i = 0; i < count; ++i) {
tempStore[i] = new int[count];
}
//Locking the max sum array
for(int k = 0; k < count; k++)
{
for(int l = 0; l <count; l++)
{
tempStore[k][l] = temp[k][l];
}
}
}
//printing finished
cout<<"------------------\n";
//Clear temp memory
for(int i = 0; i < size; ++i) {
delete[] temp[i];
}
delete[] temp;
}
}
}
cout<<"Max sum is = "<<max<<endl;
for(int k = 0; k < size; k++)
{
for(int l = 0; l <size; l++)
{
cout<<tempStore[k][l]<<" ";
}cout<<endl;
}cout<<"-------------------------";
//Clear tempStore memory
for(int i = 0; i < size; ++i) {
delete[] tempStore[i];
}
delete[] tempStore;
Example:
1 2 3 4 5
2 4 7 -2 1
4 3 9 9 1
5 2 6 8 0
5 4 3 2 1
Output:
Max sum is = 71
2 4 7 -2
4 3 9 9
5 2 6 8
5 4 3 2
This is a problem best solved using Dynamic Programming (DP) or memoization.
Assuming n is significantly large, you will find that recalculating the sum of every possible combination of matrix will take too long, therefore if you could reuse previous calculations that would make everything much faster.
The idea is to start with the smaller matrices and calculate sum of the larger one reusing the precalculated value of the smaller ones.
long long *sub_solutions = new long long[n*n*m];
#define at(r,c,i) sub_solutions[((i)*n + (r))*n + (c)]
// Winner:
unsigned int w_row = 0, w_col = 0, w_size = 0;
// Fill first layer:
for ( int row = 0; row < n; row++) {
for (int col = 0; col < n; col++) {
at(r, c, 0) = data[r][c];
if (data[r][c] > data[w_row][w_col]) {
w_row = r;
w_col = c;
}
}
}
// Fill remaining layers.
for ( int size = 1; size < m; size++) {
for ( int row = 0; row < n-size; row++) {
for (int col = 0; col < n-size; col++) {
long long sum = data[row+size][col+size];
for (int i = 0; i < size; i++) {
sum += data[row+size][col+i];
sum += data[row+i][col+size];
}
sum += at(row, col, size-1); // Reuse previous solution.
at(row, col, size) = sum;
if (sum > at(w_row, w_col, w_size)) { // Could optimize this part if you only need the sum.
w_row = row;
w_col = col;
w_size = size;
}
}
}
}
// The largest sum is of the sub_matrix starting a w_row, w_col, and has dimensions w_size+1.
long long largest = at(w_row, w_col, w_size);
delete [] sub_solutions;
This algorithm has complexity: O(n*n*m*m) or more precisely: 0.5*n*(n-1)*m*(m-1). (Now I haven't tested this so please let me know if there are any bugs.)
Try this one (using naive approach, will be easier to get the idea):
#include <iostream>
#include<vector>
using namespace std;
int main( )
{
int n = 5; //Size of array, you may take a dynamic array as well
int a[5][5] =
{{2,1,8,9,0},{2,4,7,-2,1},{5,4,3,2,1},{3,4,9,9,2},{5,2,6,8,0}};
int sum, partsum;
int i, j, k, m;
sum = -999999; // presume minimum part sum
for (i = 0; i < n; i++) {
partsum = 0;
m = sizeof(a[i])/sizeof(int);
for (j = 0; j < m; j++) {
partsum += a[i][j];
}
if (partsum > sum) {
k = i;
sum = partsum;
}
}
// print subarray having largest sum
m = sizeof(a[k])/sizeof(int); // m needs to be recomputed
for (j = 0; j < m - 1; j++) {
cout << a[k][j] << ", ";
}
cout << a[k][m - 1] <<"\nmax part sum = " << sum << endl;
return 0;
}
With a cumulative sum, you may compute partial sum in constant time
std::vector<std::vector<int>>
compute_cumulative(const std::vector<std::vector<int>>& m)
{
std::vector<std::vector<int>> res(m.size() + 1, std::vector<int>(m.size() + 1));
for (std::size_t i = 0; i != m.size(); ++i) {
for (std::size_t j = 0; j != m.size(); ++j) {
res[i + 1][j + 1] = m[i][j] - res[i][j]
+ res[i + 1][j] + res[i][j + 1];
}
}
return res;
}
int compute_partial_sum(const std::vector<std::vector<int>>& cumulative, std::size_t i, std::size_t j, std::size_t size)
{
return cumulative[i][j] + cumulative[i + size][j + size]
- cumulative[i][j + size] - cumulative[i + size][j];
}
live example

Counting Sort confusion. Not sorting array (c++)

I was researching counting sort and decided to try an algorithm i found online. Though, it doesn't seem to actually sort my array.
void countSort2(int arr[], int n, int exp)
{
int *output = new int[n]; // output array
int i, count[10] = {0};
// Store count of occurrences in count[]
for (i = 0; i < n; i++)
count[ (arr[i]/exp)%10 ]++;
// Change count[i] so that count[i] now contains actual position of
// this digit in output[]
for (i = 1; i < 10; i++)
count[i] += count[i - 1];
// Build the output array
for (i = n - 1; i >= 0; i--)
{
output[count[ (arr[i]/exp)%10 ] - 1] = arr[i];
count[ (arr[i]/exp)%10 ]--;
}
// Copy the output array to arr[], so that arr[] now
// contains sorted numbers according to curent digit
for (i = 0; i < n; i++)
arr[i] = output[i];
}
int main()
{
int b[10] = {4,3,2,1,6,7,8,9,7,6};
countSort2(b,10,10);
int i = 0;
while(i<10)
{
cout<<b[i]<<endl;
i++;
}
When the array is printed out, I get: "4,3,2,1,6,7,8,9,7,6". Am I calling the function wrong?
This is how you call the method [1]..
10 is the number of elements...
int main()
{
int b[10] = {14,23,22,11,66,67,58,49,17,16};
countSort2(b,10,1);
countSort2(b,10,10);
int i = 0;
while(i<10)
{
cout<<b[i]<<endl;
i++;
}
return 0;
}
This is a radix sort that sorts an array by a decimal digit. The sort is done from least significant digit to most significant digit. This means a series of calls with exp = 1, 10, 100, 1000, 10000, ... .
Here is an example of a radix sort that sorts an array of 64 bit unsigned integers by the bytes in the integers, from least significant to most significant. In this example, the temporary array is passed as a parameter to RadixSort():
typedef unsigned __int64 UI64;
typedef unsigned __int64 * PUI64;
PUI64 RadixSort(PUI64 pData, PUI64 pTemp, size_t count)
{
size_t mIndex[8][256] = {0}; // index matrix
PUI64 pDst, pSrc, pTmp;
size_t i,j,m,n;
UI64 u;
for(i = 0; i < count; i++){ // generate histograms
u = pData[i];
for(j = 0; j < 8; j++){
mIndex[j][(size_t)(u & 0xff)]++;
u >>= 8;
}
}
for(j = 0; j < 8; j++){ // convert to indices
n = 0;
for(i = 0; i < 256; i++){
m = mIndex[j][i];
mIndex[j][i] = n;
n += m;
}
}
pDst = pTemp; // radix sort
pSrc = pData;
for(j = 0; j < 8; j++){
for(i = 0; i < count; i++){
u = pSrc[i];
m = (size_t)(u >> (j<<3)) & 0xff;
pDst[mIndex[j][m]++] = u;
}
pTmp = pSrc;
pSrc = pDst;
pDst = pTmp;
}
return(pSrc);
}