My code crashes the terminal each time I execute it. I have tried it with Geany and Visual C++ for debugging and it also crashes when just going through cmd prompt.
I got rid of all the bugs using the debugger, but it didn't help. I am supposed to be creating a simulator of a cache, and looking for hits and misses using random numbers, here is my code:
#include <iostream>
#include <cmath>
#include <stdlib.h>
#include <ctime>
#include <iomanip>
#include <fstream>
using namespace std;
const int r = 8192;
const int c = 3;
const int r1 = 262144;
int i;
int j;
int k;
int max;
int min;
int numberW0;
int index_max;
int index_min;
int numberX0;
int tag_max;
int tag_min;
int numberY0;
unsigned int data_max;
int data_min;
int numberZ0;
int numberW1;
int index_max1;
int index_min1;
int numberX1;
int tag_max1;
int tag_min1;
int numberY1;
unsigned int data_max1;
int data_min1;
int numberZ1;
int numberW2;
int index_max2;
int index_min2;
int numberX2;
int tag_max2;
int tag_min2;
int numberY2;
unsigned int data_max2;
int data_min2;
int numberZ2;
int numberW3;
int index_max3;
int index_min3;
int numberX3;
int tag_max3;
int tag_min3;
int numberY3;
unsigned int data_max3;
int data_min3;
int numberZ3;
int numberW4;
int index_max4;
int index_min4;
int numberX4;
int tag_max4;
int tag_min4;
int numberY4;
unsigned int data_max4;
int data_min4;
int numberZ4;
double new_valid = 0;
double new_tag = -9999999999;
double new_data = -9999999999;
double tag_value;
int s;
int t;
double block0;
double block1;
double block2;
double block3;
int v;
double levBlock;
fstream armRes;
int main()
{
double way0 [r][c]; //declaration of the arrays
double way1 [r][c];
double way2 [r][c];
double way3 [r][c];
double lev2[r1][c];
for (i = 0; i < r; i++) // initialization of the arrays
{
way0[i][0] = new_valid;
way0[i][1] = new_tag;
way0[i][2] = new_data;
way1[i][0] = new_valid;
way1[i][1] = new_tag;
way1[i][2] = new_data;
way2[i][0] = new_valid;
way2[i][1] = new_tag;
way2[i][2] = new_data;
way3[i][0] = new_valid;
way3[i][1] = new_tag;
way3[i][2] = new_data;
}
for (j = 0; j < r1; j++)
{
lev2[j][0] = new_valid;
lev2[j][1] = new_tag;
lev2[j][2] = new_data;
}
srand (time(0)); // random number generator seed
int max = r;
int min = 0;
int max1 = r1;
int min1 = 0;
int numberW0 = (rand()%max+min+1); // number of entires to populate way0
for (k = 0; k <= numberW0; k++)
{
int index_max = 8192;
int index_min = 0;
numberX0 = rand()%index_max+index_min+1; // random index to use
int tag_max = 32768;
int tag_min = 0;
numberY0 = rand()%tag_max+tag_min+1; // random tag for the index
unsigned int data_max = 429496729; // random data for the index
int data_min = 0;
numberZ0 = rand()%data_max+data_min+1;
way0 [numberX0][0] = 1;
way0 [numberX0][1] = numberY0;
way0 [numberX0][2] = numberZ0;
}
int numberW1 = (rand()%max+min+1); // number of entires to populate way1
for (k = 0; k <= numberW1; k++)
{
int index_max1 = 8192;
int index_min1 = 0;
numberX1 = rand()%index_max1+index_min1+1; // random index to use
int tag_max1 = 32768;
int tag_min1 = 0;
numberY1 = rand()%tag_max1+tag_min1+1; // random tag for the index
unsigned int data_max1 = 429496729; // random data for the index
int data_min1 = 0;
numberZ1 = rand()%data_max1+data_min1+1;
way1 [numberX1][0] = 1;
way1 [numberX1][1] = numberY1;
way1 [numberX1][2] = numberZ1;
}
int numberW2 = (rand()%max+min+1); // number of entires to populate way2
for (k = 0; k <= numberW2; k++)
{
int index_max2 = 8192;
int index_min2 = 0;
numberX2 = rand()%index_max2+index_min2+1; // random index to use
int tag_max2 = 32768;
int tag_min2 = 0;
numberY2 = rand()%tag_max2+tag_min2+1; // random tag for the index
unsigned int data_max2 = 429496729; // random data for the index
int data_min2 = 0;
numberZ2 = rand()%data_max2+data_min2+1;
way2 [numberX2][0] = 1;
way2 [numberX2][1] = numberY2;
way2 [numberX2][2] = numberZ2;
}
int numberW3 = (rand()%max+min+1); // number of entires to populate way3
for (k = 0; k <= numberW3; k++)
{
int index_max3 = 8192;
int index_min3 = 0;
numberX3 = rand()%index_max3+index_min3+1; // random index to use
int tag_max3 = 32768;
int tag_min3 = 0;
numberY3 = rand()%tag_max3+tag_min3+1; // random tag for the index
unsigned int data_max3 = 429496729; // random data for the index
int data_min3 = 0;
numberZ3 = rand()%data_max3+data_min3+1;
way3 [numberX3][0] = 1;
way3 [numberX3][1] = numberY3;
way3 [numberX3][2] = numberZ3;
}
int numberW4 = (rand()%max1+min1+1); // number of entires to populate level 2
for (k = 0; k <= numberW4; k++)
{
int index_max4 = r1;
int index_min4 = 0;
numberX4 = rand()%index_max4+index_min4+1; // random index to use
int tag_max4 = 32768;
int tag_min4 = 0;
numberY4 = rand()%tag_max4+tag_min4+1; // random tag for the index
unsigned int data_max4 = 429496729; // random data for the index
int data_min4 = 0;
numberZ4 = rand()%data_max4+data_min4+1;
lev2 [numberX4][0] = 1;
lev2 [numberX4][1] = numberY4;
lev2 [numberX4][2] = numberZ4;
}
armRes.open("C:\\Users\\Max\\Documents\\CSIT4\\Project\\Practice\\ARMresults.txt", ios::out | ios::app );
for (t = 0; t<= 15000; t++)
{
int tag_value = rand()%32768+0+1;
for (s = 0; s <= r; s++)
{
block0 = way0 [s][1];
block1 = way1 [s][1];
block2 = way2 [s][1];
block3 = way3 [s][1];
if (tag_value == block0)
{
armRes << "L1 Hit (Way 0) -- AT INDEX: " << s << endl;
}
else if (tag_value == block1)
{
armRes << "L1 Hit (Way1) -- AT INDEX: " << s << endl;
}
else if (tag_value == block2)
{
armRes << "L1 Hit (Way2) -- AT INDEX: " << s << endl;
}
else if (tag_value == block3)
{
armRes << "L1 Hit (Way3) -- AT INDEX: " << s << endl;
}
else
{
for (v = 0; v <= r1; v++)
{
double levBlock = lev2 [v][1];
if (tag_value == levBlock)
{
armRes << "L2 Hit -- AT INDEX: " << v << endl;
}
else
{
armRes << "Cache Miss For Tag: " << tag_value << endl;
}
}
}
}
}
armRes.close();
return 0;
}
Any help would be greatly appreciated as I no longer have any academic help
One issue right away: You are more than likely blowing out the stack with the memory taken up by those arrays in main(). Since you're using Visual C++, I can assume the following:
double way0 [r][c]; //declaration of the arrays
double way1 [r][c];
double way2 [r][c];
double way3 [r][c];
double lev2[r1][c];
Given this:
const int r = 8192;
const int c = 3;
const int r1 = 262144;
If the sizeof(double) == 8, then that is over 7,000,000 bytes of stack required to hold those arrays. The default stack is (I believe) 1 megabyte (maybe 2 megabytes, can't recall off the top of my head), but nowhere near close to 7 megabytes.
Therefore you need to use dynamic allocation to create those arrays. The simplest way is to use a std::vector and size accordingly.
#include <vector>
//...
typedef std::vector<double> Double1D;
typedef std::vector<Double1D> Double2D;
Double2D way0(r, Double1D(c));
Double2D way1(r, Double1D(c));
Double2D way2(r, Double1D(c));
Double2D way3(r, Double1D(c));
Double2D lev2(r1, Double1D(c));
This now creates the data away from the stack and instead onto the heap. The rest of the code should be able to compile with no further changes.
Note that I did not check any of the array indices you're using in the rest of your program, so you could still have an array access violation somewhere in the rest of the code. Others have commented that you do have some index boundary issues using rand(), but at least you won't run into stack space issues.
Related
I want to write a method in C++ which creates an array of monotonically increasing values. It has the inputs of int begin, int end, int interval.
In this example; method should return the array of [0,1,2,3,4,5,6,7,8,9,10]. When I print the results it should print out the first two indexes and get 0 and 1. However, when I print it, it gives 0 for the first one and 9829656 for the second one.
When I only print one index it is always correct, but when I print more than one index, every value except for the first printed one gives a different result. I think the other results are related to memory address since I used pointers.
#include <iostream>
using namespace std;
int* getIntervalArray(int begin, int end, int interval){
int len = (end - begin) / interval + 1;
int result[11] = {};
for (int i = 0; i <= len - 1; i++) {
result[i] = begin + interval * i;
}
return result;
}
int main(){
int begin = 0;
int end = 10;
int interval = 1;
int* newResult = getIntervalArray(begin, end, interval);
cout << newResult[0] << endl;
cout << newResult[1] << endl;
return 0;
}
You are returning a pointer to a local variable. You can instead return a std::vector by value as shown below:
#include <iostream>
#include <vector>
//return a vector by value
std::vector<int> getIntervalArray(int begin, int end, int interval){
int len = (end - begin) / interval + 1;
std::vector<int> result(len); //create a vector of size len
for (int i = 0; i <= len - 1; i++) {
result.at(i) = begin + interval * i;
}
return result;
}
int main(){
int begin = 0;
int end = 10;
int interval = 1;
std::vector<int> newResult = getIntervalArray(begin, end, interval);
//print out elements of returned vector
for(int i = 0; i < newResult.size(); ++i)
{
std::cout << newResult.at(i) << std::endl;
}
return 0;
}
The output of the above program can be seen here.
A possible solution dynamically allocating the local array, and returning it via a smart pointer:
#include <array>
#include <iostream>
#include <memory> // make_unique
auto getIntervalArray(int begin, int end, int interval)
{
int len = (end - begin) / interval + 1;
auto result{ std::make_unique<std::array<int, 11>>() };
for (int i = 0; i <= len - 1; i++) {
(*result)[i] = begin + interval * i;
}
return result;
}
int main()
{
int begin = 0;
int end = 10;
int interval = 1;
auto newResult{ getIntervalArray(begin, end, interval) };
std::cout << (*newResult)[0] << std::endl;
std::cout << (*newResult)[1] << std::endl;
std::cout << (*newResult)[2] << std::endl;
return 0;
}
Demo
Set the array variable in your function as static. This is because C++ does not support returning the address of a local variable.
static int result[11];
try this. also add deletion of the newResult
#include <iostream>
using namespace std;
int* getIntervalArray(int begin, int end, int interval){
int len = (end - begin) / interval + 1;
int* result = new int[len];
int lastValue = begin;
for (int i = 0; i <= len - 1; i++) {
result[i] = lastValue;
lastValue += interval;
}
return result;
}
int main(){
int begin = 0;
int end = 10;
int interval = 1;
int* newResult = getIntervalArray(begin, end, interval);
cout << newResult[0] << endl;
cout << newResult[1] << endl;
// add delete here.
return 0;
}
I'm making this program where I have to count the number of swaps and comparisons a quick sort function, and we have to pass the swaps and comps to the function. I'm not too sure how to do this. I have it so it can be done without passing anything to it, as shown below.
#include <iostream>
#include <ctime>
#include <stdlib.h>
#include <math.h>
using namespace std;
struct SwapandComp {
int swaps;
int comps;
};
const long ARRAY_SIZE = 5000;
int totalSwaps = 0;
int totalComps = 0;
int partition(int[], int, int) //add parameters int& swap and int& comp
SwapandComp quickSort(int[], int, int) //also add parameters for int& swap and int& comp
int main() {
SwapandComp qui;
long masterAry[ARRAY_SIZE] = {0};
int quickAry[ARRAY_SIZE] = {0};
int start = 0;
int end = 0;
double difference = 0;
int size = ARRAY_SIZE;
srand(time(NULL));
for (int i = 0; i < ARRAY_SIZE; i++) {
masterAry[i] = rand();
}
for (int a = 0; a < ARRAY_SIZE; a++) {
quickAry[a] = masterAry[a];
}
start = clock();
qui = quickSort(quickAry, 0, ARRAY_SIZE - 1);
end = clock();
difference = end - start;
double f = difference / CLOCKS_PER_SEC;
cout << "Quick: " << f << " " << qui.swaps << " " << qui.comps << endl;
}
This is the main. It's where values are assigned to the array to be sorted by the quickSort function, which will be defined below.
int partition(int numbers[], int i, int k) { //add parameters int& swap and int& comp
int l = 0;
int h = 0;
int midpoint = 0;
int pivot = 0;
int temp = 0;
bool done = false;
// Pick middle element as pivot
midpoint = i + (k - i) / 2;
pivot = numbers[midpoint];
l = i;
h = k;
while (!done) {
// Increment l while numbers[l] < pivot
while (numbers[l] < pivot) {
++l;
totalComps++;
}
// Decrement h while pivot < numbers[h]
while (pivot < numbers[h]) {
--h;
totalComps++;
}
// If there are zero or one elements remaining,
// all numbers are partitioned. Return h
if (l >= h) {
totalComps++;
done = true;
}
else {
// Swap numbers[l] and numbers[h],
// update l and h
temp = numbers[l];
numbers[l] = numbers[h];
numbers[h] = temp;
totalSwaps++;
++l;
--h;
}
}
//cout << totalSwaps << " " << totalComps << endl;
return h;
}
This is the partition function to find where to find the next partition point
SwapandComp quickSort(int numbers[], int i, int k) { //add parameters int& swap and int& comp
SwapandComp quick = { 0 };
//quick.swaps = quick.comps = 0;
int j = 0;
int z = 0;
// Base case: If there are 1 or zero elements to sort,
// partition is already sorted
if (i >= k) {
return quick;
}
// Partition the data within the array. Value j returned
// from partitioning is location of last element in low partition.
j = partition(numbers, i, k);
// Recursively sort low partition (i to j) and
// high partition (j + 1 to k)
quickSort(numbers, i, j);
quickSort(numbers, j + 1, k);
quick.swaps = totalSwaps;
quick.comps = totalComps;
//totalSwaps = 0;
//totalComps = 0;
return quick;
}
And finally, here is the quick sort function where all the swaps and comps will be added together and put into the struct. Again, I'm not too sure how to add in the pass by reference variables for swap and comp. Any help is appreciated! (Also sorry about the code formatting, it got kind of crazy on my screen.)
So I have my program here:
#include <iostream>
#include <string>
#include <pthread.h>
#include <unistd.h>
#include <math.h>
#include <stdlib.h>
using namespace std;
int const size = 3;
struct Arguments{
int array[];
float result1[];
float result2[];
};
//void calc(int arr[], float rarr1[], float rarr2[], int size);
void* calc(void *param);
int main(int argc, char *argv[]){
time_t t;
srand((unsigned) time(&t));
int arr[size][size] = {};
float rarr1[size][size-1] = {};
float rarr2[size][size-1] = {};
for(int x = 0; x < size; x++){
for(int y = 0; y < size; y++){
int number = rand()%10;
arr[x][y] = number;
}
}
for(int x = 0; x < size; x++){
for(int y = 0; y < size; y++){
cout << arr[x][y] << " ";
}
cout << endl;
}
cout << endl;
/////////////////////////////////////////
pthread_t child;
struct Arguments input;
for(int i = 0; i < size; i++){
input.array[i] = arr[0][i];
}
pthread_create(&child, NULL, calc, (void*)&input);
pthread_join(child, NULL);
//calc(&input);
for(int i = 0; i < size-1; i++){
rarr1[0][i] = input.result1[i];
cout << "Test: " << rarr1[0][i] << endl;
}
//////////////////////////////////
return 0;
}
//void calc(int arr[], float rarr1[], float rarr2[], int size){
void* calc(void *param){
struct Arguments *input = (struct Arguments*)param;
int arr1[] = {};
float rarr1[] = {};
float rarr2[] = {};
for(int i = 0; i < size; i++){
arr1[i] = input->array[i];
}
for(int i = 0; i < size; i++){
int a = arr1[i];
int b = arr1[i+1];
int difference = a-b;
if(difference < 0){
difference = difference * -1;
}
float euc = 1 + pow(difference, 2);
euc = sqrt(euc);
rarr1[i] = euc;
}
for(int i = 0; i <size-1; i++){
input->result1[i] = rarr1[i];
}
for(int i = 0; i <size-1; i++){
int a = arr1[i];
int b = arr1[i+1];
int difference = a-b;
if(difference < 0){
difference = difference * -1;
}
float apar = (difference/rarr1[i]);
float result = asin(apar);
result = result*(180/3.14);
rarr2[i] = result;
}
return NULL;
}
The important part that causes the trouble is between ////// lines but I left the rest of the code for the context, since it might be useful.
So I have the function calc(param); that does the important calculation in the program.
It is working just fine as long as I call it myself (by actually including the function call in the code) and the test loop right after it gives the correct results.
However, when I try to use pthread_create(); to create a new thread that will take care of executing that function, the test loop spits out nonsense and some random huge numbers different each time.
It's kinda weird because the code compiles either way, and literally the only thing that I change is these 2 lines.
What am I doing wrong and why the function spits out garbage when started by the Pthread? Is there a way to fix it?
Ok so if anyone's having a similar problem:
Declare the size of arrays no matter what. It turns out that my program didn't work properly because I initialized my result arrays as float result1[]; instead of float result1[size];
I need a way to check that these pins change from HIGH to LOW in right order. (i.e. RED > BROWN > BLUE > GREY > BLACK > YELLOW > WHITE > ORANGE > PURPLE > GREEN) If one of the pins change out of order, it has to add 1 to "errorCount" if error count exceeds 4 then call a function.
Part of code for pins:
const int wire_BROWN = 30;
int wire_BROWN_state = 0;
int wire_BROWN_lastState = 0;
const int wire_RED = 32;
int wire_RED_state = 0;
int wire_RED_lastState = 0;
const int wire_ORANGE = 34;
int wire_ORANGE_state = 0;
int wire_ORANGE_lastState = 0;
const int wire_YELLOW = 36;
int wire_YELLOW_state = 0;
int wire_YELLOW_lastState = 0;
const int wire_GREEN = 38;
int wire_GREEN_state = 0;
int wire_GREEN_lastState = 0;
const int wire_BLUE = 40;
int wire_BLUE_state = 0;
int wire_BLUE_lastState = 0;
const int wire_PURPLE = 42;
int wire_PURPLE_state = 0;
int wire_PURPLE_lastState = 0;
const int wire_GREY = 44;
int wire_GREY_state = 0;
int wire_GREY_lastState = 0;
const int wire_WHITE = 46;
int wire_WHITE_state = 0;
int wire_WHITE_lastState = 0;
const int wire_BLACK = 48;
int wire_BLACK_state = 0;
int wire_BLACK_lastState = 0;
You could put them in an array in the order that you want to check them in, and then loop through the array, running a check
Pseudocode:
byte check(int a[]){ // Get's called on update of pins
byte error = 0;
for (byte i=1; i < a.length - 1; a++);
if (a[i] < a[i-1]){
error++;
}
return(error)
}
byte stateArray[] = {
digitalRead(wire_BROWN),
digitalRead(wire_BLUE),
digitalRead(wire_BLACK),
digitalRead(wire_YELLOW),
digitalRead(wire_WHITE),
digitalRead(wire_ORANGE),
digitalRead(wire_PURPLE),
digitalRead(wire_GREEN),
};
byte errorCount = check(stateArray);
if (errorCount > 4) {
// Error Handling
}
I am having trouble making a deep copy of a host_vector to device_vector. I think that I am having an issue modifying the value of an element stored in a device_vector. You can find a compilable version at the bottom but the code in question is the following (i put stars on the rows that trigger the segmentation fault):
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaMalloc((void **) deviceString,sizeString);
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
**** deviceCharArr->length = sizeString;
**** deviceCharArr->value = deviceString;
}
What happens is that when we arrive at the actual assignment
deviceCharArr->value = deviceString
It throws a Segmentation Fault error. I am very new to CUDA and apologize if there is an obvious answer but I have not been able to find many examples of people allocating char * on devices.
Complete Compilable version is here
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
typedef struct{
char * value;
int length;
} CharArr;
struct CharArrayCmp{
__host__ __device__
bool operator()(const CharArr & o1, const CharArr & o2) {
return this->compare(o1.value,o1.length,o2.value,o2.length);
}
__host__ __device__ bool compare (const char * src, int lenSrc, const char * dst, int lenDest)
{
int end;
if(lenSrc > lenDest){
end = lenDest;
}else{
end = lenSrc;
}
for(int i = 0; i < end; i++){
if(src[i] > dst[i]){
return false;
}else if(src[i] < dst[i]){
return true;
}
}
if(lenSrc >= lenDest){
return false;
}
return true;
}
};
void sortCharArrayHost(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
/*thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * deviceString = 0;
char * hostString = hostToSort[i].value;
int size = strlen(hostString)*sizeof(char);
int cudaStatus;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaStatus = cudaMalloc((void **) deviceString,size);
cudaStatus = cudaMemcpy(deviceString,hostString,size,cudaMemcpyHostToDevice);
(&deviceArrayToSort[i]).get()->value = "";
}
*/
// thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
thrust::sort_by_key(hostToSort.begin(),hostToSort.end(),hostToSortRow.begin(),CharArrayCmp());
//copy the contents back into our original array to sort now sorted
// hostToSort = deviceArrayToSort;
for(int i =0; i < size; i++){
arrayToSort[i] = hostToSort[i].value;
}
// thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);
thrust::copy(hostToSortRow.begin(),hostToSortRow.end(),arrayToSortRow);
}
void sortCharArrayDevice(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaMalloc((void **) deviceString,sizeString);
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
deviceCharArr->length = sizeString;
deviceCharArr->value = deviceString;
}
thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
//copy the contents back into our original array to sort now sorted
for(int i =0; i < size; i++){
arrayToSort[i] = (&deviceArrayToSort[i]).get()->value;
}
thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);
}
int main()
{
char ** charArr = new char*[10];
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
long long * rows = new long long[10];
for(int i = 0; i < 10;i++ ){
rows[i] = i;
}
sortCharArrayHost(charArr,rows,10);
for(int i = 0; i < 10; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[i]<<std::endl;
}
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
for(int i = 0; i < 10;i++ ){
rows[i] = i;
}
sortCharArrayDevice(charArr,rows,10);
for(int i = 0; i < 10; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[i]<<std::endl;
}
}
As JackOLantern has already pointed out, this is not acceptable:
// this creates an allocation on the device
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
// this takes the (device) address an element and assigns it to a pointer variable
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// this then dereferences a device pointer in host code which is illegal
deviceCharArr->length = sizeString;
In CUDA you are not allowed to dereference a device pointer in host code or vice-versa.
It seems you have the following data sets:
The strings to be sorted
An array of string "handles" consisting of CharArr objects each containing pointer to start of string and length
An array of string indices (i.e. 0, 1, 2, ...)
You want to sort 2, and 3 above, based on 1. Thrust "likes" to have everything in one or 2 vectors, if possible. Let's try the following:
concatenate all strings together into a single char vector.
mark the start index of each string in another int vector. The difference in successive start indices will constitute the length of each string. We'll combine the start and the length of each string into a thrust::tuple for use in the comparator, by using a zip_iterator
sort the "tuple array" (i.e. sort index and length at the same time) using the desired comparison functor. Any necessary rearrangement of other data can be accomplished using the reordered index vector.
If you want a re-ordered string index also (i.e. 0, 1, 2, ...) you can create that vector easily enough and add it as a third element to the tuple to be sorted.
Note that the above approach entirely avoids the use of pointers, which as you've seen can be troublesome to manage between host and device copies of the same data.
Here's a fully worked example:
$ cat t439.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#define NUM_STRINGS 10
struct stringCmp{
const char * strings;
stringCmp(char * _strings) : strings(_strings) {}
template<typename myTuple>
__host__ __device__
bool operator()(const myTuple & o1, const myTuple & o2) {
int idxSrc = thrust::get<0>(o1);
int lenSrc = thrust::get<1>(o1);
int idxDst = thrust::get<0>(o2);
int lenDst = thrust::get<1>(o2);
int end;
if(lenSrc > lenDst){
end = lenDst;
}else{
end = lenSrc;
}
for(int i = 0; i < end; i++){
if(strings[idxSrc+i] > strings[idxDst+i]){
return false;
}else if(strings[idxSrc+i] < strings[idxDst+i]){
return true;
}
}
if(lenSrc >= lenDst){
return false;
}
return true;
}
};
void sortCharArrayDevice(char ** arr, int *rows, int num_str){
thrust::host_vector<char> h_strings;
thrust::host_vector<int> h_st_idx(num_str);
thrust::host_vector<int> h_len(num_str);
thrust::host_vector<int> h_rows(num_str);
// concatenate strings
// assume no zero length strings
h_st_idx[0] = 0;
for (int i = 0; i < num_str; i++){
int sidx = 0;
while (arr[i][sidx] != '\0'){
h_strings.push_back(arr[i][sidx]);
sidx++;}
h_len[i] = sidx;
if (i < num_str-1) h_st_idx[i+1] = h_st_idx[i] + sidx;
h_rows[i] = rows[i];
}
// copy data to device
thrust::device_vector<char> d_strings = h_strings;
thrust::device_vector<int> d_st_idx = h_st_idx;
thrust::device_vector<int> d_len = h_len;
thrust::device_vector<int> d_rows = h_rows;
// sort on device
thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.begin(), d_len.begin(), d_rows.begin())), thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.end(), d_len.end(), d_rows.end())), stringCmp(thrust::raw_pointer_cast(d_strings.data())));
thrust::copy(d_rows.begin(), d_rows.end(), rows);
}
int main()
{
char ** charArr = new char*[NUM_STRINGS];
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
int * rows = new int[NUM_STRINGS];
for(int i = 0; i < NUM_STRINGS;i++ ){
rows[i] = i;
}
sortCharArrayDevice(charArr,rows,NUM_STRINGS);
for(int i = 0; i < NUM_STRINGS; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[rows[i]]<<std::endl;
}
}
$ nvcc -arch=sm_20 -o t439 t439.cu
$ ./t439
Row is 4 String is aasd
Row is 1 String is abcd
Row is 6 String is asde
Row is 2 String is defg
Row is 9 String is erbfde
Row is 8 String is ntddwe
Row is 7 String is rurt
Row is 3 String is werd
Row is 5 String is zwedew
Row is 0 String is zyxw
$