Access Violation with OCCI next() and setDataBuffer() - c++

unfortunately I can't see the forest for the trees and need help. I call a result set via a SELECT statement, which has 70 columns. I initialize my buffer for all 70 columns which are chars and set them with setDataBuffer. Unfortunately I can only retrieve 15-17 records. After that I get an Access Violation error message. If I try next(1000) it does not work at all. I think it has something to do with the pointers but I don't see the error. Does anyone know what I am doing wrong?
#pragma region Arrays
// Fetch buffers sized for 70 columns x 1000 rows x 500 bytes per cell.
// Every cell is its own heap allocation, so data_array[i] is an array of
// 1000 independent pointers rather than one block of 1000 * 500 bytes.
char*** data_array = new char**[70];
for (unsigned int i = 0; i < 70; ++i)
{
data_array[i] = new char*[1000];
for (unsigned int j = 0; j < 1000; ++j)
{
data_array[i][j] = new char[500];
}
}
// Per-column array with one ub2 slot per row (passed to setDataBuffer below
// as the length array).
ub2** size_array = new ub2 * [70];
for (unsigned int i = 0; i < 70; ++i)
{
size_array[i] = new ub2[1000];
}
// Per-column array with one sb2 slot per row (passed as the indicator array).
sb2** ind_array = new sb2 * [70];
for (unsigned int i = 0; i < 70; ++i)
{
ind_array[i] = new sb2[1000];
}
// Per-column array with one ub2 slot per row (passed as the return-code array).
ub2** rc_array = new ub2 * [70];
for (unsigned int i = 0; i < 70; ++i)
{
rc_array[i] = new ub2[1000];
}
#pragma endregion
#pragma region setDataBuffer
// Register one buffer per column; result-set columns are numbered from 1.
for (unsigned int i = 0; i < 70; ++i)
{
// NOTE(review): only the address of the column's first 500-byte cell is
// handed over. The answer below states that setDataBuffer needs one
// contiguous block per column, which this allocation scheme does not
// provide - confirm against the OCCI documentation.
resultSet->setDataBuffer(i + 1, data_array[i][0], OCCI_SQLT_STR, 500, size_array[i], ind_array[i], rc_array[i]);
}
#pragma endregion
try
{
// Request up to 25 rows in one call (presumably an array fetch into the
// buffers registered above - confirm against the OCCI documentation).
ResultSet::Status resultSetStatus = resultSet->next(25);
if (resultSetStatus == ResultSet::Status::DATA_AVAILABLE)
{
unsigned int rowCount = resultSet->getNumArrayRows();
for (unsigned int row = 0; row < rowCount; ++row)
{
for (unsigned int column = 0; column < 70; ++column)
{
auto value = data_array[column][row];
// NOTE(review): this reads element 0 of the column's length array for
// every row; the row index is not used here.
auto vsize = *size_array[column];
std::string cellContent(value, vsize);
}
}
}
}
catch(SQLException& sqlEx)
{
std::string msg = sqlEx.getMessage();
// Unused local; presumably a statement to hang a breakpoint on.
int i = 0;
}

The problem is your allocation of data_array, because you create it as a jagged array, not a contiguous array of memory as needed by setDataBuffer.
If you want to use dynamic allocation with new[], I suggest something like this instead:
// One cell: 500 bytes of character data.
using data_row = int8_t[500];
// One column: 1000 cells laid out back to back (1000 * 500 bytes).
using data_type = data_row[1000];
// 70 columns; each data_array[c] is a single contiguous region.
data_type* data_array = new data_type[70];
Then each element of data_array will be a contiguous area of memory, 1000 * 500 bytes large.
If you want to know the difference between an array of arrays (as in my solution) and your pointer to pointer (jagged array), see e.g. this old answer of mine.

Related

c++ dynamic memory allocation - matrix multiplication

I am trying to do a large matrix multiplication, e.g. 1000x1000. Unfortunately, it only works for very small matrices. For the big ones, the program just turns on and that's all - no results. Here's the code:
#include <iostream>
using namespace std;
// Builds a 10x10 matrix filled with 3 and a 10x10 matrix filled with 2 on the
// heap, multiplies them, and prints every element of the product followed by
// a tab character.
int main() {
    const int matrix_1_row = 10;
    const int matrix_1_column = 10;
    const int matrix_2_row = 10;
    const int matrix_2_column = 10;
    // The product takes its row count from the left operand and its column
    // count from the right operand.
    const int result_row = matrix_1_row;
    const int result_column = matrix_2_column;

    // Allocates a rows x columns matrix as an array of row pointers and sets
    // every element to `fill`.
    auto allocate = [](int rows, int columns, int fill) {
        int** matrix = new int*[rows];
        for (int r = 0; r < rows; ++r) {
            matrix[r] = new int[columns];
            for (int c = 0; c < columns; ++c) {
                matrix[r][c] = fill;
            }
        }
        return matrix;
    };
    // Frees every row and then the array of row pointers.
    auto release = [](int** matrix, int rows) {
        for (int r = 0; r < rows; ++r) {
            delete[] matrix[r];
        }
        delete[] matrix;
    };

    int** array_1 = allocate(matrix_1_row, matrix_1_column, 3);
    int** array_2 = allocate(matrix_2_row, matrix_2_column, 2);
    int** array_3 = allocate(result_row, result_column, 0);

    // Matrix multiplication: each result cell is a row-by-column dot product.
    for (int r = 0; r < matrix_1_row; ++r) {
        for (int c = 0; c < matrix_2_column; ++c) {
            int sum = 0;
            for (int k = 0; k < matrix_1_column; ++k) {
                sum += array_1[r][k] * array_2[k][c];
            }
            array_3[r][c] = sum;
        }
    }

    // Results: all elements on one line, each followed by a tab.
    for (int r = 0; r < result_row; ++r) {
        for (int c = 0; c < result_column; ++c) {
            std::cout << array_3[r][c] << "\t";
        }
    }

    release(array_1, matrix_1_row);
    release(array_2, matrix_2_row);
    release(array_3, result_row);
    return 0;
}
Anyone have an idea how to fix it? At what point did I go wrong? I used pointers, dynamic memory allocation.
Instead of working with arrays directly named as matrix, try something simple and scalable, then optimize. Something like this:
// Sketch (pseudo-code, not compilable as written) of a recursively blocked
// matrix: a matrix either holds four quadrant sub-matrices or, at the deepest
// level, a plain 4x4 block of ints.
class matrix
{
private:
// sub-matrices (the four quadrants)
std::shared_ptr<matrix> c11;
std::shared_ptr<matrix> c12;
std::shared_ptr<matrix> c21;
std::shared_ptr<matrix> c22;
// properties
const int n;
const int depth;
const int maxDepth;
// this should be shared-ptr too. Too lazy.
int data[16]; // lowest level matrix = 4x4 without sub matrix
// multiplication memory (the intermediate products m1..m7 used by operator*)
std::shared_ptr<std::vector<matrix>> m;
public:
// nP, depthP and maxDepthP are stored as n, depth and maxDepth.
matrix(const int nP=4,const int depthP=0,const int maxDepthP=1):
n(nP),depth(depthP),maxDepth(maxDepthP)
{
if(depth<maxDepth)
{
// allocate c11,c12,c21,c22
// allocate m1,m2,m3,...m7
}
}
// matrix-matrix multiplication
matrix operator * (const matrix & mat)
{
// allocate result
// multiply
if(depth!=maxDepth)
{
// Strassen's multiplication algorithm
// NOTE(review): m is a shared_ptr to a vector, so the elements would
// presumably have to be written as (*m)[0] ... (*m)[6] - confirm when
// turning this sketch into real code.
*m[0] = (*c11 + *c22) * (*mat.c11 + *mat.c22);
...
*m[6] = (*c12 - *c22) * (*mat.c21 + *mat.c22);
*c11 = *m[0] + *m[3] - *m[4] + *m[6];
..
*c22 = ..
}
else
{
// innermost submatrices (4x4) multiplied normally
result.data[0] = data[0]*mat.data[0] + ....
...
result.data[15]= ...
}
return result;
}
// matrix-matrix adder
matrix operator + (const matrix & mat)
{
// allocate result
// add
if(depth!=maxDepth)
{
// quadrant-wise addition, delegated to the sub-matrices
*result.c11 = *c11 + *mat.c11;
*result.c12 = *c12 + *mat.c12;
*result.c21 = *c21 + *mat.c21;
*result.c22 = *c22 + *mat.c22;
}
else
{
// innermost matrix
result.data[0] = ...
}
return result;
}
};
This way, the multiplication has a lower time complexity and the code still reads simply. Once it works, you can optimize for speed by keeping a single block of matrix data inside the class, preferably allocated only once at the root matrix, and (in newer C++ versions) giving the sub-matrices access to it through std::span. It is also easy to parallelize, since each matrix can distribute its work to at least 4 threads, and those in turn to 16 threads, 64 threads, and so on. Of course, too many threads are just as bad as too many allocations, so the thread count should be tuned as well.

Dynamically allocated two-dimensional array access issue C++

I am having an issue with accessing 2d array elements.
While creating an object I allocate memory for my array in the constructor.
Once the object is created when I try to access array elements I am getting EXC_BAD_ACCESS.
I checked and when still in constructor I can access array elements.
I don’t know what I am doing wrong.
This is my class where I allocate memory for data array;
// Named table of strings stored as a heap-allocated array of row arrays,
// addressed as data[row][column].
// NOTE(review): the destructor frees `data`, but no copy constructor or copy
// assignment is declared in this class.
class TableData
{
public:
// Allocates rowCount row arrays of columnCount strings each and resizes
// columnID to columnCount entries.
TableData(std::string name, int rows, int columns) : tableName(name), rowCount(rows), columnCount(columns)
{
data = new std::string*[rowCount];
for (int count = 0; count < rowCount; ++count)
data[count] = new std::string[columnCount];
columnID.resize(columnCount);
// Test write/read showing that the elements are accessible inside the constructor.
data[0][0]=“test1111”;
std::string test = data[0][0];
}
// Frees every row array and then the array of row pointers.
~TableData()
{
for (int count = 0; count < rowCount; ++count)
delete[] data[count];
delete[] data;
}
std::string **data;              // data[row][column]
std::string tableName = "";
const int rowCount;
const int columnCount;
std::vector<std::string> columnID;
};
When I try to fill an array with data I am getting "Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)"
// A temporary TableData is constructed and handed to the vector; the element
// stored in `tables` is then accessed by index.
ecuData.tables.push_back(TableData(name, tableRowCount, tableColumnCount));
ecuData.tables[i].data[0][0]=“test2222”; // Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)
“tables” is a vector of TableData objects
Here is a full function:
// Reads the table section of the file into ecuData.tables.
// The section offset is taken from `buffer` at 0x84; each table has a 0x50-byte
// header followed (elsewhere in the file) by its zero-terminated column names
// and cell strings.
// NOTE(review): readAndDecryptBytes is defined elsewhere; from its use here it
// presumably appends the requested number of decrypted bytes to the vector.
void Decoder::getTables(std::fstream &fs, std::vector<char> & buffer, SGBDdata &ecuData)
{
const int ptr_offset = 0x84;
// 32-bit offset of the table section, stored at byte 0x84 of the buffer.
int tableOffset = *reinterpret_cast<int32_t*>(&buffer[0] + ptr_offset);
const int tableCountBufferLength = 4;
vector<char> tableCountBuffer;
fs.seekg(tableOffset, fs.beg);
readAndDecryptBytes(fs, tableCountBuffer, tableCountBufferLength);
// The first 4 bytes of the section hold the number of tables.
int tableCount = *reinterpret_cast<int32_t*>(&tableCountBuffer[0] + 0);
for(int i = 0; i < tableCount; ++i) //iterate through tables
{
// Table header: name in bytes 0x00-0x3F, column-data offset at 0x40,
// column count at 0x48, row count at 0x4C.
int tableBufferLength = 0x50;
vector<char> tableBuffer;
readAndDecryptBytes(fs, tableBuffer, tableBufferLength);
string name(tableBuffer.begin(), tableBuffer.begin() + 0x40);
TrimEnd(name);
int tableColumnOffset = *reinterpret_cast<int32_t*>(&tableBuffer[0] + 0x40);
int tableColumnCount = *reinterpret_cast<int32_t*>(&tableBuffer[0] + 0x48);
int tableRowCount = *reinterpret_cast<int32_t*>(&tableBuffer[0] + 0x4C);
// A temporary TableData is created here and handed to vector<TableData>.
ecuData.tables.push_back(TableData(name, tableRowCount, tableColumnCount));
// Remember where the next table header starts before jumping to this
// table's column data.
long savedPos = fs.tellg();
fs.seekg(tableColumnOffset, fs.beg);
//Load column names
for(int j = 0; j < tableColumnCount; ++j)
{
// Read one byte at a time until a zero byte or 1024 bytes.
int tableItemBufferLength = 1024;
vector<char> tableItemBuffer;
for(int k = 0; k < tableItemBufferLength; ++k)
{
readAndDecryptBytes(fs, tableItemBuffer, 1);
if (tableItemBuffer[k] == 0)
break;
}
// end() - 1 drops the last byte read (the terminator when one was found).
ecuData.tables[i].columnID.push_back(string(tableItemBuffer.begin(), tableItemBuffer.end() - 1));
}
// Load the cell strings row by row, column by column.
for(int j = 0; j < tableRowCount; ++j)
{
for (int k = 0; k < tableColumnCount; ++k)
{
int tableItemBufferLength = 1024;
vector<char> tableItemBuffer;
for (int l = 0; l < tableItemBufferLength; ++l)
{
readAndDecryptBytes(fs, tableItemBuffer, 1);
if (tableItemBuffer[l] == 0)
break;
}
string s(tableItemBuffer.begin(), tableItemBuffer.end() - 1);
ecuData.tables[i].data[j][k] = s; // this is the line where the fault is reported
}
}
// Return to the position of the next table header.
fs.seekg(savedPos, fs.beg);
}
}
Here is SGBDdata class. The ecuData object of this class contains a vector data
// Plain aggregate with two strings plus the job and table lists; every member is public.
struct SGBDdata
{
    std::string sgbdPath;
    std::string ecuName;
    std::vector<JobData> jobs;
    // Filled by Decoder::getTables, one entry per table.
    std::vector<TableData> tables;
};
Lose all the new, delete, and pointer magic, and use a vector of vectors instead:
// Example program
#include <iostream>
#include <string>
#include <vector>
/// Named table of strings addressed as data[row][column].
///
/// All storage is held in standard containers, so the compiler-generated
/// copy/move operations and destructor are correct (Rule of Zero).
class TableData
{
public:
    /// Creates a table of `rows` x `columns` empty strings and `columns` empty
    /// column identifiers.
    ///
    /// @param name     table name, stored in tableName.
    /// @param rows     number of rows; data.size() afterwards.
    /// @param columns  number of columns; data[r].size() for every row r.
    TableData(std::string name, int rows, int columns)
        : data(rows, std::vector<std::string>(columns)),  // outer index = row
          tableName(name),
          rowCount(rows),
          columnCount(columns),
          columnID(columns)
    {
        // No element is touched here, so an empty table (0 rows or 0 columns)
        // is constructed safely.
    }

    std::vector<std::vector<std::string>> data;  ///< data[row][column]
    std::string tableName = "";
    const int rowCount;
    const int columnCount;
    std::vector<std::string> columnID;           ///< one identifier per column
};
// Constructs a 3x3 table, writes one cell and prints it back.
int main()
{
    TableData test("me", 3, 3);
    std::string& firstCell = test.data[0][0];
    firstCell = "test2222";
    std::cout << firstCell;
}
I have managed to get the code working. As @dratenik mentioned, the problem was related to the shallow copy performed by the default copy constructor. To fix it, I added a move constructor to the TableData class.
/// Named table of strings stored as a heap-allocated array of row arrays,
/// addressed as data[row][column].
///
/// The object owns `data`. It is move-only: moving transfers the allocation
/// to the new object, and copying is disabled so two objects can never free
/// the same rows.
class TableData
{
public:
    /// Allocates `rows` row arrays of `columns` strings each and creates
    /// `columns` empty column identifiers.
    TableData(std::string name, int rows, int columns)
        : data(new std::string*[rows]), tableName(name), rowCount(rows), columnCount(columns)
    {
        for (int count = 0; count < rowCount; ++count)
            data[count] = new std::string[columnCount];
        columnID.resize(columnCount);
    }

    /// Move constructor: takes over the source's row storage, name and column
    /// identifiers without allocating. Afterwards `t.data` is nullptr, so the
    /// source may only be destroyed (its cells must not be accessed).
    TableData(TableData&& t) noexcept
        : data(t.data), rowCount(t.rowCount), columnCount(t.columnCount)
    {
        t.data = nullptr;             // the source's destructor must not free our rows
        tableName.swap(t.tableName);
        columnID.swap(t.columnID);
    }

    // Copying would require a deep copy of every row; it is not provided.
    TableData(const TableData&) = delete;
    TableData& operator=(const TableData&) = delete;

    /// Frees every row array and the array of row pointers; does nothing for
    /// an object whose storage was moved away.
    ~TableData()
    {
        if (data == nullptr)
            return;
        for (int count = 0; count < rowCount; ++count)
            delete[] data[count];
        delete[] data;
    }

    std::string **data;                 ///< data[row][column]; nullptr after being moved from
    std::string tableName = "";
    const int rowCount;
    const int columnCount;
    std::vector<std::string> columnID;  ///< one identifier per column
};

C++ Delete array inside 2D vector

Unfortunately I have to use arrays in order to use another function I have copied. Changing this function to work with vectors would be way over my head. So I wrote a function declaring me bunch of arrays in heap to be stored inside a vector.
I now have trouble freeing up that memory at the end.
/// Splits each polygon's interleaved coordinate list [x0, y0, x1, y1, ...]
/// into two heap-allocated int arrays and appends them to (*rtrn)[i]: first
/// the x array, then the y array.
///
/// @param voronoi  one interleaved float list per polygon. A trailing value
///                 without a partner (odd-length list) is ignored.
/// @param rtrn     per-polygon output; grown to voronoi->size() entries when
///                 it is shorter. Existing entries are appended to.
///
/// Each array holds size / 2 ints (values truncated toward zero) and is owned
/// by the caller, who must release it with delete[].
void _get_X_Y_arrays(std::vector<std::vector<float> > *voronoi, std::vector<std::vector<int*> > *rtrn)
{
    typedef std::vector<float>::size_type size_type;

    // Make sure every polygon has an output slot before indexing into *rtrn.
    if (rtrn->size() < voronoi->size())
        rtrn->resize(voronoi->size());

    for (std::vector<std::vector<float> >::size_type i = 0; i < voronoi->size(); ++i)
    {
        const std::vector<float> &polygon = (*voronoi)[i];
        // Only complete (x, y) pairs are converted.
        const size_type pointCount = polygon.size() / 2;

        int *x_heap = new int[pointCount];
        int *y_heap = new int[pointCount];
        for (size_type p = 0; p < pointCount; ++p)
        {
            x_heap[p] = static_cast<int>(polygon[2 * p]);
            y_heap[p] = static_cast<int>(polygon[2 * p + 1]);
        }

        (*rtrn)[i].push_back(x_heap);
        (*rtrn)[i].push_back(y_heap);
    }
}
The function works well and everything acts like intended. I wrote another function to free up that memory at the end when it's no longer needed:
// Intended to free the int arrays stored in *rtrn.
// NOTE(review): the loop bounds dereference the pointer ((*rtrn)[i]), but the
// delete expressions index the pointer itself: rtrn[i] is pointer arithmetic
// that names the i-th outer vector after *rtrn, not element i of *rtrn.
// rtrn[i][j][0] and rtrn[i][j][1] are therefore the first two pointers of the
// j-th inner vector of that object, and they are deleted once per value of j.
void _cleanup(std::vector<std::vector<int*> > *rtrn)
{
for (unsigned i = 0; i < rtrn->size(); i++)
{
for (unsigned j = 0; j < (*rtrn)[i].size(); j++)
{
delete[] rtrn[i][j][0];
delete[] rtrn[i][j][1];
}
}
}
Unfortunately this causes the program to crash. I don't really know where the error is. It feels like a vector might be going out of scope...?
Just by looking at it and playing with it I'm not able to solve this. What am I doing wrong?
I think you have a 3-dimensional array: [nbpolygons][2][nbpoints]
Your code :
delete[] rtrn[i][j][0]; // delete rtrn[i][j] index 0
delete[] rtrn[i][j][1]; // delete rtrn[i][j] (the same array) index 1
// => crash
rtrn[i].size() always equals 2.
Do :
/// Releases every int array stored in `rtrn` with delete[].
///
/// Each pointer is reset to nullptr after it is freed, so the vector never
/// holds dangling pointers and calling this function again is harmless. The
/// vector sizes are left unchanged.
void _cleanup(std::vector<std::vector<int*> >& rtrn)
{
    for (std::vector<int*>& polygon : rtrn)
    {
        for (int*& coords : polygon)
        {
            delete[] coords;   // delete[] on nullptr is a no-op
            coords = nullptr;
        }
    }
}
or
/// Releases the x and y arrays (the first two pointers) of every entry in
/// `rtrn` with delete[].
///
/// Entries holding fewer than two pointers are handled safely, pointers
/// beyond the second are left untouched, and every freed pointer is reset to
/// nullptr so a repeated call is harmless.
void _cleanup(std::vector<std::vector<int*> >& rtrn)
{
    for (std::vector<int*>& polygon : rtrn)
    {
        for (std::vector<int*>::size_type k = 0; k < polygon.size() && k < 2; ++k)
        {
            delete[] polygon[k];   // delete[] on nullptr is a no-op
            polygon[k] = nullptr;
        }
    }
}

Segmentation Error for Large String in Parameter C++

I'm finalizing a HW assignment to learn C++ coming from Java and had the code working until they tested it with the entire Gettysburg Address, now I get segment faults. I cannot figure out how to solve this problem, I've tried creating a string using the NEW keyword but still can't get things to sizzle. Any help would be greatly appreciated. Below is the function giving me problems.
// Finds a longest palindromic substring of inputString and stores it in
// largestPalindromeFound when it is longer than the one already stored.
// table[i][j] is set to true when inputString[i..j] is a palindrome; it is
// filled for length 1, then length 2, then each longer length in turn.
// NOTE(review): table is a fixed 1000x1000 local array, while the loops index
// it with values up to n - 1, where n is the input length.
void PalindromeFinder::truncateToLargestPalindrome(string& inputString){
//std::string *big = new std::string;
//*big=inputString;
int n = inputString.length();
int longestBegin = 0;
int maxLen = 1;
bool table[1000][1000] = {false};
// Every single character is a palindrome.
for (int i = 0; i < n; i++) {
table[i][i] = true;
}
// Two equal neighbouring characters form a palindrome of length 2.
for (int i = 0; i < n-1; i++) {
if (inputString[i] == inputString[i+1]) {
table[i][i+1] = true;
longestBegin = i;
maxLen = 2;
}
}
// A longer substring is a palindrome when its end characters match and the
// part between them is already known to be a palindrome.
for (int len = 3; len <= n; len++) {
for (int i = 0; i < n-len+1; i++) {
int j = i+len-1;
if (inputString[i] == inputString[j] && table[i+1][j-1]) {
table[i][j] = true;
longestBegin = i;
maxLen = len;
}
}
}
// Keep the new palindrome only if it beats the stored one.
if(largestPalindromeFound.length()<inputString.substr(longestBegin, maxLen).length()){
this->largestPalindromeFound = inputString.substr(longestBegin, maxLen);}
}
If your input string is longer than 1000 characters, you will start accessing memory outside of your table matrix, which is only 1000x1000.
Accessing memory outside of what you allocated can cause segmentation faults, and the Gettysburg Address is longer than 1000 characters.

Thrust CUDA allocating char * to device_vector of objects

I am having trouble making a deep copy of a host_vector to device_vector. I think that I am having an issue modifying the value of an element stored in a device_vector. You can find a compilable version at the bottom but the code in question is the following (i put stars on the rows that trigger the segmentation fault):
// Build host-side vectors: one CharArr (pointer + length) and one row number
// per input string.
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
// Copy both vectors to the device. The CharArr elements are copied as they
// are, so their `value` members still hold the host pointers assigned above.
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
// Attempt to give every device-side element its own device copy of the string.
for(int i = 0; i < size; i++){
char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// NOTE(review): the value of deviceString (0) is cast to void** here; its
// address (&deviceString) is not what is passed.
cudaMalloc((void **) deviceString,sizeString);
// Copies sizeString bytes, i.e. the characters without the terminating '\0'.
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
// The two starred lines below are the ones reported to trigger the fault.
**** deviceCharArr->length = sizeString;
**** deviceCharArr->value = deviceString;
}
What happens is that when we arrive at the actual assignment
deviceCharArr->value = deviceString
It throws a Segmentation Fault error. I am very new to CUDA and apologize if there is an obvious answer but I have not been able to find many examples of people allocating char * on devices.
Complete Compilable version is here
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
// Handle to a character sequence: start pointer plus length in chars.
struct CharArr {
    char * value;   // first character of the string
    int length;     // number of characters
};
/// Strict "less than" ordering of CharArr handles: plain char-by-char
/// comparison, with a proper prefix ordered before the longer string.
/// Both members are const, so the functor can be used through const objects
/// and references.
struct CharArrayCmp{
    /// Returns true when o1 orders before o2.
    __host__ __device__
    bool operator()(const CharArr & o1, const CharArr & o2) const {
        return compare(o1.value,o1.length,o2.value,o2.length);
    }

    /// Compares the first min(lenSrc, lenDest) characters; if they are all
    /// equal, the shorter sequence orders first and equal lengths give false.
    __host__ __device__ bool compare (const char * src, int lenSrc, const char * dst, int lenDest) const
    {
        const int end = lenSrc > lenDest ? lenDest : lenSrc;
        for(int i = 0; i < end; i++){
            if(src[i] != dst[i]){
                return src[i] < dst[i];
            }
        }
        return lenSrc < lenDest;
    }
};
// Sorts the `size` C strings in arrayToSort on the host with
// thrust::sort_by_key and CharArrayCmp, reordering arrayToSortRow in the same
// way. Both arrays are overwritten with the sorted result; only the pointers
// in arrayToSort are rearranged, the characters themselves are not copied.
void sortCharArrayHost(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
// Keys: one CharArr (pointer + strlen) per string. Values: the row numbers.
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
// The device-side attempt below is disabled; this function sorts on the host only.
/*thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * deviceString = 0;
char * hostString = hostToSort[i].value;
int size = strlen(hostString)*sizeof(char);
int cudaStatus;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaStatus = cudaMalloc((void **) deviceString,size);
cudaStatus = cudaMemcpy(deviceString,hostString,size,cudaMemcpyHostToDevice);
(&deviceArrayToSort[i]).get()->value = "";
}
*/
// thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
// Sort the handles and apply the same permutation to the row numbers.
thrust::sort_by_key(hostToSort.begin(),hostToSort.end(),hostToSortRow.begin(),CharArrayCmp());
//copy the contents back into our original array to sort now sorted
// hostToSort = deviceArrayToSort;
for(int i =0; i < size; i++){
arrayToSort[i] = hostToSort[i].value;
}
// thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);
thrust::copy(hostToSortRow.begin(),hostToSortRow.end(),arrayToSortRow);
}
// Device-side counterpart of sortCharArrayHost: copies the CharArr handles and
// row numbers into device vectors, tries to replace each handle's string with
// a device copy, sorts with thrust::sort_by_key and CharArrayCmp, and writes
// the result back into arrayToSort / arrayToSortRow.
// NOTE(review): this is the version reported to crash; the answer below points
// out that a device pointer is dereferenced in host code here.
void sortCharArrayDevice(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
// Keys: one CharArr (pointer + strlen) per string. Values: the row numbers.
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
// The CharArr elements are copied as they are, so their `value` members still
// hold the host pointers assigned above.
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;
// Raw address of element i inside the device vector.
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// NOTE(review): the value of deviceString (0) is cast to void** here; its
// address (&deviceString) is not what is passed.
cudaMalloc((void **) deviceString,sizeString);
// Copies sizeString bytes, i.e. the characters without the terminating '\0'.
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
// Host code writes through the device-vector element address obtained above.
deviceCharArr->length = sizeString;
deviceCharArr->value = deviceString;
}
thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
//copy the contents back into our original array to sort now sorted
for(int i =0; i < size; i++){
arrayToSort[i] = (&deviceArrayToSort[i]).get()->value;
}
thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);
}
// Runs the host sort and then the device sort on the same ten sample strings,
// printing row number and string after each run.
int main()
{
    static const char * const kSamples[10] = {
        "zyxw", "abcd", "defg", "werd", "aasd",
        "zwedew", "asde", "rurt", "ntddwe", "erbfde"
    };

    char ** charArr = new char*[10];
    long long * rows = new long long[10];

    // Puts the sample strings and the row numbers 0..9 back in their initial order.
    auto reset = [&]() {
        for(int i = 0; i < 10; i++){
            charArr[i] = const_cast<char *>(kSamples[i]);
        }
        for(int i = 0; i < 10; i++){
            rows[i] = i;
        }
    };
    // Prints one "Row is ... String is ..." line per entry.
    auto report = [&]() {
        for(int i = 0; i < 10; i++){
            std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[i]<<std::endl;
        }
    };

    reset();
    sortCharArrayHost(charArr,rows,10);
    report();

    reset();
    sortCharArrayDevice(charArr,rows,10);
    report();
}
As JackOLantern has already pointed out, this is not acceptable:
// this creates an allocation on the device
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
// this takes the (device) address of an element and assigns it to a pointer variable
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// this then dereferences that device pointer in host code, which is illegal
deviceCharArr->length = sizeString;
In CUDA you are not allowed to dereference a device pointer in host code or vice-versa.
It seems you have the following data sets:
The strings to be sorted
An array of string "handles" consisting of CharArr objects each containing pointer to start of string and length
An array of string indices (i.e. 0, 1, 2, ...)
You want to sort 2, and 3 above, based on 1. Thrust "likes" to have everything in one or 2 vectors, if possible. Let's try the following:
concatenate all strings together into a single char vector.
mark the start index of each string in another int vector. The difference in successive start indices will constitute the length of each string. We'll combine the start and the length of each string into a thrust::tuple for use in the comparator, by using a zip_iterator
sort the "tuple array" (i.e. sort index and length at the same time) using the desired comparison functor. Any necessary rearrangement of other data can be accomplished using the reordered index vector.
If you want a re-ordered string index also (i.e. 0, 1, 2, ...) you can create that vector easily enough and add it as a third element to the tuple to be sorted.
Note that the above approach entirely avoids the use of pointers, which as you've seen can be troublesome to manage between host and device copies of the same data.
Here's a fully worked example:
$ cat t439.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#define NUM_STRINGS 10
/// Comparator for (start index, length, ...) tuples that refer to substrings
/// of one concatenated character buffer. Orders by plain char-by-char
/// comparison, with a proper prefix ordered before the longer string.
struct stringCmp{
    const char * strings;   ///< base of the concatenated character buffer

    /// @param _strings  pointer to the concatenated characters; it must be
    ///                  dereferenceable wherever operator() is executed.
    explicit stringCmp(const char * _strings) : strings(_strings) {}

    /// Returns true when the substring described by o1 orders before the one
    /// described by o2. Element 0 of each tuple is the start index, element 1
    /// the length; any further elements are ignored.
    template<typename myTuple>
    __host__ __device__
    bool operator()(const myTuple & o1, const myTuple & o2) const {
        const int idxSrc = thrust::get<0>(o1);
        const int lenSrc = thrust::get<1>(o1);
        const int idxDst = thrust::get<0>(o2);
        const int lenDst = thrust::get<1>(o2);
        // Compare only the part both substrings have in common.
        const int end = lenSrc > lenDst ? lenDst : lenSrc;
        for(int i = 0; i < end; i++){
            const char a = strings[idxSrc+i];
            const char b = strings[idxDst+i];
            if(a != b){
                return a < b;
            }
        }
        // Common part is equal: the shorter one orders first, equal lengths are not "less".
        return lenSrc < lenDst;
    }
};
void sortCharArrayDevice(char ** arr, int *rows, int num_str){
thrust::host_vector<char> h_strings;
thrust::host_vector<int> h_st_idx(num_str);
thrust::host_vector<int> h_len(num_str);
thrust::host_vector<int> h_rows(num_str);
// concatenate strings
// assume no zero length strings
h_st_idx[0] = 0;
for (int i = 0; i < num_str; i++){
int sidx = 0;
while (arr[i][sidx] != '\0'){
h_strings.push_back(arr[i][sidx]);
sidx++;}
h_len[i] = sidx;
if (i < num_str-1) h_st_idx[i+1] = h_st_idx[i] + sidx;
h_rows[i] = rows[i];
}
// copy data to device
thrust::device_vector<char> d_strings = h_strings;
thrust::device_vector<int> d_st_idx = h_st_idx;
thrust::device_vector<int> d_len = h_len;
thrust::device_vector<int> d_rows = h_rows;
// sort on device
thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.begin(), d_len.begin(), d_rows.begin())), thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.end(), d_len.end(), d_rows.end())), stringCmp(thrust::raw_pointer_cast(d_strings.data())));
thrust::copy(d_rows.begin(), d_rows.end(), rows);
}
int main()
{
char ** charArr = new char*[NUM_STRINGS];
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
int * rows = new int[NUM_STRINGS];
for(int i = 0; i < NUM_STRINGS;i++ ){
rows[i] = i;
}
sortCharArrayDevice(charArr,rows,NUM_STRINGS);
for(int i = 0; i < NUM_STRINGS; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[rows[i]]<<std::endl;
}
}
$ nvcc -arch=sm_20 -o t439 t439.cu
$ ./t439
Row is 4 String is aasd
Row is 1 String is abcd
Row is 6 String is asde
Row is 2 String is defg
Row is 9 String is erbfde
Row is 8 String is ntddwe
Row is 7 String is rurt
Row is 3 String is werd
Row is 5 String is zwedew
Row is 0 String is zyxw
$