I am having trouble making a deep copy of a host_vector to device_vector. I think that I am having an issue modifying the value of an element stored in a device_vector. You can find a compilable version at the bottom but the code in question is the following (i put stars on the rows that trigger the segmentation fault):
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaMalloc((void **) deviceString,sizeString);
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
**** deviceCharArr->length = sizeString;
**** deviceCharArr->value = deviceString;
}
What happens is that when we arrive at the actual assignment
deviceCharArr->value = deviceString
It throws a Segmentation Fault error. I am very new to CUDA and apologize if there is an obvious answer but I have not been able to find many examples of people allocating char * on devices.
Complete Compilable version is here
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
typedef struct{
char * value;
int length;
} CharArr;
struct CharArrayCmp{
__host__ __device__
bool operator()(const CharArr & o1, const CharArr & o2) {
return this->compare(o1.value,o1.length,o2.value,o2.length);
}
__host__ __device__ bool compare (const char * src, int lenSrc, const char * dst, int lenDest)
{
int end;
if(lenSrc > lenDest){
end = lenDest;
}else{
end = lenSrc;
}
for(int i = 0; i < end; i++){
if(src[i] > dst[i]){
return false;
}else if(src[i] < dst[i]){
return true;
}
}
if(lenSrc >= lenDest){
return false;
}
return true;
}
};
void sortCharArrayHost(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
/*thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * deviceString = 0;
char * hostString = hostToSort[i].value;
int size = strlen(hostString)*sizeof(char);
int cudaStatus;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaStatus = cudaMalloc((void **) deviceString,size);
cudaStatus = cudaMemcpy(deviceString,hostString,size,cudaMemcpyHostToDevice);
(&deviceArrayToSort[i]).get()->value = "";
}
*/
// thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
thrust::sort_by_key(hostToSort.begin(),hostToSort.end(),hostToSortRow.begin(),CharArrayCmp());
//copy the contents back into our original array to sort now sorted
// hostToSort = deviceArrayToSort;
for(int i =0; i < size; i++){
arrayToSort[i] = hostToSort[i].value;
}
// thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);
thrust::copy(hostToSortRow.begin(),hostToSortRow.end(),arrayToSortRow);
}
void sortCharArrayDevice(char ** arrayToSort, long long * arrayToSortRow,long long size){
std::cout <<"about to start LongIndex" <<std::endl;
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);
// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){
char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
cudaMalloc((void **) deviceString,sizeString);
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
deviceCharArr->length = sizeString;
deviceCharArr->value = deviceString;
}
thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
//copy the contents back into our original array to sort now sorted
for(int i =0; i < size; i++){
arrayToSort[i] = (&deviceArrayToSort[i]).get()->value;
}
thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);
}
int main()
{
char ** charArr = new char*[10];
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
long long * rows = new long long[10];
for(int i = 0; i < 10;i++ ){
rows[i] = i;
}
sortCharArrayHost(charArr,rows,10);
for(int i = 0; i < 10; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[i]<<std::endl;
}
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
for(int i = 0; i < 10;i++ ){
rows[i] = i;
}
sortCharArrayDevice(charArr,rows,10);
for(int i = 0; i < 10; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[i]<<std::endl;
}
}
As JackOLantern has already pointed out, this is not acceptable:
// this creates an allocation on the device
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
// this takes the (device) address an element and assigns it to a pointer variable
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// this then dereferences a device pointer in host code which is illegal
deviceCharArr->length = sizeString;
In CUDA you are not allowed to dereference a device pointer in host code or vice-versa.
It seems you have the following data sets:
The strings to be sorted
An array of string "handles" consisting of CharArr objects each containing pointer to start of string and length
An array of string indices (i.e. 0, 1, 2, ...)
You want to sort 2, and 3 above, based on 1. Thrust "likes" to have everything in one or 2 vectors, if possible. Let's try the following:
concatenate all strings together into a single char vector.
mark the start index of each string in another int vector. The difference in successive start indices will constitute the length of each string. We'll combine the start and the length of each string into a thrust::tuple for use in the comparator, by using a zip_iterator
sort the "tuple array" (i.e. sort index and length at the same time) using the desired comparison functor. Any necessary rearrangement of other data can be accomplished using the reordered index vector.
If you want a re-ordered string index also (i.e. 0, 1, 2, ...) you can create that vector easily enough and add it as a third element to the tuple to be sorted.
Note that the above approach entirely avoids the use of pointers, which as you've seen can be troublesome to manage between host and device copies of the same data.
Here's a fully worked example:
$ cat t439.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#define NUM_STRINGS 10
struct stringCmp{
const char * strings;
stringCmp(char * _strings) : strings(_strings) {}
template<typename myTuple>
__host__ __device__
bool operator()(const myTuple & o1, const myTuple & o2) {
int idxSrc = thrust::get<0>(o1);
int lenSrc = thrust::get<1>(o1);
int idxDst = thrust::get<0>(o2);
int lenDst = thrust::get<1>(o2);
int end;
if(lenSrc > lenDst){
end = lenDst;
}else{
end = lenSrc;
}
for(int i = 0; i < end; i++){
if(strings[idxSrc+i] > strings[idxDst+i]){
return false;
}else if(strings[idxSrc+i] < strings[idxDst+i]){
return true;
}
}
if(lenSrc >= lenDst){
return false;
}
return true;
}
};
void sortCharArrayDevice(char ** arr, int *rows, int num_str){
thrust::host_vector<char> h_strings;
thrust::host_vector<int> h_st_idx(num_str);
thrust::host_vector<int> h_len(num_str);
thrust::host_vector<int> h_rows(num_str);
// concatenate strings
// assume no zero length strings
h_st_idx[0] = 0;
for (int i = 0; i < num_str; i++){
int sidx = 0;
while (arr[i][sidx] != '\0'){
h_strings.push_back(arr[i][sidx]);
sidx++;}
h_len[i] = sidx;
if (i < num_str-1) h_st_idx[i+1] = h_st_idx[i] + sidx;
h_rows[i] = rows[i];
}
// copy data to device
thrust::device_vector<char> d_strings = h_strings;
thrust::device_vector<int> d_st_idx = h_st_idx;
thrust::device_vector<int> d_len = h_len;
thrust::device_vector<int> d_rows = h_rows;
// sort on device
thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.begin(), d_len.begin(), d_rows.begin())), thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.end(), d_len.end(), d_rows.end())), stringCmp(thrust::raw_pointer_cast(d_strings.data())));
thrust::copy(d_rows.begin(), d_rows.end(), rows);
}
int main()
{
char ** charArr = new char*[NUM_STRINGS];
charArr[0] = "zyxw";
charArr[1] = "abcd";
charArr[2] = "defg";
charArr[3] = "werd";
charArr[4] = "aasd";
charArr[5] = "zwedew";
charArr[6] = "asde";
charArr[7] = "rurt";
charArr[8] = "ntddwe";
charArr[9] = "erbfde";
int * rows = new int[NUM_STRINGS];
for(int i = 0; i < NUM_STRINGS;i++ ){
rows[i] = i;
}
sortCharArrayDevice(charArr,rows,NUM_STRINGS);
for(int i = 0; i < NUM_STRINGS; i++){
std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[rows[i]]<<std::endl;
}
}
$ nvcc -arch=sm_20 -o t439 t439.cu
$ ./t439
Row is 4 String is aasd
Row is 1 String is abcd
Row is 6 String is asde
Row is 2 String is defg
Row is 9 String is erbfde
Row is 8 String is ntddwe
Row is 7 String is rurt
Row is 3 String is werd
Row is 5 String is zwedew
Row is 0 String is zyxw
$
Related
I am having an issue with accessing 2d array elements.
While creating an object I allocate memory for my array in the constructor.
Once the object is created when I try to access array elements I am getting EXC_BAD_ACCESS.
I checked and when still in constructor I can access array elements.
I don’t know what I am doing wrong.
This is my class where I allocate memory for data array;
class TableData
{
public:
TableData(std::string name, int rows, int columns) : tableName(name), rowCount(rows), columnCount(columns)
{
data = new std::string*[rowCount];
for (int count = 0; count < rowCount; ++count)
data[count] = new std::string[columnCount];
columnID.resize(columnCount);
//Below I did a test where can see that I can access array elements
data[0][0]=“test1111”;
std::string test = data[0][0];
}
~TableData()
{
for (int count = 0; count < rowCount; ++count)
delete[] data[count];
delete[] data;
}
std::string **data;
std::string tableName = "";
const int rowCount;
const int columnCount;
std::vector<std::string> columnID;
};
When I try to fill an array with data I am getting "Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)"
ecuData.tables.push_back(TableData(name, tableRowCount, tableColumnCount));
ecuData.tables[i].data[0][0]=“test2222”; // Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)
“tables” is a vector of TableData objects
Here is a full function:
void Decoder::getTables(std::fstream &fs, std::vector<char> & buffer, SGBDdata &ecuData)
{
const int ptr_offset = 0x84;
int tableOffset = *reinterpret_cast<int32_t*>(&buffer[0] + ptr_offset);
const int tableCountBufferLength = 4;
vector<char> tableCountBuffer;
fs.seekg(tableOffset, fs.beg);
readAndDecryptBytes(fs, tableCountBuffer, tableCountBufferLength);
int tableCount = *reinterpret_cast<int32_t*>(&tableCountBuffer[0] + 0);
for(int i = 0; i < tableCount; ++i) //iterate through tables
{
int tableBufferLength = 0x50;
vector<char> tableBuffer;
readAndDecryptBytes(fs, tableBuffer, tableBufferLength);
string name(tableBuffer.begin(), tableBuffer.begin() + 0x40);
TrimEnd(name);
int tableColumnOffset = *reinterpret_cast<int32_t*>(&tableBuffer[0] + 0x40);
int tableColumnCount = *reinterpret_cast<int32_t*>(&tableBuffer[0] + 0x48);
int tableRowCount = *reinterpret_cast<int32_t*>(&tableBuffer[0] + 0x4C);
//**This is where I am creating new “TableData” object and putting it into vector<TableData>**
ecuData.tables.push_back(TableData(name, tableRowCount, tableColumnCount));
long savedPos = fs.tellg();
fs.seekg(tableColumnOffset, fs.beg);
//Load column names
for(int j = 0; j < tableColumnCount; ++j)
{
int tableItemBufferLength = 1024;
vector<char> tableItemBuffer;
for(int k = 0; k < tableItemBufferLength; ++k)
{
readAndDecryptBytes(fs, tableItemBuffer, 1);
if (tableItemBuffer[k] == 0)
break;
}
ecuData.tables[i].columnID.push_back(string(tableItemBuffer.begin(), tableItemBuffer.end() - 1));
}
for(int j = 0; j < tableRowCount; ++j)
{
for (int k = 0; k < tableColumnCount; ++k)
{
int tableItemBufferLength = 1024;
vector<char> tableItemBuffer;
for (int l = 0; l < tableItemBufferLength; ++l)
{
readAndDecryptBytes(fs, tableItemBuffer, 1);
if (tableItemBuffer[l] == 0)
break;
}
string s(tableItemBuffer.begin(), tableItemBuffer.end() - 1);
ecuData.tables[i].data[j][k] = s; //**This is where I get my fault**
}
}
fs.seekg(savedPos, fs.beg);
}
}
Here is SGBDdata class. The ecuData object of this class contains a vector data
class SGBDdata
{
public:
std::string sgbdPath;
std::string ecuName;
std::vector<JobData> jobs;
std::vector<TableData> tables;
};
lose all the new, delete, and pointer magic. and use a vector of vectors?
// Example program
#include <iostream>
#include <string>
#include <vector>
class TableData
{
public:
TableData(std::string name, int rows, int columns) : tableName(name), rowCount(rows), columnCount(columns)
{
data.resize(columnCount);
for( int i = 0; i < columnCount; i++)
{
data[i].resize(rowCount);
}
columnID.resize(columnCount);
//Below I did a test where can see that I can access array elements
data[0][0]="test1111";
std::string test = data[0][0];
}
~TableData()
{
}
std::vector<std::vector<std::string>> data;
std::string tableName = "";
const int rowCount;
const int columnCount;
std::vector<std::string> columnID;
};
int main()
{
TableData test = TableData( "me", 3,3);
test.data[0][0]="test2222";
std::cout<<test.data[0][0];
}
I have managed to get the code working. As #dratenik mentioned the problem was related to shallow copy which was taking place in default copy constructor. To get it fixed I have added a move constructor to TableData class.
class TableData
{
public:
TableData(std::string name, int rows, int columns) : tableName(name), rowCount(rows), columnCount(columns)
{
data = new std::string*[rowCount];
for (int count = 0; count < rowCount; ++count)
data[count] = new std::string[columnCount];
columnID.resize(columnCount);
}
TableData(TableData&& t) : rowCount(t.rowCount), columnCount(t.columnCount)
{
tableName=t.tableName;
data = new std::string*[rowCount];
for (int count = 0; count < rowCount; ++count)
data[count] = new std::string[columnCount];
for (int r_count = 0; r_count < rowCount; ++r_count)
for (int c_count = 0; c_count < columnCount; ++c_count)
data[r_count][c_count] = t.data[r_count][c_count];
}
~TableData()
{
for (int count = 0; count < rowCount; ++count)
delete[] data[count];
delete[] data;
}
std::string **data;
std::string tableName = "";
const int rowCount;
const int columnCount;
std::vector<std::string> columnID;
};
unfortunately I can't see the forest for the trees and need help. I call a result set via a SELECT statement, which has 70 columns. I initialize my buffer for all 70 columns which are chars and set them with setDataBuffer. Unfortunately I can only retrieve 15-17 records. After that I get an Access Violation error message. If I try next(1000) it does not work at all. I think it has something to do with the pointers but I don't see the error. Does anyone know what I am doing wrong?
#pragma region Arrays
char*** data_array = new char**[70];
for (unsigned int i = 0; i < 70; ++i)
{
data_array[i] = new char*[1000];
for (unsigned int j = 0; j < 1000; ++j)
{
data_array[i][j] = new char[500];
}
}
ub2** size_array = new ub2 * [70];
for (unsigned int i = 0; i < 70; ++i)
{
size_array[i] = new ub2[1000];
}
sb2** ind_array = new sb2 * [70];
for (unsigned int i = 0; i < 70; ++i)
{
ind_array[i] = new sb2[1000];
}
ub2** rc_array = new ub2 * [70];
for (unsigned int i = 0; i < 70; ++i)
{
rc_array[i] = new ub2[1000];
}
#pragma endregion
#pragma region setDataBuffer
for (unsigned int i = 0; i < 70; ++i)
{
resultSet->setDataBuffer(i + 1, data_array[i][0], OCCI_SQLT_STR, 500, size_array[i], ind_array[i], rc_array[i]);
}
#pragma endregion
try
{
ResultSet::Status resultSetStatus = resultSet->next(25);
if (resultSetStatus == ResultSet::Status::DATA_AVAILABLE)
{
unsigned int rowCount = resultSet->getNumArrayRows();
for (unsigned int row = 0; row < rowCount; ++row)
{
for (unsigned int column = 0; column < 70; ++column)
{
auto value = data_array[column][row];
auto vsize = *size_array[column];
std::string cellContent(value, vsize);
}
}
}
}
catch(SQLException& sqlEx)
{
std::string msg = sqlEx.getMessage();
int i = 0;
}
The problem is your allocation of data_array, because you create it as a jagged array, not a contiguous array of memory as needed by setDataBuffer.
If you want do use dynamic allocation using new[] I suggest something like this instead:
using data_type = int8_t[1000][500];
auto data_array = new data_type[70];
Then each element of data_array will be a contiguous area of memory, 1000 * 500 bytes large.
If you want to know the difference between an array of arrays (as in my solution) and your pointer to pointer (jagged array), see e.g. this old answer of mine.
I am trying to use the suggestion from this post to free up time being spent in _platform_memmove$VARIANT$Haswell. According to a time profiler, this is occurring when I send a pointer to several class instances to a function. I have tried changing the way I declare the class instances, changing what the function takes, etc. but have not been able to resolve this.
The chunk of my code that may help:
Inputs *tables = new Inputs(OutputFolder, DataFolder);
ScreenStrat *strat_burnin = new ScreenStrat(ScreenStrat::NoScreen, ScreenStrat::NoScreen,
tables->ScreenStartAge, tables->ScreenStopAgeHIV,
tables->ScreenStopAge, ScreenStrat::NoVaccine);
calibrate *calib_output = new calibrate ();
StateMachine *Machine = new StateMachine();
for (int i = 0; i < n_sims; i++){
calib_output->saved_output[i] = RunCalibration(calib_output->calib_params[i], *strat_burnin, *tables, *Machine);
}
auto ret_val = *calib_output;
delete strat_burnin;
delete tables;
delete Machine;
delete calib_output;
return(ret_val);
and then the function declaration:
vector<double> RunCalibration(vector<double> calib_params, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine)
EDIT
I addressed the points #Botje suggest and it hasn't fixed the problems. Updated code:
void RunCalibration(calibrate &calib, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine, int i);
unique_ptr<calibrate> RunChain(string RunsFileName, string CurKey, string OutputFolder, string DataFolder);
int main(int argc, char* argv[]) {
string DataFolder;
string OutputFolder;
DataFolder = "../Data/";
OutputFolder = "../Output/";
unsigned int run;
string CurKey;
string RunsFileName(DataFolder);
if(argc == 1){
RunsFileName.append("test.ini");
}
else if(argc > 1){
RunsFileName.append(argv[1]);
}
CIniFile RunsFile(RunsFileName);
if (!RunsFile.ReadFile()) {
cout << "Could not read Runs File: " << RunsFileName << endl;
exit(1);
}
CurKey = RunsFile.GetKeyName (0);
if (RunsFile.GetValue(CurKey, "RunType") == "Calibration"){
int totaliters = RunsFile.GetValueI(CurKey, "Iterations");
int n_sims = RunsFile.GetValueI(CurKey, "Simulations");
vector<future<unique_ptr<calibrate>>> futures;
vector<unique_ptr<calibrate>> modeloutputs;
for (run = 0; run < totaliters; run++){
futures.push_back (async(launch::async, RunChain, RunsFileName, CurKey, OutputFolder, DataFolder));
}
for (int i = 0; i < futures.size(); i++){
modeloutputs.push_back (futures[i].get());
} return(0)}
unique_ptr<calibrate> RunChain(string RunsFileName, string CurKey, string OutputFolder, string DataFolder) {
Inputs *tables = new Inputs(OutputFolder, DataFolder);
tables->loadRFG (RunsFileName, CurKey);
tables->loadVariables ();
int n_sims = tables->Simulations;
int n_params = tables->Multipliers.size();
int n_targs = tables->CalibTargs.size();
ScreenStrat *strat_burnin = new ScreenStrat(ScreenStrat::NoScreen, ScreenStrat::NoScreen,
tables->ScreenStartAge, tables->ScreenStopAgeHIV,
tables->ScreenStopAge, ScreenStrat::NoVaccine);
calibrate *calib_output = new calibrate (n_sims, n_params, n_targs);
calib_output->multipliers_names = tables->MultipliersNames;
calib_output->calib_targs_names = tables->CalibTargsNames;
for (int i = 0; i < n_targs; i ++){
calib_output->calib_targs[i] = tables->CalibTargs[i][0];
calib_output->calib_targs_SD[i] = tables->CalibTargs[i][1];
}
for (int i = 0; i < n_params; i++){
for (int j = 0; j < 3; j++){
calib_output->multipliers[i][j] = tables->Multipliers[i][j];
}
}
StateMachine *Machine = new StateMachine();
for (int i = 0; i < n_sims; i++){
RunCalibration(*calib_output, *strat_burnin, *tables, *Machine, i);
}
unique_ptr<calibrate> ret_val = make_unique<calibrate>(*calib_output);
delete strat_burnin;
delete tables;
delete Machine;
delete calib_output;
return(ret_val);
}
void RunCalibration(calibrate &calib, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine, int i){
Adding in Calibrate definition per request from #botje
#include "calibrate.h"
using namespace std;
calibrate::calibrate(int n_sims, int n_params, int n_targs) {
calib_targs.resize (n_targs);
calib_targs_SD.resize (n_targs);
multipliers.resize(n_params);
for(int i = 0; i < n_params; i++){
multipliers[i].resize(3);
}
calib_params.resize (n_sims);
for (int i = 0; i < calib_params.size(); i++){
calib_params[i].resize (n_params);
}
saved_output.resize (n_sims);
for (int i = 0; i < saved_output.size(); i++){
saved_output[i].resize (n_targs);
}
best_params.resize (n_params);
GOF.clear();
tuned_SD.resize(n_params);
}
calibrate::~calibrate(void) {
}
void calibrate::CalculateGOF(int n_sims) {
GOF.push_back (WeightedDistance (saved_output[n_sims][0], calib_targs[0], calib_targs_SD[0]));
for (int i = 1; i < calib_targs.size(); i ++){
GOF[n_sims] += WeightedDistance (saved_output[n_sims][i], calib_targs[i], calib_targs_SD[i]);
}
if (n_sims == 0){
GOF_min = GOF[0];
best_params = calib_params[0];
} else {
auto it = std::min_element(std::begin(GOF), std::end(GOF));
int index = distance(GOF.begin(), it);
GOF_min_run = GOF[index];
if (GOF_min_run < GOF_min){
GOF_min = GOF_min_run;
best_params = calib_params[index];
}
}
}
std::vector<double> calibrate::loadCalibData(int n_params, int n_sim, int tuning_factor) {
if(n_sim == 0){
random_device rd;
mt19937 gen(rd());
for (int i = 0; i < n_params; i ++ ){
uniform_real_distribution<> dis(multipliers[i][0], multipliers[i][1]);
calib_params[n_sim][i] = dis(gen);
}
} else {
tuned_SD = tuningparam (n_sim, n_params, tuning_factor);
for (int i = 0; i < n_params; i ++ ){
calib_params[n_sim][i] = rnormal_trunc (best_params[i], tuned_SD[i], multipliers[i][1], multipliers[i][0]);
}
}
return(calib_params[n_sim]);
}
double calibrate::WeightedDistance(double data, double mean, double SD) {
double distance = pow((data - mean)/(SD * 2),2);
return distance;
}
double calibrate::rnormal_trunc(double mu, double sigma, double upper, double lower) {
std::default_random_engine generator;
std::normal_distribution<double> distribution(mu, sigma);
double prob = distribution(generator);
while (prob < lower || prob > upper){
prob = distribution(generator);
}
return(prob);
}
vector<double> calibrate::tuningparam(int n_sims, int n_param, int tuning_factor) {
vector<double> newSD;
for (int i = 0; i < n_param; i++){
newSD.push_back (multipliers[i][2]/pow(tuning_factor,n_sims));
}
return newSD;
}
I improved RunCalibration as follows. Note the comments for further improvement opportunities.
using std::make_unique;
using std::unique_ptr;
void RunCalibration(calibrate &calib, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine, int i);
unique_ptr<calibrate> RunChain(string RunsFileName, string CurKey, string OutputFolder, string DataFolder) {
auto tables = make_unique<Inputs>(OutputFolder, DataFolder);
tables->loadRFG (RunsFileName, CurKey);
tables->loadVariables ();
int n_sims = tables->Simulations;
int n_params = tables->Multipliers.size();
int n_targs = tables->CalibTargs.size();
auto strat_burnin = make_unique<ScreenStrat>(
ScreenStrat::NoScreen, ScreenStrat::NoScreen,
tables->ScreenStartAge, tables->ScreenStopAgeHIV,
tables->ScreenStopAge, ScreenStrat::NoVaccine);
auto calib_output = make_unique<calibrate>(n_sims, n_params, n_targs);
// I don't know the type of these fields, but IF you do not modify them in
// `RunCalibration`, consider making them `shared_ptr<vector<...>>`
// both in `calibrate` and in `Inputs` so you can simply copy
// the pointer instead of the full table.
calib_output->multipliers_names = tables->MultipliersNames;
calib_output->calib_targs_names = tables->CalibTargsNames;
// Same applies here. If you do not modify CalibTargs, make `calib_targs` a shared_ptr
// and only copy by pointer.
for (int i = 0; i < n_targs; i ++){
calib_output->calib_targs[i] = tables->CalibTargs[i][0];
calib_output->calib_targs_SD[i] = tables->CalibTargs[i][1];
}
// and again...
for (int i = 0; i < n_params; i++){
for (int j = 0; j < 3; j++){
calib_output->multipliers[i][j] = tables->Multipliers[i][j];
}
}
auto Machine = make_unique<StateMachine>();
for (int i = 0; i < n_sims; i++){
RunCalibration(*calib_output, *strat_burnin, *tables, *Machine, i);
}
// This will return the unique_ptr without copying.
return calib_output;
}
My code crashes the terminal each time I execute it. I have tried it with Geany and Visual C++ for debugging and it also crashes when just going through cmd prompt.
I got rid of all the bugs using the debugger, but it didn't help. I am supposed to be creating a simulator of a cache, and looking for hits and misses using random numbers, here is my code:
#include <iostream>
#include <cmath>
#include <stdlib.h>
#include <ctime>
#include <iomanip>
#include <fstream>
using namespace std;
const int r = 8192;
const int c = 3;
const int r1 = 262144;
int i;
int j;
int k;
int max;
int min;
int numberW0;
int index_max;
int index_min;
int numberX0;
int tag_max;
int tag_min;
int numberY0;
unsigned int data_max;
int data_min;
int numberZ0;
int numberW1;
int index_max1;
int index_min1;
int numberX1;
int tag_max1;
int tag_min1;
int numberY1;
unsigned int data_max1;
int data_min1;
int numberZ1;
int numberW2;
int index_max2;
int index_min2;
int numberX2;
int tag_max2;
int tag_min2;
int numberY2;
unsigned int data_max2;
int data_min2;
int numberZ2;
int numberW3;
int index_max3;
int index_min3;
int numberX3;
int tag_max3;
int tag_min3;
int numberY3;
unsigned int data_max3;
int data_min3;
int numberZ3;
int numberW4;
int index_max4;
int index_min4;
int numberX4;
int tag_max4;
int tag_min4;
int numberY4;
unsigned int data_max4;
int data_min4;
int numberZ4;
double new_valid = 0;
double new_tag = -9999999999;
double new_data = -9999999999;
double tag_value;
int s;
int t;
double block0;
double block1;
double block2;
double block3;
int v;
double levBlock;
fstream armRes;
int main()
{
double way0 [r][c]; //declaration of the arrays
double way1 [r][c];
double way2 [r][c];
double way3 [r][c];
double lev2[r1][c];
for (i = 0; i < r; i++) // initialization of the arrays
{
way0[i][0] = new_valid;
way0[i][1] = new_tag;
way0[i][2] = new_data;
way1[i][0] = new_valid;
way1[i][1] = new_tag;
way1[i][2] = new_data;
way2[i][0] = new_valid;
way2[i][1] = new_tag;
way2[i][2] = new_data;
way3[i][0] = new_valid;
way3[i][1] = new_tag;
way3[i][2] = new_data;
}
for (j = 0; j < r1; j++)
{
lev2[j][0] = new_valid;
lev2[j][1] = new_tag;
lev2[j][2] = new_data;
}
srand (time(0)); // random number generator seed
int max = r;
int min = 0;
int max1 = r1;
int min1 = 0;
int numberW0 = (rand()%max+min+1); // number of entires to populate way0
for (k = 0; k <= numberW0; k++)
{
int index_max = 8192;
int index_min = 0;
numberX0 = rand()%index_max+index_min+1; // random index to use
int tag_max = 32768;
int tag_min = 0;
numberY0 = rand()%tag_max+tag_min+1; // random tag for the index
unsigned int data_max = 429496729; // random data for the index
int data_min = 0;
numberZ0 = rand()%data_max+data_min+1;
way0 [numberX0][0] = 1;
way0 [numberX0][1] = numberY0;
way0 [numberX0][2] = numberZ0;
}
int numberW1 = (rand()%max+min+1); // number of entires to populate way1
for (k = 0; k <= numberW1; k++)
{
int index_max1 = 8192;
int index_min1 = 0;
numberX1 = rand()%index_max1+index_min1+1; // random index to use
int tag_max1 = 32768;
int tag_min1 = 0;
numberY1 = rand()%tag_max1+tag_min1+1; // random tag for the index
unsigned int data_max1 = 429496729; // random data for the index
int data_min1 = 0;
numberZ1 = rand()%data_max1+data_min1+1;
way1 [numberX1][0] = 1;
way1 [numberX1][1] = numberY1;
way1 [numberX1][2] = numberZ1;
}
int numberW2 = (rand()%max+min+1); // number of entires to populate way2
for (k = 0; k <= numberW2; k++)
{
int index_max2 = 8192;
int index_min2 = 0;
numberX2 = rand()%index_max2+index_min2+1; // random index to use
int tag_max2 = 32768;
int tag_min2 = 0;
numberY2 = rand()%tag_max2+tag_min2+1; // random tag for the index
unsigned int data_max2 = 429496729; // random data for the index
int data_min2 = 0;
numberZ2 = rand()%data_max2+data_min2+1;
way2 [numberX2][0] = 1;
way2 [numberX2][1] = numberY2;
way2 [numberX2][2] = numberZ2;
}
int numberW3 = (rand()%max+min+1); // number of entires to populate way3
for (k = 0; k <= numberW3; k++)
{
int index_max3 = 8192;
int index_min3 = 0;
numberX3 = rand()%index_max3+index_min3+1; // random index to use
int tag_max3 = 32768;
int tag_min3 = 0;
numberY3 = rand()%tag_max3+tag_min3+1; // random tag for the index
unsigned int data_max3 = 429496729; // random data for the index
int data_min3 = 0;
numberZ3 = rand()%data_max3+data_min3+1;
way3 [numberX3][0] = 1;
way3 [numberX3][1] = numberY3;
way3 [numberX3][2] = numberZ3;
}
int numberW4 = (rand()%max1+min1+1); // number of entires to populate level 2
for (k = 0; k <= numberW4; k++)
{
int index_max4 = r1;
int index_min4 = 0;
numberX4 = rand()%index_max4+index_min4+1; // random index to use
int tag_max4 = 32768;
int tag_min4 = 0;
numberY4 = rand()%tag_max4+tag_min4+1; // random tag for the index
unsigned int data_max4 = 429496729; // random data for the index
int data_min4 = 0;
numberZ4 = rand()%data_max4+data_min4+1;
lev2 [numberX4][0] = 1;
lev2 [numberX4][1] = numberY4;
lev2 [numberX4][2] = numberZ4;
}
armRes.open("C:\\Users\\Max\\Documents\\CSIT4\\Project\\Practice\\ARMresults.txt", ios::out | ios::app );
for (t = 0; t<= 15000; t++)
{
int tag_value = rand()%32768+0+1;
for (s = 0; s <= r; s++)
{
block0 = way0 [s][1];
block1 = way1 [s][1];
block2 = way2 [s][1];
block3 = way3 [s][1];
if (tag_value == block0)
{
armRes << "L1 Hit (Way 0) -- AT INDEX: " << s << endl;
}
else if (tag_value == block1)
{
armRes << "L1 Hit (Way1) -- AT INDEX: " << s << endl;
}
else if (tag_value == block2)
{
armRes << "L1 Hit (Way2) -- AT INDEX: " << s << endl;
}
else if (tag_value == block3)
{
armRes << "L1 Hit (Way3) -- AT INDEX: " << s << endl;
}
else
{
for (v = 0; v <= r1; v++)
{
double levBlock = lev2 [v][1];
if (tag_value == levBlock)
{
armRes << "L2 Hit -- AT INDEX: " << v << endl;
}
else
{
armRes << "Cache Miss For Tag: " << tag_value << endl;
}
}
}
}
}
armRes.close();
return 0;
}
Any help would be greatly appreciated as I no longer have any academic help
One issue right away: You are more than likely blowing out the stack with the memory taken up by those arrays in main(). Since you're using Visual C++, I can assume the following:
double way0 [r][c]; //declaration of the arrays
double way1 [r][c];
double way2 [r][c];
double way3 [r][c];
double lev2[r1][c];
Given this:
const int r = 8192;
const int c = 3;
const int r1 = 262144;
If the sizeof(double) == 8, then that is over 7,000,000 bytes of stack required to hold those arrays. The default stack is (I believe) 1 megabyte (maybe 2 megabytes, can't recall off the top of my head), but nowhere near close to 7 megabytes.
Therefore you need to use dynamic allocation to create those arrays. The simplest way is to use a std::vector and size accordingly.
#include <vector>
//...
typedef std::vector<double> Double1D;
typedef std::vector<Double1D> Double2D;
Double2D way0(r, Double1D(c));
Double2D way1(r, Double1D(c));
Double2D way2(r, Double1D(c));
Double2D way3(r, Double1D(c));
Double2D lev2(r1, Double1D(c));
This now creates the data away from the stack and instead onto the heap. The rest of the code should be able to compile with no further changes.
Note that I did not check any of the array indices you're using in the rest of your program, so you could still have an array access violation somewhere in the rest of the code. Others have commented that you do have some index boundary issues using rand(), but at least you won't run into stack space issues.
So I have written a heap-sort program in C++ which takes in an array of doubles and the size of the array and then sorts it. The program works however when I attempt to pass it arrays larger then 1000 I get "Bus error: 10" I think this has to do with how memory is being allocated, however I can not seem to find a solution.
#ifndef _HEAPSORT_
#define _HEAPSORT_
void Heapsort(double arrayToSort[], int sizeOfArray);
void Heapsort(double arrayToSort[], int sizeOfArray)
{
// Building Heap:
// ==========================
int halfSize = sizeOfArray-1 / 2;
for(int i = halfSize; i >= 0; i--){
double temp = arrayToSort[i];
int I1 = i, I2 = i+i;
do {
if( I2 < sizeOfArray - 1 && arrayToSort[I2+1] > arrayToSort[I2] ) { I2++; }
if( arrayToSort[I2] > temp ){
arrayToSort[I1] = arrayToSort[I2];
I1 = I2;
I2 = I1+I1;
} else {
I2 = sizeOfArray;
}
} while ( I2 < sizeOfArray );
arrayToSort[I1] = temp;
}
// Sorting Heap:
// =========================
for(int i = sizeOfArray-1; i >= 2; i--){ // i is the number of still competing elements
double temp = arrayToSort[i];
arrayToSort[i] = arrayToSort[0]; // store top of the heap
int I1 = 0, I2 = 1;
do {
if((I2+1) < i && arrayToSort[I2+1] > arrayToSort[I2] ) { I2++; }
if(arrayToSort[I2] > temp ){
arrayToSort[I1] = arrayToSort[I2];
I1 = I2;
I2 = I1+I1;
} else {
I2 = i;
}
} while( I2 < i );
arrayToSort[I1] = temp;
}
double Temp = arrayToSort[1];
arrayToSort[1] = arrayToSort[0];
arrayToSort[0] = Temp;
}
#endif /* _HEAPSORT_ */
Any insight into how I can fix this would be greatly appreciated.
Here is the code where I allocate the memory.
#include <iostream>
#include "heapsort.h"
#include "rmaset.h"
#include "ranmar.h"
#include "common.h"
using namespace std;
int main(void)
{
const int size = 1000;
struct Common block;
rmaset(block);
double array[size];
for(int i = 0; i < size; i++){
array[i] = ranmar(block);
}
Heapsort(array,size);
return 0;
}
This just creates a struct which then gets passed to a function which initializes it and then to another function ranmar which populates it with random numbers. I have checked all other functions thoroughly and am sure that the error is coming from the Heapsort function.
In the following line int halfSize = sizeOfArray-1 / 2; the right side is evaluated as sizeOfArray-(1 / 2). The integer division (1 / 2) results in 0 so it initializes halfSize with the value sizeOfArray. You begin the loop off the end of the array. I think you meant to do (sizeOfArray-1) / 2 instead.