So I am testing out some object arrays in C++, and I am trying to delete these objects afterwards, as I am supposed to.
But here's the problem: the deleteInputPattern variable works fine, so I am able to fully delete "inputs" within the CSVFile header class, but its equivalent in the main file, "inputArray", triggers a breakpoint.
What is the problem here? Am I trying to delete non-existent memory? Do any of the pointers need
Code wall below:
InputTest.h:
#pragma once
class InputTest
{
private:
float r;
float g;
float b;
float t;
public:
InputTest();
~InputTest();
InputTest(float r, float g, float b, float t);
void setR(float newT);
float getR();
void setG(float newT);
float getG();
void setB(float newT);
float getB();
void setT(float newT);
float getT();
void print(int count);
};
InputTest.cpp:
#include "InputTest.h"
#include <stdio.h>
InputTest::InputTest()
{
printf("Input constructor\n");
}
InputTest::~InputTest()
{
printf("Input destructor\n");
}
InputTest::InputTest(float r, float g, float b, float t)
{
this->r = r;
this->g = g;
this->b = b;
this->t = t;
}
void InputTest::setR(float newT)
{
r = newT;
}
float InputTest::getR()
{
return r;
}
void InputTest::setG(float newT)
{
g = newT;
}
float InputTest::getG()
{
return g;
}
void InputTest::setB(float newT)
{
b = newT;
}
float InputTest::getB()
{
return b;
}
void InputTest::setT(float newT)
{
t = newT;
}
float InputTest::getT()
{
return t;
}
void InputTest::print(int count)
{
printf("R: %.2f\n", r);
printf("G: %.2f\n", g);
printf("B: %.2f\n", b);
printf("T: %.2f\n", t);
}
Copy.h:
#pragma once
class InputTest;
class Copy
{
private:
int patternCount;
InputTest** inputs;
public:
Copy();
~Copy();
InputTest* getInputPattern(int index);
void addInputPattern(InputTest* in);
void deleteInputPattern();
};
Copy.cpp:
#include "Copy.h"
#include "InputTest.h"
#include <string.h>
#include <stdio.h>
Copy::Copy()
{
printf("CSV File constructor\n");
inputs = NULL;
patternCount = 0;
inputs = new InputTest*[3];
int i;
for (i = 0; i < 3; i++)
{
inputs[i] = new InputTest();
}
}
Copy::~Copy()
{
printf("CSV File destructor\n");
}
InputTest * Copy::getInputPattern(int index)
{
printf("input gotten: %d\n", index);
return inputs[index];
}
void Copy::addInputPattern(InputTest * in)
{
inputs[patternCount] = in;
patternCount++;
printf("input added: %d\n", patternCount);
}
void Copy::deleteInputPattern()
{
int i;
for (i = 0; i < patternCount; i++)
{
delete inputs[i];
}
delete inputs;
inputs = NULL;
}
main.cpp:
#include "Copy.h"
#include "InputTest.h"
#include <string.h>
#include <stdio.h>
int main(int argv, char** argc)
{
bool testResult = false;
Copy *test = NULL;
test = new Copy();
InputTest **inputArray;
inputArray = new InputTest*[3];
int count;
for (count = 0; count < 3; count++)
{
inputArray[count] = new InputTest();
inputArray[count]->setR(0.2f);
inputArray[count]->setG(0.6f);
inputArray[count]->setB(0.8f);
inputArray[count]->setT(0.5f);
test->addInputPattern(inputArray[count]);
inputArray[count] = test->getInputPattern(count);
printf("next\n");
}
for (count = 0; count < 3; count++)
{
printf("round %d\n", count);
printf("R: %f\n", inputArray[count]->getR());
printf("G: %f\n", inputArray[count]->getG());
printf("B: %f\n", inputArray[count]->getB());
printf("T: %f\n", inputArray[count]->getT());
}
test->deleteInputPattern();
for (count = 0; count < 3; count++)
{
delete inputArray[count];
}
delete inputArray;
delete test;
inputArray = NULL;
test = NULL;
return testResult;
}
These seem to be the problematic line:
test->deleteInputPattern();
for (count = 0; count < 3; count++)
{
delete inputArray[count];
}
Since you have already deleted using test->deleteInputPattern(), that memory is freed. Now you are deleting the same objects (to which you are still holding a reference via inputArray) explicitly in main using delete inputArray. But that memory is already deleted in deleteInputPattern and hence you should be getting a memory access error.
You need to free any allocated memory only once. There is no need to do it again in main(). Either call deleteInputPattern or call delete explicitly in main, but not both. I can recommend 2 best practices:
Use smart pointers
The allocating module should delete the memory (this may not be applicable in many situations though such as factories)
Related
I have been attempting to write this program where I am required to utilize dynamically allocated arrays to print out a 2d matrix. I am only to write the cpp files and not allowed to modify anything in the header files.
I keep getting an exception
0 [main] review2_cis17c_objectarray 4018 cygwin_exception::open_stackdumpfile: Dumping stack trace to review2_cis17c_objectarray.exe.stackdump
I am relatively new to learning c++; after contemplating, I think something is wrong in my PlusTab.cpp, where I am trying to assign an allocated address to a constructor-defined array in a class. Can someone please help and let me know here I did wrong in the project? Thank you very much!
AbsRow.h:
class AbsRow {
protected:
int size;
int *rowData;
public:
virtual int getSize()const = 0;
virtual int getData(int)const = 0;
};
AbsTabl.h:
class AbsTabl {
protected:
int szRow;
int szCol;
RowAray **columns;
public:
virtual int getSzRow()const = 0;
virtual int getSzCol()const = 0;
virtual int getData(int,int)const = 0; };
PlusTab.h
class PlusTab:public Table {
public:
PlusTab(unsigned int r,unsigned int c):Table(r,c){};
PlusTab operator+(const PlusTab &);
};
RowAray.h
class RowAray:public AbsRow {
public:
RowAray(unsigned int);
virtual ~RowAray();
int getSize()const{return size;}
int getData(int i)const{
if(i>=0&&i<size)return rowData[i];
else return 0;}
void setData(int,int);
};
Table.h
#include "AbsTabl.h"
class Table:public AbsTabl {
public:
Table(unsigned int,unsigned int);
Table(const Table &);
virtual ~Table();
int getSzRow()const {return szRow;}
int getSzCol()const {return szCol;}
int getData(int,int)const;
void setData(int,int,int);
};
PlusTab.cpp:
#include "PlusTab.h"
PlusTab PlusTab::operator+(const PlusTab &t) {
PlusTab tab(this->getSzRow(), this->getSzCol());
for(int i = 0; i < tab.getSzRow(); i++) {
for (int j = 0; j <tab.getSzCol(); j++) {
(tab.columns[i])->setData(j, this->getData(i,j) + t.getData(i,j));
}
}
return tab;
}
RowAray.cpp:
#include "RowAray.h"
RowAray::RowAray(unsigned int c) {
size = c;
rowData = new int[c];
}
RowAray::~RowAray() {
delete []rowData;
}
void RowAray::setData(int i, int value) {
rowData[i] = value;
}
Table.cpp:
#include "Table.h"
#include <cstdlib>
Table::Table(unsigned int r, unsigned int c) {
szRow = r;
szCol = c;
columns = new RowAray*[r];
for (int i = 0; i < r; i++) {
columns[i] = new RowAray(c);
}
for (int i = 0; i < r; i++) {
for (int j = 0; j < c; j++) {
columns[i]->setData(j, (rand()%90 + 10));
}
}
}
Table::~Table() {
for (int i = 0; i < szRow; i++) {
delete []columns[i];
}
delete []columns;
}
Table::Table(const Table &t) {
szRow = t.szRow;
szCol = t.szCol;
columns = t.columns;
};
int Table::getData(int r ,int c) const {
return columns[r]->getData(c);
};
void Table::setData(int r, int c, int value) {
columns[r]->setData(c,value);
}
and finally my main.cpp, which I am not allowed to modify either.
#include <ctime>
#include <iostream>
#include <iomanip>
using namespace std;
//User Libraries
#include "PlusTab.h"
//Global Constants
//Function Prototype
void prntTab(const Table &);
//Execution Begins Here!
int main(int argc, char** argv) {
//Initialize the random seed
srand(static_cast<unsigned int>(time(0)));
//Declare Variables
int rows=3,cols=4;
//Test out the Tables
PlusTab tab1(rows,cols);
PlusTab tab2(tab1);
PlusTab tab3=tab1+tab2;
// Print the tables
cout<<"Abstracted and Polymorphic Print Table 1 size is [row,col] = ["
<<rows<<","<<cols<<"]";
prntTab(tab1);
cout<<"Copy Constructed Table 2 size is [row,col] = ["
<<rows<<","<<cols<<"]";
prntTab(tab2);
cout<<"Operator Overloaded Table 3 size is [row,col] = ["
<<rows<<","<<cols<<"]";
prntTab(tab3);
//Exit Stage Right
return 0;
}
void prntTab(const Table &a){
cout<<endl;
for(int row=0;row<a.getSzRow();row++){
for(int col=0;col<a.getSzCol();col++){
cout<<setw(4)<<a.getData(row,col);
}
cout<<endl;
}
cout<<endl;
}
I apologize for this massive amount of code. This is my first time posting, will learn to use the website! I appreciate your help:)
This is my code:
#include <cstdlib>
#include <ctime>
#include <stdio.h>
#include <vector>
#define ENTITY(A) entity##A
#define ALM(A) alm##A
struct TEntity{
private:
int sumx;
int sumy;
const char * rep;
int m_ix;
int m_iy;
public:
TEntity(int x, int y, int sum_x, int sum_y, const char * txt);
};
TEntity::TEntity(int x, int y, int sum_x, int sum_y, const char * txt) {
m_ix = x;
m_iy = y;
sumx = sum_x;
sumy = sum_y;
rep = txt;
}
class IAlmacenable {
private:
void * element;
public:
IAlmacenable(void * e);
IAlmacenable();
void * getValue();
};
IAlmacenable::IAlmacenable(void *e) {
element = e;
}
IAlmacenable::IAlmacenable() {
element = nullptr;
}
void * IAlmacenable::getValue() {
return element;
}
class TList {
private:
std::vector<IAlmacenable*> elementos;
int position;
public:
TList();
int Size();
int Push(IAlmacenable* psz);
};
TList::TList() {
elementos = std::vector<IAlmacenable*>();
position = 0;
}
int TList::Size() {
return elementos.size();
}
int TList::Push(IAlmacenable* psz) {
int res = 0;
if (elementos.size() >= elementos.max_size()) {
res = -1;
}
else {
elementos.push_back(psz);
}
return res;
}
int main(){
srand(time(NULL));
TList *list = new TList();
//we can put entities in the list and the rest will be filled up to 5
int size = list->Size();
for(int i = size; i<5;i++){
const char c[] = {(rand() % 2 ? 65 + rand() % 25 : 97 + rand() % 25), '\0'};
TEntity ENTITY(i)(rand() % 10, rand() % 10, rand() % 5, rand() % 5, c);
IAlmacenable ALM(i)(&ENTITY(i));
list->Push(&ALM(i));
size++;
}
//do things like printing their value...
delete list;
return 0;
}
I need to create a new variable everytime it run the "TEntity ENTITY(i)" line,
the problem is that it creates the same variable always, I think it is because it creates the variable entityi and therefore it is overwriting on the same variable, besides it seems that the random it generates is always the same number since all entities have the same values in all its parameters. The c variable create a const char * random variable between a-z, A-Z , I don't put the print code because it is unnecessary, so what can I do? Is there any way to dynamically create variables of entities whose values are random?
EDIT
Here is the new code fixed (the macros have been eliminated since they were not necessary and the necessary code has been included to be able to execute it) but there is still the same problem that they are generated with the same parameters (since they are still the same variable):
#include <cstdlib>
#include <ctime>
#include <stdio.h>
#include <vector>
#include <conio.h>
#include <windows.h>
struct TEntity{
private:
int sumx;
int sumy;
const char * rep;
int m_ix;
int m_iy;
public:
TEntity(int x, int y, int sum_x, int sum_y, const char * txt);
void movimiento();
void pinta();
};
TEntity::TEntity(int x, int y, int sum_x, int sum_y, const char * txt) {
m_ix = x;
m_iy = y;
sumx = sum_x;
sumy = sum_y;
rep = txt;
}
void TEntity::movimiento() {
m_ix += sumx;
m_iy += sumy;
}
void TEntity::pinta() {
gotoxy(static_cast<short int>(m_ix), static_cast<short int>(m_iy));
printf("%s", rep);
}
void gotoxy(short int x, short int y)
{
COORD pos = {x, y};
HANDLE output = GetStdHandle(STD_OUTPUT_HANDLE);
SetConsoleCursorPosition(output, pos);
}
void clear()
{
system("cls");
}
class IAlmacenable {
private:
void * element;
public:
IAlmacenable(void * e);
IAlmacenable();
void * getValue();
};
IAlmacenable::IAlmacenable(void *e) {
element = e;
}
IAlmacenable::IAlmacenable() {
element = nullptr;
}
void * IAlmacenable::getValue() {
return element;
}
class TList {
private:
std::vector<IAlmacenable*> elementos;
int position;
public:
TList();
int Size();
int Push(IAlmacenable* psz);
IAlmacenable* First();
IAlmacenable* Next();
};
TList::TList() {
elementos = std::vector<IAlmacenable*>();
position = 0;
}
int TList::Size() {
return elementos.size();
}
int TList::Push(IAlmacenable* psz) {
int res = 0;
if (elementos.size() >= elementos.max_size()) {
res = -1;
}
else {
elementos.push_back(psz);
}
return res;
}
IAlmacenable* TList::First() {
IAlmacenable* res;
if (elementos.empty()) {
res = nullptr;
}
else {
res = elementos.front();
position = 1;
}
return res;
}
IAlmacenable* TList::Next() {
IAlmacenable* res;
if (elementos.empty()) {
res = nullptr;
}
else {
int pos = position;
int size = elementos.size();
if (pos < size) {
res = elementos.at(position);
position++;
}
else {
res = this->First();
}
}
return res;
}
int main(){
srand(time(NULL));
TList *list = new TList();
//we can put entities in the list and the rest will be filled up to 5
int size = list->Size();
for(int i = size; i<5;i++){
const char c[] = {(rand() % 2 ? 65 + rand() % 25 : 97 + rand() % 25), '\0'};
TEntity *entity = new TEntity(rand() % 10, rand() % 10, rand() % 5, rand() % 5, c);
IAlmacenable *alm = new IAlmacenable(entity);
list->Push(alm);
size++;
}
while(true){
clear();
for (int i = 0; i < size; i++) {
reinterpret_cast<TEntity *>(list->Next()->getValue())->pinta();
reinterpret_cast<TEntity *>(list->Next()->getValue())->movimiento();
}
Sleep(2000);
}
delete list;
return 0;
}
There is some confusion here.
Some points:
The macro is not fit-for-purpose, as you already know; you're just creating a variable name entityi each time;
That doesn't matter! The object only exists for the duration of the loop iteration anyway; C++ doesn't let you create multiple objects with the same name at the same time. In fact you can get rid of the entire macro stuff and just call the object entity;
Now that that's out of the way, you're getting repeated results because you're storing a pointer to each iteration of that local variable — on each occasion, that's a dangling pointer to an object that's been destroyed. Don't store dangling pointers!
You can either:
Dynamically allocate the objects that you're adding to the list, or
Store actual objects rather than pointers-to-objects.
Either way, the local-scope name is irrelevant and certainly need not change repeatedly for each loop iteration.
I am trying to create multiple instances of a static c++ library I wrote, but I can't create multiple instances of it... When I create two instances and write different data to them, I read the same data from both of the instances. Here is my code:
.cpp file:
// MathFuncsLib.cpp
// compile with: cl /c /EHsc MathFuncsLib.cpp
// post-build command: lib MathFuncsLib.obj
/*
DECLARING VECTORS
|3,6,4|
|9,1,5|
|2,0,2|
|5,3,6|
Should be inputted as:
int a[] = {3,6,4,9,1,5,2,0,2,5,3,6} with x = 3 and y = 4
Inputting training vals:
|0.1 (inp1),0.1 (inp2) ,0.1 (targeted output)| depends on the number of inputs and outputs
|9,1,5|
|2,0,2|
|5,3,6|
*/
//#include "stdafx.h"
#include "vector.h"
#include "iostream"
#define DEBUG
#include <stdexcept>
//using namespace std;
double* vectorLib::arrayPtr;
int vectorLib::x;
int vectorLib::y;
vectorLib::vectorLib(int xInp, int yInp) {
vectorLib::arrayPtr = new double[xInp*yInp];
vectorLib::x = xInp;
vectorLib::y = yInp;
//return 0;
}
double vectorLib::sigmoid(double inp) {
return 1 / (1 + exp(-inp));
}
double* vectorLib::getArrayPtr() {
return vectorLib::arrayPtr;
}
double vectorLib::read(int xInp, int yInp) {
#ifdef DEBUG
if (xInp >= vectorLib::x) {
std::cout << "X_OUT_OF_BOUNDS_VECTOR_READ\n";
while (1);
}
if (yInp >= vectorLib::y) {
std::cout << "X_OUT_OF_BOUNDS_VECTOR_READ\n";
while (1);
}
#endif // DEBUG
return *(arrayPtr + xInp + vectorLib::x*yInp);
}
void vectorLib::write(int xInp, int yInp, double data) {
#ifdef DEBUG
if (xInp >= vectorLib::x) {
std::cout << "X_OUT_OF_BOUNDS_VECTOR_WRITE\n";
while (1);
}
if (yInp >= vectorLib::y) {
std::cout << "X_OUT_OF_BOUNDS_VECTOR_WRITE\n";
while (1);
}
#endif // DEBUG
vectorLib::arrayPtr[xInp + vectorLib::x*yInp] = data;
}
void vectorLib::writeArr(double* inpArr) {
int i;
for (i = 0; i < vectorLib::x*vectorLib::y; i++) {
vectorLib::arrayPtr[i] = *(inpArr + i);
}
}
void vectorLib::sigmoidVect() {
int yy;
int xx;
for (yy = 0; yy < vectorLib::y; yy++) {
for (xx = 0; xx < vectorLib::x; xx++) {
write(xx, yy, sigmoid(read(xx, yy)));
}
}
write(0, vectorLib::y - 1, 1);
}
int vectorLib::getX() {
return vectorLib::x;
}
int vectorLib::getY() {
return vectorLib::y;
}
int vectorLib::totalVectSize() {
return vectorLib::x * vectorLib::y;
}
void vectorLib::printVector() {
int yy;
int xx;
for (yy = 0; yy < y; yy++) {
for (xx = 0; xx < x; xx++) {
std::cout << vectorLib::read(xx, yy);
if (xx + 1 != x) {
std::cout << ",";
}
}
std::cout << "\n";
}
}
vectorLib* vectorLib::vectorMult(vectorLib* vect1, vectorLib* vect2) {
#ifdef DEBUG
if (vect1->getX() != vect2->getY()) {
std::cout << "INPUTS_DONT_MATCH_VECTORMULT\n";
while (1);
}
#endif // DEBUG
vectorLib toRet(vect1->getX(), vect2->getY());
int i;
for (i = 0; i < vect2->getX(); i++) {
int p;
for (p = 0; p < vect1->getY(); p++) {
double tempOut = 0;
int q;
for (q = 0; q < vect1->getX(); q++)
{
tempOut += vect1->read(q, p) * vect2->read(i, q);
}
toRet.write(i, p, tempOut);
}
}
return &toRet;
}
.h file:
//#include "stdafx.h"
using namespace std;
class vectorLib
{
//int x, y;
public:
static double* arrayPtr;
static int x;
static int y;
//Constructor takes x and y of the vector
vectorLib(int xInp, int yInp);
//The pointer to the array that holds all the doubles in the vector
static double* getArrayPtr();
//Read the vector at a specified x and y
static double read(int xInp, int yInp);
//Write one double to a specific location
static void write(int xInp, int yInp, double data);
//Write the array inside the vector class
static void writeArr(double* inpArr);
//Takes sigmoid of whole vector
static void sigmoidVect();
//Returns x of vector
static int getX();
//Returns y of vector
static int getY();
//Returns total size of vector
static int totalVectSize();
//Returns a vector pointer to the multiplication result
static vectorLib* vectorMult(vectorLib* vect1, vectorLib* vect2);
//Prints vector
static void printVector();
private:
static double sigmoid(double inp);
};
Main file:
#define DEBUG
#include "stdafx.h"
#include "vector.h"
#include "iostream"
using namespace std;
int main()
{
vectorLib testVectLol(1, 3);
vectorLib testVect(3, 4);
double vectInp[] = { 1,1,1,
1,1,1,
1,1,1,
1,1,1};
double vectInp2[] = { 0.5,0.5,0.5 };
testVect.writeArr(vectInp);
testVectLol.writeArr(vectInp2);
testVect.printVector();// Both print 0.5, 0.5, 0,5
testVectLol.printVector();// Both print 0.5, 0.5, 0,5
while (1);
return 0;
}
Thanks in advance! I've been struggling with this for hours. I would really appreciate any help!
Jasper
I am using Cuda with C++ to do some parallel computing. Recently, I noticed something that I cannot understand and I didn't find informations about it when looking for it. In my code, one line which is very seldom exectued (but needed) slows down the program even when it is not executed at all. Here is some code to make it more clear:
The class I created:
class Foo
{
void myFunction(Foo *listFoo);
//some other functions that I need
...
int myAttribute;
//some other attributes that I need
...
}
The definition of myFunction:
void Foo::myFunction(Foo *listFoo)
{
//do some computations on the listFoo
if( condition seldom verified )
{ myAttribute = myAttribute + 1; }
}
The global function:
__global__ void compute(Foo *listFoo, int numberOfFoo)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
if( i < numberOfFoo)
{ listFoo[i].myFunction(listFoo); }
}
The host code:
compute<<<(numberOfFoo + 511)/512, 512>>> (listFoo, numberOfFoo)
The line slowing down everything is myAttribute = myAttribute + 1. Even when it is executed 0 times, the code is really slow compared to when the line is put in the comment. I tried to replace this line with a simple printf. The result is the same, the line is never executed but it slows down everything.
If you have any suggestion on the reason and on eventually how to solve this problem, it would be very much appreciated. My level in programing is not so advanced, so please use relatively easy explanations.
Thanks a lot
First Edit: few people requested the code, so here it is! I reduced it to 700 lines, I know it is still very long but not much would work if I keep removing some parts of it. It compiles without problems for me. All you have to do is press enter, wait few seconds and the time taken will be shown in the command window.
It is in the function findContactwithGrain() that the problem occurs. The line addContact(grainContact) is slowing down everything. On my computer, if this line is active, one computation takes around 3.5 sec. If I put it in comment, it takes 0.07 sec. That's a huge difference for one line that is never executed.
Hope this helps to understand the problem
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <fstream> // to read and write files
#include <stdio.h>
#include <iostream>
#include <time.h>
#include <string>
#include <sstream>
#define n 200
using namespace std;
int global_totalNumberBlock = 0;
int global_totalNumberGrain = 0;
//tell the compiler that those classes exist
class Vec3d2;
class Block;
class Grain;
class Contact;
class Analysis;
class Vec3d2
{
public:
__host__ __device__ Vec3d2(void);
__host__ __device__ Vec3d2(double x_value, double y_value, double z_value);
__host__ __device__ ~Vec3d2(void);
__host__ __device__ double dot(Vec3d2 a) const;
__host__ __device__ Vec3d2 cross(Vec3d2 a) const;
__host__ __device__ double norm() const;
__host__ __device__ void normalize();
// to be able to use cout easily
__host__ __device__ friend ostream & operator <<(ostream &s,const Vec3d2 &vec)
{
s << vec.x << endl;
s << vec.y << endl;
s << vec.z << endl;
return s;
}
//to be able to use brackets
__host__ __device__ double operator [](int i) const
{
if( i == 0)
{
return x;
}
else if( i == 1)
{
return y;
}
else if( i == 2)
{
return z;
}
else
{
cout << "ERROR IN USING VEC3D2" << endl;
system("PAUSE");
}
}
__host__ __device__ double & operator [](int i)
{
if( i == 0)
{
return x;
}
else if( i == 1)
{
return y;
}
else if( i == 2)
{
return z;
}
else
{
cout << "ERROR IN USING VEC3D2" << endl;
system("PAUSE");
}
}
//attributes
double x, y, z;
};
//Class Vec3d2 functions and operators
Vec3d2::Vec3d2()
{
x = 0;
y = 0;
z = 0;
}
Vec3d2::Vec3d2(double x_value, double y_value, double z_value)
{
x = x_value;
y = y_value;
z = z_value;
}
Vec3d2::~Vec3d2()
{
}
double Vec3d2::dot(Vec3d2 a) const
{
return x*a.x + y*a.y + z*a.z;
}
Vec3d2 Vec3d2::cross(Vec3d2 a) const
{
Vec3d2 result( y*a.z - z*a.y, x*a.z - z*a.x, x*a.y - y*a.x);
return result;
}
double Vec3d2::norm() const
{
return sqrt((double) x*x + y*y + z*z);
}
void Vec3d2::normalize()
{
double norm = this->norm();
if (norm > 0)
{
x = x/norm;
y = y/norm;
z = z/norm;
}
else //the vector has a null norm so nothing to do
{
}
}
__host__ __device__ Vec3d2 operator+(Vec3d2 const& a, Vec3d2 const& b)
{
return Vec3d2(a.x + b.x, a.y + b.y, a.z + b.z);
}
__host__ __device__ Vec3d2 operator-(Vec3d2 const& a, Vec3d2 const& b)
{
return Vec3d2(a.x - b.x, a.y - b.y, a.z - b.z);
}
__host__ __device__ Vec3d2 operator*(Vec3d2 const& a, double const& b)
{
return Vec3d2(b*a.x, b*a.y, b*a.z);
}
__host__ __device__ Vec3d2 operator*(double const& b, Vec3d2 const& a)
{
return Vec3d2(b*a.x, b*a.y, b*a.z);
}
__host__ __device__ Vec3d2 operator/(Vec3d2 const& a, double const& b)
{
return Vec3d2(a.x/b, a.y/b, a.z/b);
}
__host__ __device__ Vec3d2 operator/(double const& b, Vec3d2 const& a)
{
return Vec3d2(a.x/b, a.y/b, a.z/b);
}
__host__ __device__ bool operator==(Vec3d2 const& a, Vec3d2 const& b)
{
if(a.x == b.x && a.y == b.y && a.z == b.z)
{
return true;
}
else
{
return false;
}
}
__host__ __device__ bool operator!=(Vec3d2 const& a, Vec3d2 const& b)
{
if( a.x != b.x || a.y != b.y || a.z != b.z)
{
return true;
}
else
{
return false;
}
}
class Contact
{
public:
__host__ __device__ Contact(void);
//__host__ __device__ Contact(Contact const& ContactToCopy);
__host__ __device__ ~Contact(void);
__host__ __device__ void setContact(Grain &grain1, Grain &grain2, double overlap_value);
};
class Block
{
public:
__host__ Block(void);
__host__ Block(Block const& BlockToCopy);
__host__ __device__ ~Block(void);
__host__ __device__ Contact* getContactList() const;
__host__ __device__ Contact** getContactListPtr();
__host__ __device__ int getMaxNumberContact() const;
__host__ __device__ int getNumberContact() const;
__host__ __device__ void setContactList(Contact *ptr);
__host__ __device__ void addContact(Contact contact_value);
__host__ __device__ void clearContactList();// empty the contactList
__host__ __device__ void deleteBlockData(); //clear the memory taken by the contactList
__host__ __device__ Block& operator=(Block const& BlockToCopy);
protected:
int Id; //unique Id number for each entity double mass;
int totalNumberBlock; //same value for each block, cannot use static attribute because of cuda
Contact *contactList;
int numberContact, old_numberContact; //because there is no way to find it from the pointer contactList
int maxNumberContact; //maximum number of contact per block, we have to choose this
};
class Grain: public Block
{
public:
__host__ Grain(void);
__host__ Grain(Grain const& grainToCopy);
__host__ Grain(Vec3d2 position_value, double radius_value, double mass_value);
__host__ __device__ ~Grain(void);
__host__ __device__ Vec3d2 getPositionVec() const;
__host__ __device__ Vec3d2* getPosition() const;
__host__ __device__ Vec3d2** getPositionPtr();
__host__ __device__ int getTotalNumberGrain() const;
__host__ void setTotalNumberGrain();
__host__ __device__ void setTotalNumberGrain(int number);
__host__ __device__ void setPosition(Vec3d2 *ptr);
__host__ __device__ void setPositionVec(Vec3d2 position_value);
__host__ __device__ void deleteGrainData();
__host__ __device__ void findContactwithGrain(Grain *grainList);
__host__ __device__ Grain& operator=(Grain const& grainToCopy);
__host__ __device__ friend ostream & operator <<(ostream &s,const Grain &grain)
{
s <<"position is" << endl;
s << *grain.position << endl;
s <<"grain number is" << endl;
s << grain.number << endl;
s <<"radius is" << endl;
s << grain.radius << endl;
s <<"mass is" << endl;
return s;
}
private:
Vec3d2 *position;
int totalNumberGrain;
int number; //different from Id defined in class Block because a wall could have the same number as a grain
double radius;
};
class Analysis
{
public:
Analysis(void);
Analysis(Grain *grainList);
~Analysis(void);
Grain* getGrainList();
void copyToDevice();
void copyToHost();
void runAnalysis();
private:
//should contain grainList, wallList and their equivalent for the device
//should contain an array of pointers for each attribute being a pointer in grain and wall and their equivalent in the device
int totalNumberGrain, totalNumberWall;
Grain *grainList, *d_grainList;
//for grain data
Contact **grain_contactList, **d_grain_contactList;
Vec3d2 **grain_position, **d_grain_position;
};
//class Contact functions
Contact::Contact(void)
{
}
Contact::~Contact(void)
{
}
void Contact::setContact(Grain &grain1, Grain &grain2, double overlap_value)//we are in grain1 and contact with grain2
{
}
//class Block functions
Block::Block(void)
{
Id = global_totalNumberBlock;
numberContact = 0;
old_numberContact = 0;
//contact list settings
maxNumberContact = 30;
contactList = new Contact[maxNumberContact];
//increment of block number
global_totalNumberBlock = global_totalNumberBlock + 1;
}
Block::~Block(void)
{
delete[] contactList;
//cout << "CAREFUL, YOU ARE DESTROYING A BLOCK" << endl;//because we should never erase a block
//system("PAUSE");
totalNumberBlock = totalNumberBlock - 1;
}
Block::Block(Block const& BlockToCopy)
{
Id = BlockToCopy.Id;
numberContact = BlockToCopy.numberContact;
old_numberContact = BlockToCopy.old_numberContact;
maxNumberContact = BlockToCopy.maxNumberContact;
contactList = new Contact[maxNumberContact];
for(int i =0; i <numberContact; i++)
{
contactList[i] = BlockToCopy.contactList[i];
}
}
Contact* Block::getContactList() const
{
return contactList;
}
Contact** Block::getContactListPtr()
{
return &contactList;
}
int Block::getMaxNumberContact() const
{
return maxNumberContact;
}
int Block::getNumberContact() const
{
return numberContact;
}
void Block::setContactList(Contact *ptr)
{
//no "delete contactList" here because this is executed after cuda. The contactList is pointing to nothing and deleteing it will cause an error
contactList = ptr;
}
void Block::addContact(Contact contact_value)
{
if(numberContact < maxNumberContact)
{
contactList[numberContact] = contact_value;
numberContact = numberContact + 1;
}
else //find a way to throw an error because the list is too small for all the contacts
{
printf("TOO MANY CONTACTS ON ONE GRAIN");
}
}
void Block::clearContactList()
{
//delete[] contactList;
//contactList = new Contact[maxNumberContact];
if(numberContact > 0)
{
numberContact = 0;
}
}
void Block::deleteBlockData()
{
delete[] contactList;
}
__host__ __device__ Block& Block::operator=(Block const& BlockToCopy)
{
if(this != &BlockToCopy) //to check we are not doing a = a
{
Id = BlockToCopy.Id;
numberContact = BlockToCopy.numberContact;
old_numberContact = BlockToCopy.old_numberContact;
maxNumberContact = BlockToCopy.maxNumberContact;
delete[] contactList;
contactList = new Contact[maxNumberContact];
for(int i =0; i <numberContact; i++)
{
contactList[i] = BlockToCopy.contactList[i];
}
}
return *this;
}
//class Grain functions
Grain::Grain(void)
{
number = global_totalNumberGrain;
global_totalNumberGrain = global_totalNumberGrain + 1;
totalNumberGrain = -1;//safety
//initialize Vec3d2
position = new Vec3d2;
}
Grain::Grain(Grain const& grainToCopy)
{
cout <<"COPY CONSTRUCTOR OF GRAIN IS NOT DONE YET"<<endl;
system("PAUSE");
//totalNumberGrain = grainToCopy.totalNumberGrain;
//radius = grainToCopy.radius;
//diameter = grainToCopy.diameter;
//volume = grainToCopy.volume;
//inertia = grainToCopy.inertia;
//position = new Vec3d2;
//old_position = new Vec3d2;
//old_velocity = new Vec3d2;
//old_acceleration = new Vec3d2;
//old_angularVelocity = new Vec3d2;
//old_angularAcceleration = new Vec3d2;
//gravityForce = new Vec3d2;
//*position = *grainToCopy.position;
//*old_position = *grainToCopy.old_position;
//*old_velocity = *grainToCopy.old_velocity;
//*old_acceleration = *grainToCopy.old_acceleration;
//*old_angularVelocity = *grainToCopy.old_angularVelocity;
//*old_angularAcceleration = *grainToCopy.old_angularAcceleration;
//*gravityForce = *grainToCopy.gravityForce;
}
Grain::Grain(Vec3d2 position_value, double radius_value,double mass_value)//, number(totalNumberGrain)
{
number = global_totalNumberGrain;
global_totalNumberGrain = global_totalNumberGrain + 1;
totalNumberGrain = -1;//safety
radius = radius_value;
//initialize all the Vec3d2 parameters
position = new Vec3d2;
*position = position_value;
}
Grain::~Grain(void)
{
//cout << "CAREFUL, YOU ARE DESTROYING A GRAIN" << endl;//because we should never erase a block
//system("PAUSE");
totalNumberGrain = totalNumberGrain - 1;
delete position;
}
Vec3d2 Grain::getPositionVec() const
{
return *position;
}
Vec3d2* Grain::getPosition() const
{
return position;
}
Vec3d2** Grain::getPositionPtr()
{
return &position;
}
int Grain::getTotalNumberGrain() const
{
return totalNumberGrain;
}
void Grain::setTotalNumberGrain()
{
totalNumberGrain = global_totalNumberGrain;
}
void Grain::setTotalNumberGrain(int number)
{
totalNumberGrain = number;
}
void Grain::setPosition(Vec3d2 *ptr)
{
position = ptr;
}
void Grain::setPositionVec(Vec3d2 position_value)
{
*position = position_value;
}
void Grain::deleteGrainData()
{
delete position;
}
void Grain::findContactwithGrain(Grain *grainList)
{
for(int m = 0; m < n; m++)
{
double length;
length = (*position - (*grainList[m].position)).norm();
if( length < radius + grainList[m].radius)
{
if( number != grainList[m].number) //faster than number != sortedGrainList[m]
{
Vec3d2 relativePosition = *position - (*grainList[m].position) ;
double overlap = radius + grainList[m].radius - relativePosition.norm();
//define the contact
Contact grainContact;
grainContact.setContact(*this, grainList[m], overlap);
addContact(grainContact); //IF YOU PUT THIS LINE IN COMMENT, EVERYTHING GOES A LOT FASTER
}
}
}
}
__host__ __device__ Grain& Grain::operator=(Grain const& grainToCopy)
{
if(this != &grainToCopy)
{
Block::operator=(grainToCopy); //this lines call the operator = defined for Block. So it copies the block attributes of the first grain into the second grain
//totalNumberGrain = grainToCopy.totalNumberGrain;
radius = grainToCopy.radius;
*position = *grainToCopy.position;
}
return *this;
}
//class Analysis functions
Analysis::Analysis(void)
{
}
Analysis::Analysis(Grain *grainList_value)
{
totalNumberGrain = grainList_value[0].getTotalNumberGrain();
grainList = new Grain[totalNumberGrain];
//copy grains
for(int i = 0; i < totalNumberGrain; i++)
{
grainList[i] = grainList_value[i];
grainList[i].setTotalNumberGrain(grainList_value[i].getTotalNumberGrain());
}
}
Analysis::~Analysis(void)
{
delete[] grainList;
//a lot more delete should be made here
}
Grain* Analysis::getGrainList()
{
return grainList;
}
void Analysis::copyToDevice()
{
//declare device grainList and wallList and copy the values
cudaMalloc(&d_grainList, totalNumberGrain*sizeof(Grain));
cudaMemcpy(d_grainList, grainList, totalNumberGrain*sizeof(Grain), cudaMemcpyHostToDevice);
////declare device list of pointer to pass pointer values of grain
d_grain_contactList = new Contact*[totalNumberGrain];
d_grain_position = new Vec3d2*[totalNumberGrain];
for(int i = 0; i < totalNumberGrain; i++)
{
cudaMalloc(&d_grain_contactList[i], grainList[i].getMaxNumberContact()*sizeof(Contact));
cudaMalloc(&d_grain_position[i], sizeof(Vec3d2));
}
//copy pointers and values for grains
for(int i = 0; i < totalNumberGrain; i++)
{
//pointers
cudaMemcpy(d_grainList[i].getContactListPtr(), &d_grain_contactList[i], sizeof(Contact*), cudaMemcpyHostToDevice);
cudaMemcpy(d_grainList[i].getPositionPtr(), &d_grain_position[i], sizeof(Vec3d2*), cudaMemcpyHostToDevice);
//values
cudaMemcpy(d_grain_contactList[i], grainList[i].getContactList(), grainList[i].getMaxNumberContact()*sizeof(Contact), cudaMemcpyHostToDevice);
cudaMemcpy(d_grain_position[i], grainList[i].getPosition(), sizeof(Vec3d2), cudaMemcpyHostToDevice);
}
}
void Analysis::copyToHost()
{
//delete the pointer value or it will create a memory leak
for(int i = 0; i < totalNumberGrain; i++)
{
grainList[i].deleteBlockData();
grainList[i].deleteGrainData();
}
//copy non pointer value
cudaMemcpy(grainList, d_grainList, totalNumberGrain*sizeof(Grain),cudaMemcpyDeviceToHost);
//copy pointer values for grains
grain_contactList = new Contact*[totalNumberGrain];
grain_position = new Vec3d2*[totalNumberGrain];
for(int i = 0; i < totalNumberGrain; i++)
{
grain_contactList[i] = new Contact[grainList[i].getMaxNumberContact()];
grain_position[i] = new Vec3d2;
grainList[i].setContactList(grain_contactList[i]);
grainList[i].setPosition(grain_position[i]);
cudaMemcpy(grain_contactList[i], d_grain_contactList[i], grainList[i].getMaxNumberContact()*sizeof(Contact), cudaMemcpyDeviceToHost);
cudaMemcpy(grain_position[i], d_grain_position[i], sizeof(Vec3d2), cudaMemcpyDeviceToHost);
}
}
__global__ void compute( Grain *g)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
//__syncthreads();
if( i < n )
{
g[i].findContactwithGrain(g);
}
}
void Analysis::runAnalysis()
{
for(int i = 0; i < 3; i ++)
{
clock_t begin = clock();
for(int j = 0; j < 10000; j++)
{
compute<<<(n + 511)/512, 512>>>(d_grainList);
}
clock_t end = clock();
cout << (double)(end-begin)/CLOCKS_PER_SEC << endl;
system("PAUSE");
}
}
int main(void)
{
//grain
Vec3d2 position1; position1[0] = 0;position1[1] = 0;position1[2] = 0;
double radius1 = 1;
////cuda
cout << "PRESS ENTER TO START" << endl;
system("PAUSE");
clock_t begin = clock();
Grain *g, *d_g;
g = new Grain[n];
for(int i = 0; i<n; i++)
{
g[i].setTotalNumberGrain();
}
Grain g1(position1, radius1, 0.1);
for(int i = 0; i <n; i++)
{
g[i] = g1;
g[i].setPositionVec(Vec3d2(3*i+1.5, 1.5, 0));
}
Analysis a(g);
a.copyToDevice();
a.runAnalysis();
clock_t end = clock();
cout << (double)(end-begin)/CLOCKS_PER_SEC << endl;
return 0;
}
I would need more code to verify but, the most likely explanation is that when you do not include code you are actually not writing any data to global memory. When you don't write anything to global memory nvcc will optimize just about everything out to the point where you will be running just about nothing.
The same is true when you include a print statement. Print statements are viewed as output, therefore nvcc can't compile out code that it is dependent on.
For example:
__global__ empty_kernel(int* huge_array, int num_elements){
int local_memory;
for(int i=0; i<num_elements; i++){
local_memory+=huge_array[i];
}
}
will run faster than:
__global__ empty_kernel(int* small_array, int num_elements, int* smaller_array){
int tid = ThreadIdx.x+BlockIdx.x*BlockDim.x;
int local_memory;
for(int i=0; i<5; i++){
local_memory+=huge_array[tid*i];
}
smaller_array[tid]=local_memory;
}
The bottom line being, your first kernel isn't faster, it just isn't being run.
The problem in my opinion is simply the if statement, not the statement that it conditionally executes. Conditional branching can be quite expensive on GPU architectures (though it seems to get better with newer architectures), and just having a branching statement could definitely slow down your code.
If you remove the statement within the if clause, the compiler sees that no code is left and therefore can optimize also the if itself away. So this is why you see the speedup when you remove this line of code.
I'm using an example code given to me by another C++ coder for a project. I'm a new student of C++ language and I wondered is there a possible memory leak / bugs in this class file given to me (PlacementHead.cpp):
#include "PlacementHead.h"
#include <string>
#include <iostream>
#include <string.h>
PlacementHead::PlacementHead(int width, int height, int gap, char* s) {
width_ = width;
height_ = height;
gap_ = gap;
size_ = (width*height)+1;
set_ = new char[size_ + 1];
from_ = new int[size_ + 1];
original_ = new char[size_ + 1];
strcpy(set_,s);
strcpy(original_,s);
}
PlacementHead::~PlacementHead() {
}
int PlacementHead::getSize() { return size_; }
int PlacementHead::getHeight() { return height_; }
int PlacementHead::getWidth() { return width_; }
int PlacementHead::getGap() { return gap_; }
// Palauttaa indeksissä i olevan suuttimen
char PlacementHead::getNozzle(int i) {
return set_[i-1];
}
// Asettaa indeksissä i olevan suuttimen
void PlacementHead::setNozzle(int i, char c) {
set_[i-1] = c;
}
// Merkitsee suuttimen poimituksi poistamalla sen listasta
void PlacementHead::markNozzle(int i, int bankPos) {
set_[i-1] = ' ';
from_[i-1] = bankPos;
}
// Palauttaa seuraavan poimimattoman suuttimen indeksin
int PlacementHead::getNextUnmarkedPos() {
for (int i=0; i<size_; i++) {
if (set_[i]!=' ') {
return i+1;
}
}
return 0;
}
// Palauttaa suuttimen alkuperäisen sijainnin pankissa
int PlacementHead::getBankPos(int i) {
return from_[i-1];
}
// Plauttaa alkuperäisen ladontapaan suutinjärjestyksen
void PlacementHead::reset() {
//for (int i=0; i<size_; i++) {
// set_[i] = original_[i];
//}
strcpy(set_,original_);
}
// Tulostusmetodi
void PlacementHead::print() {
std::cout << "ladontapaa:\n";
for (int h=height_; h>0; h--) {
for (int w=width_; w>0; w--) {
int i = ((h-1)*width_)+w;
std::cout << getNozzle(i);
}
std::cout << "\n";
}
}
PlacementHead.h:
#ifndef PLACEMENTHEAD_H
#define PLACEMENTHEAD_H
class PlacementHead {
public:
PlacementHead(int size, int rows, int gap, char* s);
~PlacementHead();
int getSize();
int getHeight();
int getWidth();
int getGap();
char getNozzle(int i);
void setNozzle(int i, char c);
void markNozzle(int i, int bankPos);
int getNextUnmarkedPos();
int getBankPos(int i);
void reset();
void print();
private:
char* set_;
int* from_;
char* original_;
int size_;
int width_;
int height_;
int gap_;
};
#endif
I notice that there is dynamic allocation of memory, but I don't see a delete anywhere...is this a problem? How could I fix this if it is a problem?
Thnx for any help!
P.S.
I noticed there is no keyword class used in this example?...Can you define a class like this?
It's impossible to say without seeing the class definition (the
header); if size_, etc. are something like
boost::shared_array, or std::unique_ptr, there is no leak.
If they are simply int*, there is a leak.
Of course, no C++ programmer would write this sort of code
anyway. The class would contain std::vector<int> and
std::string. Judging from what we see here, the author
doesn't know C++.
the code has leak . the constructor allocates the memory .Destructor or some other function have to clean that before the object gets destroyed
Another problem is that your code does not obey Rule of three (links here and here)
once you will write code like:
{
PlacementHead a(0,0,0,"asdsa");
PlacementHead b(0,0,0,"asdsa");
a = b; // line 1
} // here segfault
you will get segfault, in line 1, pointers will be copied from b to a, and once you will finally have destructors, pointers will be deleted twice, which is wrong. This is called shallow copy, you need deep copy, where new array will be allocated.