CUDA - copy to array within array of Objects - c++

I have a CUDA application I'm working on with an array of Objects; each object has a pointer to an array of std::pair<int, double>. I'm trying to cudaMemcpy the array of objects over, then cudaMemcpy the array of pairs to each of the objects, however this is giving me all kinds of grief. It crashes attempting to copy to the inner array; I don't understand how to move this over...
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
using namespace std;
class Object
{
public:
int id;
float something;
std::pair<int, float> *somePairs;
};
Object *objects;
void initObjects()
{
objects = new Object[10];
for( int idx = 0; idx < 10; idx++ )
{
objects[idx].id = idx;
objects[idx].something = (float) idx;
objects[idx].somePairs = new std::pair<int, float>[10];
for ( int jdx = 10; jdx < 10; jdx++ )
{
objects[idx].somePairs[jdx] = std::pair<int, float>( jdx, (float) jdx );
}
}
}
void cudaMemcpyObjects()
{
Object *devObjects;
cudaMalloc( &devObjects, sizeof(Object) * 10 );
cudaMemcpy( devObjects, objects, sizeof(Object) * 10, cudaMemcpyHostToDevice );
for ( int idx = 0; idx < 10; idx++ )
{
size_t pairSetSize = sizeof(std::pair<int, float>) * 10;
// CRASH HERE ... v
cudaMalloc( &(devObjects[idx].somePairs), pairSetSize );
cudaMemcpy( devObjects[idx].somePairs, objects[idx].somePairs,
sizeof( std::pair<int, float> ) * 10, cudaMemcpyHostToDevice );
}
}
int main()
{
initObjects();
cudaMemcpyObjects();
return 0;
}

My CUDA experience is only in its infancy, but I believe the error is like this:
cudaMalloc is a host function that wants to write the pointer into host memory. However, you are passing to it a pointer in device memory!
To fix this, you should first create the device pointers and fill them into your host object structure, and only then copy the whole thing over to the device, and also copy the individual pairs over to the device as well.
Schematically:
struct Bar;
struct Foo
{
int tag;
Bar * bp;
};
void setup()
{
Foo * hFoo = new Foo[10];
Foo * dFoo;
cudaMalloc(dFoo, sizeof(Foo) * 10);
for (size_t i = 0; i != 10; ++i)
{
Bar * dBar;
cudaMalloc(&dbar, sizeof(Bar));
Bar b; // automatic temporary -- we never keep a host copy of this
cudaMemcpy(dBar, &b, sizeof(Bar));
hFoo[i].bp = dBar; // this is already a device pointer!
}
cudaMemcpy(dFoo, hFoo, sizeof(Foo) * 10);
}
On the return, don't forget that the Foo::bp are device pointers that you still need to copy back one by one!
It would probably be easier to just have one self-contained class that you can move in one go, but that may not be practical, or desirable for reasons of memory locality. You have to thing carefully about this. If the member is just a pair, why not put the two items in the main class directly?

Related

CUDA pointer inside kernel becomes null

I'm trying to pass a pointer to triangle data to a kernel, but when debugging I find the pointer becomes null, d_list contains the triangles and both d_list and d_world are members of the main window class, also the error checking returns "no error"
d_list is of type hittable* and d_world is hittable_list*
__global__ void create_world(hittable* d_list, hittable_list* d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
d_world = new hittable_list(&d_list, num_triangles);
}
}
checkCudaErrors(cudaMalloc((void**)&d_list, num_hittables * sizeof(triangle)));
checkCudaErrors(cudaMalloc((void**)&d_world, sizeof(hittable_list)));
cudaMemcpy(d_list, m_triangles.data(), num_hittables * sizeof(triangle), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, num_hittables);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
I tried initializing the "world" in the host then cudaMemcpy'ing to the d_world, but it also fails
EDIT: minimal exmple
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
struct make_list {
__device__ make_list(float** list, int n) { contents = list; size = n; };
float** contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
}
__global__ void create_world(float* d_list, make_list* d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
d_world = new make_list(&d_list, num_triangles);
}
}
int main () {
float* d_list;
make_list* d_world;
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMalloc((void**)&d_world, sizeof(make_list));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (&d_world);
cudaDeviceSynchronize();
return 0;
}
EDIT 2: updated with virtual function call, it's causing crashes
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
#include <cstdio>
class hittable {
public:
__device__ virtual int hit() const = 0;
};
struct make_list : public hittable {
__device__ make_list(float** list, int n) { contents = list; size = n; };
__device__ virtual int hit() const {
return size;
}
float** contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
printf("size = %d\n", size);
int new_size = (*world)->hit();
printf("new size = %d\n", new_size);
}
__global__ void create_world(float* d_list, make_list** d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
*d_world = new make_list(&d_list, num_triangles);
}
}
int main() {
float* d_list;
make_list** d_world;
cudaMalloc(&d_world, sizeof(make_list*));
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (d_world);
cudaDeviceSynchronize();
return 0;
}
There are at least a few issues.
In C++, when you pass a variable to a function via the function parameters, a copy of that variable is made for local use by the function. Any modifications made to that variable will not show up globally, i.e. in the calling environment, because the function is operating on a copy of the variable. Therefore this could never do what you want:
d_world = new make_list(&d_list, num_triangles);
There is nothing illegal about it, per se, but it will not have the desired effect. The global copy of d_world is unchanged by that assignment. This is a C++ concept, not unique or specific to CUDA, and it trips people up from time to time.
This is almost never legal in CUDA:
render << <1, 1 >> > (&d_world);
^
In typical usage, it is not possible to pass the address of a host location to device code via a kernel call parameter. Any attempt to dereference that pointer &d_world will result in dereferencing the address of a host location. That is illegal in CUDA device code.
While not necessarily a problem at this point, you should be aware of the fact that in-kernel new operates against the device heap which has a default limit of 8MB, and furthermore allocations created this way cannot take part in host-issued cudaMemcpy* calls. These topics are covered in the programming guide.
When I make changes to address those first 2 items, I get what appear to be sensible results:
$ cat t2190.cu
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
#include <cstdio>
struct make_list {
__device__ make_list(float** list, int n) { contents = list; size = n; };
float** contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
printf("size = %d\n", size);
}
__global__ void create_world(float* d_list, make_list** d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
*d_world = new make_list(&d_list, num_triangles);
}
}
int main () {
float* d_list;
make_list** d_world;
cudaMalloc(&d_world, sizeof(make_list*));
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (d_world);
cudaDeviceSynchronize();
return 0;
}
$ nvcc -o t2190 t2190.cu
$ compute-sanitizer ./t2190
========= COMPUTE-SANITIZER
size = 8
========= ERROR SUMMARY: 0 errors
$
Although you don't show how you are using the contents member of the make_list object, I'm doubtful that this could possibly do anything useful for you, for the same reason as I have indicated in item 1 above:
*d_world = new make_list(&d_list,
^^^^^^^
The address you are using there is the address of a temporary local variable made by the function. My guess is you probably want d_list there or possibly *d_list, and this might necessitate changes in your contents object member of the handling of that object member. Whatever you are doing there will almost certainly require changes not unlike the refactoring I have done to address items 1 and 2.
For now, without knowing anything further about your intent, something that seems sensible to me would be like this:
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
#include <cstdio>
struct make_list {
__device__ make_list(float* list, int n) { contents = list; size = n; };
float* contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
printf("size = %d\n", size);
}
__global__ void create_world(float* d_list, make_list** d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
*d_world = new make_list(d_list, num_triangles);
}
}
int main () {
float* d_list;
make_list** d_world;
cudaMalloc(&d_world, sizeof(make_list*));
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (d_world);
cudaDeviceSynchronize();
return 0;
}

How to resize a dynamically allocated array inside a class?

I have a private array inside a class, allocated dynamically. As I insert more and more items, I need to resize the array at some point. The question is how to do that correctly? The code below ends in an error: munmap_chunk(): invalid pointer while inserting a third item.
#include <string>
#include <cstring>
#include <cassert>
using namespace std;
template<typename T>
class Set
{
private:
T * array;
size_t arraySize;
unsigned int itemCount;
public:
Set() {
arraySize = 1;
itemCount = 0;
array = new T[arraySize];
};
bool Insert(const T item) {
if (itemCount == arraySize) {
T * tmpArray = new T[arraySize * 2];
memcpy(tmpArray, array, arraySize * sizeof(T));
arraySize *= 2;
delete [] array;
array = tmpArray;
}
array[itemCount] = item;
itemCount++;
return true;
}
};
int main ()
{
Set<string> x0;
assert( x0 . Insert( "apple" ) );
assert( x0 . Insert( "orange" ) );
assert( x0 . Insert( "pineapple" ) );
return 0;
}
I know I could use for example a vector to don't care about the allocation, but I would like to know how to do that properly this way.
Please forgive, if the question is somehow inappropriate. It is my first time questioning
What the commenters said, plus the fact that you can replace your memcpy with a call to std::copy to get the correct behaviour:
std::copy (array, array + arraySize, tmpArray);
Don't forget to #include <algorithm>.

Issue accessing free list nodes by memory address in pool allocator

I'm attempting to write a simple pool allocator for allocation and deallocation in c++ as part of a uni course task. We were given a reference to to git-user floooh's oryol engine, https://github.com/floooh/oryol/blob/master/code/Modules/Core/Memory/poolAllocator.h, hence I try to do something similar by splitting the pool into puddles that are allocated when needed. Starting with one and then incrementing as memory demands increase.
Each puddle in my case maintains its own free list of nodes and I fail already in creating the first puddle: I get segmentation fault when I try to access node struct data members. Below is my pool allocator class description along with constructor and function for adding a puddle. I commented in caps lock "SEGMENTATION FAULT" in allocNewPuddle() where it fails, line 10 in that function.
Class description:
template<class T> class memAllocator {
public:
memAllocator();
~memAllocator();
struct Puddle;
struct mNode {
mNode* nextN;
mNode* prevN;
uint puddle;
};
struct Puddle {
mNode* headN_free;
mNode* headN_occ;
};
uint numPuddles;
static const uint nodesInPuddle = 512;
static const uint maxPuddles = 512;
Puddle* puddles[maxPuddles];
uint nodeSize;
uint elemSize;
uint puddleStructSize;
void allocNewPuddle();
void* allocate();
void deallocate(void* obj);
void* findNextFreeNode();
template<typename... ARGS> T* create(ARGS&&... args);
void destroy(T* obj);
};
Constructor:
template<class T>
memAllocator<T>::memAllocator() // creates instance of allocator starting with one puddle allocated
{
this->numPuddles = 0;
this->nodeSize = sizeof(mNode);
this->elemSize = nodeSize + sizeof(T);
this->puddleStructSize = sizeof(Puddle);
allocNewPuddle();
}
Add a new puddle:
template<class T>
void memAllocator<T>::allocNewPuddle() // allocates a new puddle
{
// allocate memory for one puddle
assert(numPuddles < maxPuddles);
Puddle* newPuddle = (Puddle*) malloc(puddleStructSize + nodesInPuddle * elemSize);
// allocate nodes in free list pointed to by puddle struct
newPuddle->headN_free = (mNode*) (newPuddle + puddleStructSize + (nodesInPuddle-1)*elemSize);
for (int i = nodesInPuddle-2; i >= 0; i--) {
mNode* curNode = (mNode*) (newPuddle + puddleStructSize + i*elemSize);
// Fails here when attempting to access mNode struct members
curNode->puddle = numPuddles; // SEGMENTATION FAULT HERE ON FIRST ITERATION
curNode->prevN = nullptr;
curNode->nextN = newPuddle->headN_free;
curNode->nextN->prevN = curNode;
newPuddle->headN_free = curNode;
}
newPuddle->headN_occ = nullptr;
puddles[numPuddles] = newPuddle;
numPuddles++;
}
Here is my main.cc:
#include "memAllocator.h"
#include <iostream>
class Test {
public:
Test();
~Test();
int arr[5];
};
Test::Test() {
for (int i = 0; i < 5; i++) {
this->arr[i] = i;
}
}
Test::~Test() {
std::cout << "destructor called" << std::endl;
}
int main(int argc, char* argv[]) {
memAllocator<Test> memPool = memAllocator<Test> ();
Test* test = memPool.create();
for (int i = 0; i < 5; i++) {
std::cout << test->arr[i] << std::endl;
}
memPool.destroy(test);
for (int i = 0; i < 5; i++) {
std::cout << test->arr[i] << std::endl;
}
}
My guess is that I am doing something horribly naïve with c++ pointers, but from what I know the above should work. If not then I look forward to a good scolding.
Oh, and as you can see I'm not bothering to align memory since it is a small assignment, and as I understand this is not essential for it to work it only makes it faster, but is it possible this can cause wrong memory to be read and written to as more is demanded?
You have incorrect address calculation in line
mNode* curNode = (mNode*) (newPuddle + puddleStructSize + i*elemSize);
newPuddle is Puddle pointer, but you trying to add bytes. Thus you have new address far beyond end of allocated memory buffer. So you must add explicit cast to byte pointer (char, uint8_t etc)
mNode* curNode = (mNode*) ((char*)newPuddle + puddleStructSize + i*elemSize);
You must fix this line too
newPuddle->headN_free = (mNode*) (newPuddle + puddleStructSize + (nodesInPuddle-1)*elemSize);

A shared pointer query from a novice user. I have code below that crashes when using a container of class with member shared pointers

//this is my main Method ,this was an experiment to understand shared pointer usage
#include <iostream>
#include "shared_ptrtestA.h"
int main(int argc, const char * argv[]) {
// declare a shared pointer to the class
sharedptr_testA* A = new sharedptr_testA(5);
//class has a vector , push back a new instance into the vector
A->mvect.push_back(sharedptr_testA::Aptr(new sharedptr_testA::testA(
sharedptr_testA::sharedptr_testB::Create(1) , sharedptr_testA::sharedptr_testC::Create(1)
)));
//class has a vector , push back a new instance into the vector
A->mvect.push_back(sharedptr_testA::Aptr(new sharedptr_testA::testA(
sharedptr_testA::sharedptr_testB::Create(2),sharedptr_testA::sharedptr_testC::Create(2)
)));
//iterate the vector populated above
for(std::vector<sharedptr_testA::Aptr>::iterator it = A->mvect.begin() ;
it!= A->mvect.end() ; it++)
{
// get members from the vector iterator
sharedptr_testA:: sharedptr_testB::Bptr B = (*it)->mb;
sharedptr_testA:: sharedptr_testC::Cptr C = (*it)->mc;
// print contents of members
for(int i = 0 ; i < B->m_size ; i++)
{
std::cout<<B->bytes[i]<<'\t';
}
std::cout <<std::endl;
for(int i = 0 ; i < C->m_size ; i++)
{
std::cout<<C->bytes[i]<<'\t';
}
std::cout <<std::endl;
}
}
//this was the main method above and the expected output was
B
C
BB
CC
The structure of the classes used are
//Header File
#ifndef shared_ptrtest_shared_ptrtestA_h
#define shared_ptrtest_shared_ptrtestA_h
#include <memory>
#include <functional>
#include <vector>
class sharedptr_testA
{
public:
// constructor and destructor
sharedptr_testA(int vsize);
~sharedptr_testA();
// an internal class member defined
class sharedptr_testB
{
public:
typedef std::shared_ptr<sharedptr_testB> Bptr;
//static create method
static Bptr Create(int msize)
{
return Bptr(new sharedptr_testB(msize));
}
//members
int m_size;
char *bytes;
//private contructor
private:
sharedptr_testB(int size)
{
m_size = size;
bytes = new char[size];
for(int i = 0 ; i < size ; i++)
bytes[size]= 'B';
}
};
//class c has same structure as class B above
class sharedptr_testC
{
public:
typedef std::shared_ptr<sharedptr_testC> Cptr;
static Cptr Create(int msize)
{
return Cptr(new sharedptr_testC(msize));
}
int m_size;
char *bytes;
private:
sharedptr_testC(int size)
{
m_size = size;
bytes = new char[size];
for(int i = 0 ; i < size ; i++)
bytes[size]= 'C';
}
};
// struct containing shared pointers to classes defined above
struct testA
{
testA(sharedptr_testB::Bptr B, sharedptr_testC::Cptr C)
{
mb = B;
mc = C;
}
sharedptr_testB::Bptr mb;
sharedptr_testC::Cptr mc;
};
typedef std::shared_ptr<testA> Aptr;
std::vector<Aptr> mvect;
};
#endif
//The short cpp file for the above class contains only constructor and destructor
#include "shared_ptrtestA.h"
sharedptr_testA::sharedptr_testA(int vsize)
:mvect(vsize)
{
}
sharedptr_testA::~sharedptr_testA()
{
}
What is wrong in the above code ? I wrote this to understand shared pointer usage
You have two bugs in your program:
The loops in constructors of sharedptr_testB and sharedptr_testC use size instead of i for indexing. It should be:
sharedptr_testB(int size)
{
m_size = size;
bytes = new char[size];
for(int i = 0 ; i < size ; i++)
bytes[i]= 'B';
}
(DTTO) for sharedptr_testC)
You start with a vector of size 5, which means it stores five null pointers. Then you append two elements to it (size 7, five nulls + two valid pointers). The you iterate over it, dereferencing each pointer. This of course crashes, since there are nulls at the beginning. Simply initialise the vector as empty.
sharedptr_testA* A = new sharedptr_testA(0);
With these two fixes, the code works.
Side notes 1 (C++):
The code is next to impossible to read. I strongly suggest you use a better naming scheme.
sharedptr_testB and sharedptr_testC leak memory. I understand it's just a learning excercise, I'd just like to point it out. You'd be better off with std::vector<char> in them instead of char*.
Side notes 2 (Stack Overflow):
If you have a crashing program, you should generally try to debug it yourself before asking an SO question. Stepping through the program through a debugger would easily have uncovered both the issues.

What's the proper way to pass a pointer to a function for deletion?

I have a matrix declared like int **matrix, and I know that the proper way to pass it to a function to allocate memory should be like this:
void AllocMat(int ***mat, int size);
But now I need to delete these memory in another function and am not sure about what to pass:
void DeallocMat(int **mat, int size);
or
void DeallocMat(int ***mat, int size);
I think the second one should be right, but neither way gives me segmentation fault as I tried.
The question is tagged C++, and yet the answers only use the C subset...
Well, first of all, I would recommend against the whole thing. Create a class that encapsulates your matrix and allocate it in a single block, offer operator()(int,int) to gain access to the elements...
But back to the problem. In C++ you should use references rather than pointers to allow the function to change the argument, so your original allocate signature should be:
void AllocMat(int **&mat, int size);
And call it like:
int **matrix = 0;
AllocMat( matrix, 5 );
Or better, just return the pointer:
int **AllocMat( int size );
int **matrix = AllocMat( 5 );
For the deallocation function, since you don't need to modify the outer pointer, you can just use:
void DeallocMat( int**mat, int size ); // size might be required to release the
// internal pointers
Now, for a sketch of the C++ solution:
template <typename T> // no need to limit this to int
class square_matrix {
const unsigned size;
T * data;
public:
square_matrix( unsigned size ) : size(size), data( new T[size*size]() ) {}
square_matrix( matrix const & m ) : size( m.size ), data( new T[m.size*m.size] ) {
std::copy( m.data, m.data+size*size, data );
}
~matrix() {
delete [] data;
}
T const & operator()( unsigned x, unsigned y ) const {
// optional range check and throw exception
return data[ x + y*size ];
}
void set( unsigned x, unsigned y, T const & value ) {
// optional range check and throw exception
data[ x + y*size ] = value;
}
};
First is correct. But your real problem is that you are using pointers when there are better alternatives. For a 2d matrix you should use a vector of vectors
#include <vector>
typedef std::vector<std::vector<int> > Matrix;
Matix m;
Now there is no need to delete anything, so one less thing to go wrong.
void DeallocMat(int **mat, int size) - allows you to deallocate memory (since you have passed the value of mat only allowing to deallocate memory but not change mat)
void DeallocMat(int ***mat, int size) - allows you to deallocate memory and change the value of mat to NULL (since you have now passed a pointer to mat allowing you to change its value)
The extra "*" just handles the pointer to be behaved as call by reference. If you want to get the output from your function, you need an extra "*" in your declaration. In this case, you should pass the reference of your pointer (using &) to these functions.
The reason why you required to pass a pointer to double pointer because your local variable must required to reflect with the new updated memory
void Foo(int * a)
{
a = new int[10];
}
int main()
{
int *a = 0;
Foo( a );
}
Now the memory will be allocated but the pointer A will not be update because the value of pointer A is simply copied to another pointer variable which is parameter of Foo. Once the Foo is returned, a will remain 0. To make it refect that, you should write code like follows
void Foo(int ** a)
{
*a = new int[10];
}
int main()
{
int *a = 0;
Foo( &a );
}
Here you're passing the address of a pointer. The which means that, the value which contains in the pointer will be updated from the Foo function.You can debug through and see how it works.
If you're sure that you will not access the pointer anymore, please use the first type. Otherwise use the second one. Make sure that you set the pointer to NULL to avoid further memory corruptions or dangling pointers.
The thing that confuses me about your question is that most people would not declare a matrix as an int **. The reason for this is that you would be forced to then allocate it in a loop. Your allocation function would require two parameters, which are the dimensions of the array like this:
void AllocMat(int *** mat, int n, int m) {
int ** result = new int * [ n ];
for (int x=0; x<n; x++) {
result[x] = new int [ m ];
}
*mat = result;
}
If this were the case, the corresponding deallocation function would require knowledge of the size of n as follows:
void DeallocMat(int *** mat, int n) {
if (mat == NULL || *mat == NULL) return;
int ** tmp = *mat;
for (int x=0; x<n; x++) {
if (tmp[x] != NULL) delete [] tmp[x];
}
delete [] tmp;
*mat = NULL;
}
With this approach, you could access your matrix like this:
int ** mat = NULL;
AllocMat(&mat, n, m);
for (int x=0; x<n; x++) {
for (int y=0; y<m; y++) {
mat[x][y] = 1;
}
}
DeallocMat(&mat, n);
Usually, people allocate matrices as a single buffer of memory to avoid extra allocations and pointer indirections, which is how I recommend you do it. In that case, you allocation function would look like this:
void AllocMat2(int ** mat, int n, int m) {
*mat = new int [ n * m ];
}
And the corresponding deallocation function like this:
void DeallocMat2(int ** mat) {
if (mat != NULL && *mat != NULL) {
delete [] *mat;
*mat = NULL;
}
}
And you would access it follows:
int * mat2 = NULL;
AllocMat2(&mat2, n, m);
for (int x=0; x<n; x++) {
for (int y=0; y<m; y++) {
mat2[x * n + y] = 1;
}
}
DeallocMat2(&mat2);
Either way works, but if you pass a pointer to the pointer you need to dereference it first. And the size parameter is redundant.
void DeallocMat(int **mat)
{
delete[] mat;
}
void DeallocMat(int ***mat)
{
delete[] *mat;
*mat = NULL;
}