Transferring an array pointer into CUDA memory via separate class - c++

I have a class named "Coordinate" which consist of an int array pointer and a bool variable. I want to send this pointer into CUDA, modify it and then use it back in CPU memory.
Here is Coordinate.h :
#ifndef __COORDINATE_H
#define __COORDINATE_H
#include <stdlib.h>
#include <cuda.h>
using namespace std;
class Coordinate {
public:
int *array_pointer;
bool flag;
Coordinate() { flag = false; }
Coordinate(int array_length) {
flag = false;
array_pointer = new int[array_length];
for (int i = 0; i < array_length; i++) {
array_pointer[i] = -1;
}
}
};
#endif
I have made 2 global functions in cudamain.cu Check1 and Check2, both will take a Coordinate as argument. Check1 function will change only boolean flag which Check2 will change boolean flag and also modify the array.
Here is cudamain.cu :
#include <iostream>
#include <cuda.h>
#include "Coordinate.h"
using namespace std;
__global__ void check1(Coordinate *ptr) {
c->flag = true;
}
__global__ void check2(Coordinate *c) {
c->flag = true;
for (int i = 0; i < 10; i++) {
c->array_pointer[i] = i;
}
}
int main() {
Coordinate *d_a, *d_b, a, b;
a = Coordinate(10); b = Coordinate(10);
size_t size = sizeof(Coordinate);
cudaMalloc((void**)&d_a, size); cudaMalloc((void**)&d_b, size);
cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice); cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice);
check1 << <1, 1 >> > (d_a);
cudaMemcpy(&a, d_a, size, cudaMemcpyDeviceToHost);
cout <<"d_a result-> " <<a.flag <<" " <<a.array_pointer[9] << endl;
check2 << <1, 1 >> > (d_b);
cudaMemcpy(&b, d_b, size, cudaMemcpyDeviceToHost);
cout << "d_b result-> " << b.flag << " " << b.array_pointer[9] << endl;
return 0;
}
I made 2 separate coordinate objects a and b, a will go with check1 and b will go with check2. Both a and b are initialized in same way.
The result I get is
d_a result-> 1 -1
d_b result-> 0 -1
Expected result:
d_a result-> 1 -1
d_b result-> 1 9
Different Coordinate objects may have different array length so I can't initialize the array pointer in the coordinate class.

You cannot access host memory from a CUDA kernel by dereferncing, unless that piece of memory was specially-allocated to allow this, e.g. using cudaMallocManaged(). So your program cannot work. Read this Parallel4All post on accessing the same memory both from the host and the device. Another alternative is the one #RobertCrovella linked to, involving allocating device-side memory.
But, frankly, I doubt any of these two options are what you should go for in this case, since a class named Coordinate does not seem to be something which would need a variable-size array of integers. Are you sure something like
template <unsigned NumDimensions>
class Coordinate<N> {
std::array<int, NumDimensions> a;
// etc. etc.
}
won't do?
(Note that the std::array class itself cannot really be used in device code, like most of the standard library. But you can easily clone std::array and then use your cuda::array class on both the host and the device side.)
Even if dynamic allocation of memory is required for some reason, it is not a good idea to have a class which, it seems, would be used many times, allocate its own memory. Consider using some pre-allocated buffer and have your Coordinates just advance an offset into it (although this would require synchronization for thread safety, or making the buffer thread-local).

Related

CUDA pointer inside kernel becomes null

I'm trying to pass a pointer to triangle data to a kernel, but when debugging I find the pointer becomes null, d_list contains the triangles and both d_list and d_world are members of the main window class, also the error checking returns "no error"
d_list is of type hittable* and d_world is hittable_list*
__global__ void create_world(hittable* d_list, hittable_list* d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
d_world = new hittable_list(&d_list, num_triangles);
}
}
checkCudaErrors(cudaMalloc((void**)&d_list, num_hittables * sizeof(triangle)));
checkCudaErrors(cudaMalloc((void**)&d_world, sizeof(hittable_list)));
cudaMemcpy(d_list, m_triangles.data(), num_hittables * sizeof(triangle), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, num_hittables);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
I tried initializing the "world" in the host then cudaMemcpy'ing to the d_world, but it also fails
EDIT: minimal exmple
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
struct make_list {
__device__ make_list(float** list, int n) { contents = list; size = n; };
float** contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
}
__global__ void create_world(float* d_list, make_list* d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
d_world = new make_list(&d_list, num_triangles);
}
}
int main () {
float* d_list;
make_list* d_world;
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMalloc((void**)&d_world, sizeof(make_list));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (&d_world);
cudaDeviceSynchronize();
return 0;
}
EDIT 2: updated with virtual function call, it's causing crashes
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
#include <cstdio>
class hittable {
public:
__device__ virtual int hit() const = 0;
};
struct make_list : public hittable {
__device__ make_list(float** list, int n) { contents = list; size = n; };
__device__ virtual int hit() const {
return size;
}
float** contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
printf("size = %d\n", size);
int new_size = (*world)->hit();
printf("new size = %d\n", new_size);
}
__global__ void create_world(float* d_list, make_list** d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
*d_world = new make_list(&d_list, num_triangles);
}
}
int main() {
float* d_list;
make_list** d_world;
cudaMalloc(&d_world, sizeof(make_list*));
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (d_world);
cudaDeviceSynchronize();
return 0;
}
There are at least a few issues.
In C++, when you pass a variable to a function via the function parameters, a copy of that variable is made for local use by the function. Any modifications made to that variable will not show up globally, i.e. in the calling environment, because the function is operating on a copy of the variable. Therefore this could never do what you want:
d_world = new make_list(&d_list, num_triangles);
There is nothing illegal about it, per se, but it will not have the desired effect. The global copy of d_world is unchanged by that assignment. This is a C++ concept, not unique or specific to CUDA, and it trips people up from time to time.
This is almost never legal in CUDA:
render << <1, 1 >> > (&d_world);
^
In typical usage, it is not possible to pass the address of a host location to device code via a kernel call parameter. Any attempt to dereference that pointer &d_world will result in dereferencing the address of a host location. That is illegal in CUDA device code.
While not necessarily a problem at this point, you should be aware of the fact that in-kernel new operates against the device heap which has a default limit of 8MB, and furthermore allocations created this way cannot take part in host-issued cudaMemcpy* calls. These topics are covered in the programming guide.
When I make changes to address those first 2 items, I get what appear to be sensible results:
$ cat t2190.cu
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
#include <cstdio>
struct make_list {
__device__ make_list(float** list, int n) { contents = list; size = n; };
float** contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
printf("size = %d\n", size);
}
__global__ void create_world(float* d_list, make_list** d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
*d_world = new make_list(&d_list, num_triangles);
}
}
int main () {
float* d_list;
make_list** d_world;
cudaMalloc(&d_world, sizeof(make_list*));
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (d_world);
cudaDeviceSynchronize();
return 0;
}
$ nvcc -o t2190 t2190.cu
$ compute-sanitizer ./t2190
========= COMPUTE-SANITIZER
size = 8
========= ERROR SUMMARY: 0 errors
$
Although you don't show how you are using the contents member of the make_list object, I'm doubtful that this could possibly do anything useful for you, for the same reason as I have indicated in item 1 above:
*d_world = new make_list(&d_list,
^^^^^^^
The address you are using there is the address of a temporary local variable made by the function. My guess is you probably want d_list there or possibly *d_list, and this might necessitate changes in your contents object member of the handling of that object member. Whatever you are doing there will almost certainly require changes not unlike the refactoring I have done to address items 1 and 2.
For now, without knowing anything further about your intent, something that seems sensible to me would be like this:
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <vector>
#include <cstdio>
struct make_list {
__device__ make_list(float* list, int n) { contents = list; size = n; };
float* contents;
int size;
};
__global__ void render(make_list** world) {
int size = (*world)->size; // set a breakpoint here, the size is 0
printf("size = %d\n", size);
}
__global__ void create_world(float* d_list, make_list** d_world, int num_triangles) {
if (threadIdx.x == 0 && blockIdx.x == 0) {
// the class hittable_list contains a counter for the list size, which no matter the
// scene size it always becomes zero
*d_world = new make_list(d_list, num_triangles);
}
}
int main () {
float* d_list;
make_list** d_world;
cudaMalloc(&d_world, sizeof(make_list*));
int size = 8;
std::vector<float> m_triangles(size);
cudaMalloc((void**)&d_list, size * sizeof(float));
cudaMemcpy(d_list, m_triangles.data(), size * sizeof(float), cudaMemcpyHostToDevice);
create_world << <1, 1 >> > (d_list, d_world, size);
cudaDeviceSynchronize();
render << <1, 1 >> > (d_world);
cudaDeviceSynchronize();
return 0;
}

Why does 'new' fail in class initialization

win7
gcc 6.4.0
cygwin 2.9.0
the following code fails in function g_block during class initialization but not when used in main. The failure is in the 'for' loop when I attempt to initialize the code (initialization is a side issue here). In both cases allocation seems successful but when used in a class, I can't use the memory allocated.
# include <iostream>
# include <iomanip>
using namespace std;
typedef struct { // gsl allocation 'block' descritpoin
size_t size; // block bytes size
double* data; // pointer to the first byte of the block
} gsl_block;
typedef struct { // matrix definition
size_t size1; // number of rows
size_t size2; // number of columns
size_t tda; // number of elements in row (stride between rows)
double* data; // pointer to matrix[0][0]
gsl_block* block; // pointer to the gsl_matrix block
int owner; // 1: deallocation permitted
} gsl_matrix;
class X {
public:
inline static gsl_matrix& g_matrix(size_t row, size_t col)
{return g_matrix(row, col, g_block(row * col));};
static gsl_block& g_block(size_t size) {
double* ptr = new double(size);
cout << "size " << setw(5)<< size << " addr range "
<< hex << setfill('0') << ptr << " - " << (ptr + size*sizeof(double))
<< dec << setfill(' ') << endl;
for(size_t ndx = 0; ndx < size; ndx++) ptr[ndx] = 0.0;
return * new gsl_block{size, ptr};
};
static gsl_matrix& g_matrix(size_t row, size_t col, gsl_block& block) {
return * new gsl_matrix{row, col, col, block.data, &block, 0}; }
gsl_matrix& g_mat;
X() : g_mat(g_matrix(92, 92)) {}
}; // class X
int main(int argc, char** argv) {
gsl_matrix& mat = X::g_matrix(92, 92);
X* x = new X();
return 0;
}
double* ptr = new double(size);
This line creates a single double with the value size on the free store, and returns a pointer to it.
for(size_t ndx = 0; ndx < size; ndx++) ptr[ndx] = 0.0;
This line then invokes undefined behavior by attempting to write to memory that your program does not own.
You should really use std::vector instead of raw pointers. As your program stands, you have a significant potential to leak memory. If you made gsl_block::data a std::vector<double>, your classes would get proper copy and move semantics for free, and you wouldn't need to directly use new anywhere in your code.
EDIT:
Now that you've mentioned you're using GNU Scientific Library, you should probably just use the functions that library provides for allocating and freeing matricies: gsl_matrix_alloc and gsl_matrix_free. I would re-write your X class to just contain a std::unique_ptr with gsl_matrix_free as its deleter:
struct X
{
struct free_matrix
{
void operator()(gsl_matrix* mat)
{
gsl_matrix_free(mat);
}
};
std::unique_ptr<gsl_matrix, free_matrix> g_mat;
X(std::size_t rows, std::size_t cols)
: g_mat(gsl_matrix_alloc(rows, cols))
{}
};
You could even go further and completely wrap gsl_matrix in a more C++-like interface, with member functions that call gsl_matrix_get/gsl_matrix_set or gsl_matrix_pointer to provide simple access to the matrix elements.

Size of an object without using sizeof in C++

This was an interview question:
Say there is a class having only an int member. You do not know how many bytes the int will occupy. And you cannot view the class implementation (say it's an API). But you can create an object of it. How would you find the size needed for int without using sizeof.
He wouldn't accept using bitset, either.
Can you please suggest the most efficient way to find this out?
The following program demonstrates a valid technique to compute the size of an object.
#include <iostream>
struct Foo
{
int f;
};
int main()
{
// Create an object of the class.
Foo foo;
// Create a pointer to it.
Foo* p1 = &foo;
// Create another pointer, offset by 1 object from p1
// It is legal to compute (p1+1) but it is not legal
// to dereference (p1+1)
Foo* p2 = p1+1;
// Cast both pointers to char*.
char* cp1 = reinterpret_cast<char*>(p1);
char* cp2 = reinterpret_cast<char*>(p2);
// Compute the size of the object.
size_t size = (cp2-cp1);
std::cout << "Size of Foo: " << size << std::endl;
}
Using pointer algebra:
#include <iostream>
class A
{
int a;
};
int main() {
A a1;
A * n1 = &a1;
A * n2 = n1+1;
std::cout << int((char *)n2 - (char *)n1) << std::endl;
return 0;
}
Yet another alternative without using pointers. You can use it if in the next interview they also forbid pointers. Your comment "The interviewer was leading me to think on lines of overflow and underflow" might also be pointing at this method or similar.
#include <iostream>
int main() {
unsigned int x = 0, numOfBits = 0;
for(x--; x; x /= 2) numOfBits++;
std::cout << "number of bits in an int is: " << numOfBits;
return 0;
}
It gets the maximum value of an unsigned int (decrementing zero in unsigned mode) then subsequently divides by 2 until it reaches zero. To get the number of bytes, divide by CHAR_BIT.
Pointer arithmetic can be used without actually creating any objects:
class c {
int member;
};
c *ptr = 0;
++ptr;
int size = reinterpret_cast<int>(ptr);
Alternatively:
int size = reinterpret_cast<int>( static_cast<c*>(0) + 1 );

Use data allocated dynamically in CUDA kernel on host

I am trying to build a container class on the device which manages some memory.
This memory is allocated dynamically and filled during object construction in the kernel.
According to the documentation that can be done with a simple new[] in the kernel (using CUDA 8.0 with compute cabability 5.0 in Visual Studio 2012).
Afterwards I want to access the data inside the containers in host code (e.g. for testing if all values are correct).
A minimal version of the DeviceContainer class looks like this:
class DeviceContainer
{
public:
__device__ DeviceContainer(unsigned int size);
__host__ __device__ ~DeviceContainer();
__host__ __device__ DeviceContainer(const DeviceContainer & other);
__host__ __device__ DeviceContainer & operator=(const DeviceContainer & other);
__host__ __device__ unsigned int getSize() const { return m_sizeData; }
__device__ int * getDataDevice() const { return mp_dev_data; }
__host__ int* getDataHost() const;
private:
int * mp_dev_data;
unsigned int m_sizeData;
};
__device__ DeviceContainer::DeviceContainer(unsigned int size) :
m_sizeData(size), mp_dev_data(nullptr)
{
mp_dev_data = new int[m_sizeData];
for(unsigned int i = 0; i < m_sizeData; ++i) {
mp_dev_data[i] = i;
}
}
__host__ __device__ DeviceContainer::DeviceContainer(const DeviceContainer & other) :
m_sizeData(other.m_sizeData)
{
#ifndef __CUDA_ARCH__
cudaSafeCall( cudaMalloc((void**)&mp_dev_data, m_sizeData * sizeof(int)) );
cudaSafeCall( cudaMemcpy(mp_dev_data, other.mp_dev_data, m_sizeData * sizeof(int), cudaMemcpyDeviceToDevice) );
#else
mp_dev_data = new int[m_sizeData];
memcpy(mp_dev_data, other.mp_dev_data, m_sizeData * sizeof(int));
#endif
}
__host__ __device__ DeviceContainer::~DeviceContainer()
{
#ifndef __CUDA_ARCH__
cudaSafeCall( cudaFree(mp_dev_data) );
#else
delete[] mp_dev_data;
#endif
mp_dev_data = nullptr;
}
__host__ __device__ DeviceContainer & DeviceContainer::operator=(const DeviceContainer & other)
{
m_sizeData = other.m_sizeData;
#ifndef __CUDA_ARCH__
cudaSafeCall( cudaMalloc((void**)&mp_dev_data, m_sizeData * sizeof(int)) );
cudaSafeCall( cudaMemcpy(mp_dev_data, other.mp_dev_data, m_sizeData * sizeof(int), cudaMemcpyDeviceToDevice) );
#else
mp_dev_data = new int[m_sizeData];
memcpy(mp_dev_data, other.mp_dev_data, m_sizeData * sizeof(int));
#endif
return *this;
}
__host__ int* DeviceContainer::getDataHost() const
{
int * pDataHost = new int[m_sizeData];
cudaSafeCall( cudaMemcpy(pDataHost, mp_dev_data, m_sizeData * sizeof(int), cudaMemcpyDeviceToHost) );
return pDataHost;
}
It just manages the array mp_dev_data.
The array is created and filled with consecutive values during construction, which should only be possible on the device. (Note that in reality the size of the containers might be different from each other.)
I think I need to provide a copy constructor and an assignment operator since I don't know any other way to fill the array in the kernel. (See question No. 3 below.)
Since copy and deletion can also happen on the host, __CUDA_ARCH__ is used to determine for which execution path we're compiling. On the host cudaMemcpy and cudaFree is used, on the device we can just use memcpy and delete[].
The kernel for object creation is rather simple:
__global__ void createContainer(DeviceContainer * pContainer, unsigned int numContainer, unsigned int containerSize)
{
unsigned int offset = blockIdx.x * blockDim.x + threadIdx.x;
if(offset < numContainer)
{
pContainer[offset] = DeviceContainer(containerSize);
}
}
Each thread in a one-dimensional grid that is in range creates a single container object.
The main-function then allocates arrays for the container (90000 in this case) on the device and host, calls the kernel and attempts to use the objects:
void main()
{
const unsigned int numContainer = 90000;
const unsigned int containerSize = 5;
DeviceContainer * pDevContainer;
cudaSafeCall( cudaMalloc((void**)&pDevContainer, numContainer * sizeof(DeviceContainer)) );
dim3 blockSize(1024, 1, 1);
dim3 gridSize((numContainer + blockSize.x - 1)/blockSize.x , 1, 1);
createContainer<<<gridSize, blockSize>>>(pDevContainer, numContainer, containerSize);
cudaCheckError();
DeviceContainer * pHostContainer = (DeviceContainer *)malloc(numContainer * sizeof(DeviceContainer));
cudaSafeCall( cudaMemcpy(pHostContainer, pDevContainer, numContainer * sizeof(DeviceContainer), cudaMemcpyDeviceToHost) );
for(unsigned int i = 0; i < numContainer; ++i)
{
const DeviceContainer & dc = pHostContainer[i];
int * pData = dc.getDataHost();
for(unsigned int j = 0; j < dc.getSize(); ++j)
{
std::cout << pData[j];
}
std::cout << std::endl;
delete[] pData;
}
free(pHostContainer);
cudaSafeCall( cudaFree(pDevContainer) );
}
I have to use malloc for array creation on the host, since i don't want to have a default constructor for the DeviceContainer.
I try to access the data inside a container via getDataHost() which internally just calls cudaMemcpy.
cudaSafeCall and cudaCheckError are simple macros that evaluate the cudaError returned by the function oder actively poll the last error. For the sake of completeness:
#define cudaSafeCall(error) __cudaSafeCall(error, __FILE__, __LINE__)
#define cudaCheckError() __cudaCheckError(__FILE__, __LINE__)
inline void __cudaSafeCall(cudaError error, const char *file, const int line)
{
if (error != cudaSuccess)
{
std::cerr << "cudaSafeCall() returned:" << std::endl;
std::cerr << "\tFile: " << file << ",\nLine: " << line << " - CudaError " << error << ":" << std::endl;
std::cerr << "\t" << cudaGetErrorString(error) << std::endl;
system("PAUSE");
exit( -1 );
}
}
inline void __cudaCheckError(const char *file, const int line)
{
cudaError error = cudaDeviceSynchronize();
if (error != cudaSuccess)
{
std::cerr << "cudaCheckError() returned:" << std::endl;
std::cerr << "\tFile: " << file << ",\tLine: " << line << " - CudaError " << error << ":" << std::endl;
std::cerr << "\t" << cudaGetErrorString(error) << std::endl;
system("PAUSE");
exit( -1 );
}
}
I have 3 problems with this code:
If it is executed as presented here i recieve an "unspecified launch failure" of the kernel. The Nsight Debugger stops me on the line mp_dev_data = new int[m_sizeData]; (either in the constructor or the assignment operator) and reports several access violation on global memory. The number of violations appears to be random between 4 and 11 and they occur in non-consecutive threads but always near the upper end of the grid (block 85 and 86).
If i reduce numContainer to 10, the kernel runs smoothly, however, the cudaMamcpy in getDataHost() fails with an invalid argument error - even though mp_dev_data is not 0. (I suspect that the assignment is faulty and the memory has already been deleted by another object.)
Even though I would like to know how to correctly implement the DeviceContainer with proper memory management, in my case it would also be sufficient to make it non-copyable and non-assignable. However, I don't know how to properly fill the container-array in the kernel. Maybe something like
DeviceContainer dc(5);
memcpy(&pContainer[offset], &dc, sizeof(DeviceContainer));
Which would lead to problems with deleting mp_dev_data in the destructor. I would need to manually manage memory deletion which feels rather dirty.
I also tried to use malloc and free in kernel code instead of new and delete but the results were the same.
I am sorry that I wasn't able to frame my question in a shorter manner.
TL;DR: How to implement a class that dynamically allocates memory in a kernel and can also be used in host code? How can I initialize an array in a kernel with objects that can not be copied or assigned?
Any help is appreciated. Thank You.
Apparently the answer is: What I am trying to do is more or less impossible.
Memory allocated with new or malloc in the kernel is not placed in global memory but rather in a special heap memory which is inaccessible from the host.
The only option to access all memory on the host is to first allocate an array in global memory which is big enough to hold all elements on the heap and then write a kernel that copies all elements from the heap to global memory.
The access violation are caused by the limited heap size (which can be changed by cudaDeviceSetLimit(cudaLimitMallocHeapSize, size_t size).

cant figure out how to acces these elements

this is my header
#include <SDL.h>
class Grid
{
public:
int** Cells;
int x;
int y;
SDL_Color* palette[255];
Grid(int,int,int);
~Grid();
void DrawGrid(SDL_Renderer*);
void SetPalette(int c, int r, int g, int b, int a);
};
and this is my source:
Grid::Grid(int a,int b,int s)
{
std::cout << "grid constructed";
x = a;
y = b;
Grid::Cells = (int**) malloc(x*s);
for (int i = 0;i < x;i++)
{
Grid::Cells[i] = (int*)malloc(y*s);
}
SetPalette(1, 255, 255, 255, 0);
}
void Grid::DrawGrid(SDL_Renderer* renderer)
{
std::cout << Grid::palette[Cells[i][o]].r << " : " << Cells[i][o];
SDL_SetRenderDrawColor(renderer, palette[Cells[i][o]].r, palette[Cells[i][o]].g, palette[Cells[i][o]].b, palette[Cells[i][o]].a);
SDL_RenderDrawPoint(renderer, i, o);
}
void Grid::SetPalette(int c, int r, int g, int b, int a)
{
palette[c].r = r;
i have this for green blue and alpha too
}
it says expression must have class type. how do i fix
i have tried hard to figure it out. so i hope i get an answer at least
i did remove some of the irellevant code so it wouldn't take too much space
You did not allocate memory for your palette elements. Without modifying data layout (which is bad, see below), you'll need at least allocate elements in your constructor (prior to SetPalette):
for(int i = 0; i != 255; i++) {
palette[i] = new SDL_Color;
}
(you will also need to release this memory e.g. in destructor).
With palette declared as SDL_Color* palette[255];, expression palette[c] have type SDL_Color*. Accessing structure field with . operator needs structure, not a pointer - so direct solution is palette[c]->r (or manually dereferencing and using ., but that's exactly what -> does).
However allocating a lot of so small objects have a relatively high cost and in given example there is no point to do so. If your palette size is constant (as it is) you could just use SDL_Color palette[255] and remove all allocation/deallocation code (and there will be no need for -> since type of palette[c] is now SDL_Color). If size isn't known at compile time - you can allocate array of colours with a single allocation (malloc or new[]). And if size is changing during runtime, it is probably easier to use vector.