I am trying to do a large matrix multiplication, e.g. 1000x1000. Unfortunately, it only works for very small matrices. For the big ones, the program just turns on and that's all - no results. Here's the code:
#include <iostream>
using namespace std;
int main() {
int matrix_1_row;
int matrix_1_column;
matrix_1_row = 10;
matrix_1_column = 10;
int** array_1 = new int* [matrix_1_row];
// dynamically allocate memory of size matrix_1_column for each row
for (int i = 0; i < matrix_1_row; i++)
{
array_1[i] = new int[matrix_1_column];
}
// assign values to allocated memory
for (int i = 0; i < matrix_1_row; i++)
{
for (int j = 0; j < matrix_1_column; j++)
{
array_1[i][j] = 3;
}
}
int matrix_2_row;
int matrix_2_column;
matrix_2_row = 10;
matrix_2_column = 10;
// dynamically create array of pointers of size matrix_2_row
int** array_2 = new int* [matrix_2_row];
// dynamically allocate memory of size matrix_2_column for each row
for (int i = 0; i < matrix_2_row; i++)
{
array_2[i] = new int[matrix_2_column];
}
// assign values to allocated memory
for (int i = 0; i < matrix_2_row; i++)
{
for (int j = 0; j < matrix_2_column; j++)
{
array_2[i][j] = 2;
}
}
// Result
int result_row = matrix_1_row;
int result_column = matrix_2_column;
// dynamically create array of pointers of size result_row
int** array_3 = new int* [result_row];
// dynamically allocate memory of size result_column for each row
for (int i = 0; i < result_row; i++)
{
array_3[i] = new int[result_column];
}
// Matrix multiplication
for (int i = 0; i < matrix_1_row; i++)
{
for (int j = 0; j < matrix_2_column; j++)
{
array_3[i][j] = 0;
for (int k = 0; k < matrix_1_column; k++)
{
array_3[i][j] += array_1[i][k] * array_2[k][j];
}
}
}
//RESULTS
for (int i = 0; i < result_row; i++)
{
for (int j = 0; j < result_column; j++)
{
std::cout << array_3[i][j] << "\t";
}
}
// deallocate memory using delete[] operator 1st matrix
for (int i = 0; i < matrix_1_row; i++)
{
delete[] array_1[i];
}
delete[] array_1;
// deallocate memory using delete[] operator 2nd matrix
for (int i = 0; i < matrix_2_row; i++)
{
delete[] array_2[i];
}
delete[] array_2;
// deallocate memory using delete[] operator result
for (int i = 0; i < result_row; i++)
{
delete[] array_3[i];
}
delete[] array_3;
return 0;
}
Anyone have an idea how to fix it? At what point did I go wrong? I used pointers, dynamic memory allocation.
Instead of working with arrays directly named as matrix, try something simple and scalable, then optimize. Something like this:
class matrix
{
private:
// sub-matrices
std::shared_ptr<matrix> c11;
std::shared_ptr<matrix> c12;
std::shared_ptr<matrix> c21;
std::shared_ptr<matrix> c22;
// properties
const int n;
const int depth;
const int maxDepth;
// this should be shared-ptr too. Too lazy.
int data[16]; // lowest level matrix = 4x4 without sub matrix
// multiplication memory
std::shared_ptr<std::vector<matrix>> m;
public:
matrix(const int nP=4,const int depthP=0,const int maxDepthP=1):
n(nP),depth(depthP),maxDepth(maxDepthP)
{
if(depth<maxDepth)
{
// allocate c11,c22,c21,c22
// allocate m1,m2,m3,...m7
}
}
// matrix-matrix multiplication
matrix operator * (const matrix & mat)
{
// allocate result
// multiply
if(depth!=maxDepth)
{
// Strassen's multiplication algorithm
*m[0] = (*c11 + *c22) * (*mat.c11 + *mat.c22);
...
*m[6] = (*c12 - *c22) * (*mat.c21 + *mat.c22);
*c11 = *m[0] + *m[3] - *m[4] + *m[6];
..
*c22 = ..
}
else
{
// innermost submatrices (4x4) multiplied normally
result.data[0] = data[0]*mat.data[0] + ....
...
result.data[15]= ...
}
return result;
}
// matrix-matrix adder
matrix operator + (const matrix & mat)
{
// allocate result
// add
if(depth!=maxDepth)
{
*result.c11 = *c11 + *mat.c11;
*result.c12 = *c12 + *mat.c12;
*result.c21 = *c21 + *mat.c21;
*result.c22 = *c22 + *mat.c22;
}
else
{
// innermost matrix
result.data[0] = ...
}
return result;
}
};
This way, it costs less time-complexity and still looks simple to read. After it works, you can use single-block of matrix array inside of class to optimize for more speed, preferably only allocating once at root matrix and use
std::span
for access from submatrices for newer C++ versions. It is even parallelizable easily as each matrix can distribute its work to at least 4 threads and they can to 16 threads, 64 threads, etc. But of course too many threads are just as bad as too many allocations and should be optimized in a better way.
Related
Unfortunately I have to use arrays in order to use another function I have copied. Changing this function to work with vectors would be way over my head. So I wrote a function declaring me bunch of arrays in heap to be stored inside a vector.
I now have trouble freeing up that memory at the end.
void _get_X_Y_arrays(std::vector<std::vector<float> > *voronoi, std::vector<std::vector<int*> > *rtrn)
{
int numberPolygons = voronoi->size();
for (int i = 0; i < numberPolygons; i++)
{
int *x_heap = new int[((*voronoi)[i].size()) / 2];
int *y_heap = new int[((*voronoi)[i].size()) / 2];
std::vector<int> x(((*voronoi)[i].size()) / 2);
std::vector<int> y(((*voronoi)[i].size()) / 2);
unsigned j = 0;
int count = 0;
for (; j < (*voronoi)[i].size(); j += 2, count++)
{
x[count] = (int)(*voronoi)[i][j];
y[count] = (int)(*voronoi)[i][j + 1];
}
std::copy(x.begin(), x.end(), &x_heap[0]);
std::copy(y.begin(), y.end(), &y_heap[0]);
(*rtrn)[i].push_back(x_heap);
(*rtrn)[i].push_back(y_heap);
}
}
The function works well and everything acts like intended. I wrote another function to free up that memory at the end when it's no longer needed:
void _cleanup(std::vector<std::vector<int*> > *rtrn)
{
for (unsigned i = 0; i < rtrn->size(); i++)
{
for (unsigned j = 0; j < (*rtrn)[i].size(); j++)
{
delete[] rtrn[i][j][0];
delete[] rtrn[i][j][1];
}
}
}
Unfortunately this causes the program to crash. I don't really know where the error is. It feels like there might be an vector out of scope ..?
Just by looking at it and playing with it I'm not able to solve this. What am I doing wrong?
I think you have 3 dimensions array [nbpolygons][2][nbpoints]
Your code :
delete[] rtrn[i][j][0]; // delete rtrn[i][j] index 0
delete[] rtrn[i][j][1]; // delete rtrn[i][j] (the same array) index 1
// => crash
rtrn[i].size() always egal 2
Do :
void _cleanup(std::vector<std::vector<int*> >& rtrn)
{
for (unsigned i = 0; i < rtrn.size(); i++)
{
for (unsigned j = 0; j < rtrn[i].size(); j++)
{
delete[] rtrn[i][j];
}
}
}
or
void _cleanup(std::vector<std::vector<int*> >& rtrn)
{
for (unsigned i = 0; i < rtrn.size(); i++)
{
delete[] rtrn[i][0];
delete[] rtrn[i][1];
}
}
I am working on a Matrix class for a CS project, and I'm trying to work on the constructors. The project calls for two different constructors, one just calling out the numbers of rows and columns and making them all 0 and another using an initializer list to assign the values. The header file so far is:
typedef unsigned int uint;
typedef std::initializer_list<std::initializer_list<double>> i_list;
class Matrix {
public:
double ** arr;
uint mainRows;
uint mainCols;
Matrix(uint rows, uint cols);
Matrix(const i_list & list);
Matrix(const Matrix & m);
~Matrix();
};
Some of the test cases require you to both define the rows and use the initializer list, for example:
Matrix d(2,2);
d = {{1,2},{3,4}};
But I noticed that every time I try and run this kind of code, the destructor will immediately delete the double ** arr which is where the values for the Matrix's are stored. Here is the code for the constructors:
Matrix::Matrix(uint rows, uint cols)
{
mainRows = rows;
mainCols = cols;
arr = new double*[rows];
for (int i = 0; i < mainRows; i++) {
arr[i] = new double[cols];
}
for (int i = 0; i < mainRows; i++) {
for (int j = 0; j < mainCols; j++) {
arr[i][j] = 0;
}
}
}
Matrix::Matrix(const i_list & list)
{
int i = 0, j = 0;
mainRows = list.size();
mainCols = (*list.begin()).size();
arr = new double*[mainRows];
for (std::initializer_list<double> I : list) {
j = 0;
arr[i] = new double[mainCols];
for (double d : I) {
arr[i][j] = d;
j++;
}
i++;
}
}
Matrix::Matrix(const Matrix & m)
{
this->arr = m.arr;
this->mainRows = m.mainRows;
this->mainCols = m.mainCols;
for (uint i = 0; i < mainRows; i++) {
for (uint j = 0; j < mainCols; j++) {
this->arr[i][j] = m.arr[i][j];
}
}
}
Matrix::~Matrix()
{
for (uint i = 0; i < mainRows; i++) {
delete[] arr[i];
}
delete[] arr;
}
I guess since its calling a constructor for the same object twice it's creating two double ** ars and that's why the Destructor want's to delete the original, but then I can't call on the values for other functions. Can somebody help me out with what I'm doing wrong?
The problem is that your copy-constructor only copies the pointer of the source object, not allocates new memory.
This is problematic because
d = {{1,2},{3,4}};
creates a temporary object out of {{1,2},{3,4}}. You statement is actually equal to
d = Matrix({{1,2},{3,4}});
which is equal to
d.operator=(Matrix({{1,2},{3,4}}));
After the assignment is made, you have two objects pointing to the same memory for arr. And then the temporary object is destructed, leading to arr inside d to become invalid, as it no longer points to allocated memory.
The naive solution is simple: Allocate memory for arr to point to in the copy-constructor. The better solution is to stop using pointers and dynamic allocation, and instead use std::vector, and live by the rule of zero, where you don't need any copy-constructor, no copy-assignment operator and no destructor.
This is wrong:
Matrix::Matrix(const Matrix & m)
{
this->arr = m.arr;
this->mainRows = m.mainRows;
this->mainCols = m.mainCols;
for (uint i = 0; i < mainRows; i++) {
for (uint j = 0; j < mainCols; j++) {
this->arr[i][j] = m.arr[i][j];
}
}
}
Note yuo do not creating actual copy here. this->arr = m.arr; makes both pointers to pointing same part of memory so new and old instance of Matrix are sharing this memory. So flowing for loops does nothing.
Than when one of instances is destroyed the other instance is pointing to memory which is freed.
My code has a 4D matrix in it for some math problem solving
int**** Sads = new int***[inputImage->HeightLines];
for (size_t i = 0; i < inputImage->HeightLines; i++)
{
Sads[i] = new int**[inputImage->WidthColumns];
for (size_t j = 0; j < inputImage->WidthColumns; j++)
{
Sads[i][j] = new int*[W_SIZE];
for (size_t k = 0; k < W_SIZE; k++)
{
Sads[i][j][k] = new int[W_SIZE];
}
}
}
//do something with Sads...
for (int i = 0; i < inputImage->HeightLines; i++)
{
int*** tempI = Sads[i];
for (int j = 0; j < inputImage->WidthColumns; j++)
{
int** tempJ = tempI[j];
for (int k = 0; k < W_SIZE; k++)
{
delete[] tempJ[k];
}
delete[] Sads[i][j];
}
delete[] Sads[i];
}
delete[] Sads;
The sizes are very large WidthColumns = 2018, HeightLines = 1332, W_SIZE =7, the memory allocation is very fast but the memory deallocation (delete) is very slow.
Is there a way to optimize it?
I tired openMP but it throws unrelated errors of missing DLL which are there... if I removed the #pragma omp parallel for everything works fine. but slow...
Using a pointer to a pointer to... is a bad idea because it will fragment your data a lot.
I would create a class ta manage the indices transform and use 1D array, it's a bit more complicated but it will be faster.
Anyway, a trick: nothing prevent you to build your int**** with pointers to a zone in memory that isn't sparse (1D array you preallocated) and then use it as a 4D array.
I'd probably be inclined to use a std::vector. Now memory allocation is taken care of for me (in one allocation/deallocation) and I get free copy/move semantics.
All I have to do is provide the offset calculations:
#include <vector>
#include <cstddef>
struct vector4
{
vector4(std::size_t lines, std::size_t columns)
: lines_(lines), columns_(columns)
, storage_(totalSize())
{}
auto totalSize() const -> std::size_t
{
return lines_ * columns_ * w_size * w_size;
}
int* at(std::size_t a)
{
return storage_.data() + (a * columns_ * w_size * w_size);
}
int* at(std::size_t a, std::size_t b)
{
return at(a) + (b * w_size * w_size);
}
int* at(std::size_t a, std::size_t b, std::size_t c)
{
return at(a, b) + (c * w_size);
}
int& at(std::size_t a, std::size_t b, std::size_t c, std::size_t d)
{
return *(at(a, b, c) + d);
}
private:
std::size_t lines_, columns_;
static constexpr std::size_t w_size = 32; // ?
std::vector<int> storage_;
};
int main()
{
auto v = vector4(20, 20);
v.at(3, 2, 5, 1) = 6;
// other things
// now let it go out of scope
}
The correct way to create, use, and delete a 4D array is this, using the closure of the statement group to delete the automatic variables.
{
const int H = 10;
const int I = 10;
const int J = 10;
const int K = 10;
int h = 0;
int i = 0;
int j = 0;
int k = 0;
int fourDimArray [H][I][J][K];
fourDimArray[h][i][j][k] = 0;
}
If you have a need to dynamically allocate, then use either STL's list or vector class or use something like this with perhaps inline methods to calculate the index of the 1D array from the 4D array indices if you need blazing speed.
int * fourDimArrayAsOneDim = new int[H*I*J*K];
fourDimArrayAsOneDim[indexFromIndices(h, i, j, k)] = 0;
delete [] fourDimArrayAsOneDim;
I'm building an image analysing program in c++. It takes in a text file which holds the values to build a grey scale image. I am using the sum of squared differences to find a specific block in this image.. This is built using a matrix class in a header file so I have two overloaded constructors and a destructor which deletes the pointer to the double which allocates memory on the heap for this huge array of values (768 x 1024). This however throws out a memory error; Debug assertion failed, expression: block type is valid. I can't fathom why this is happening.. To do the SSD calculation I use two for loops; two matrix objects are manipulated one of these amendments calls one of the constructors to create a new matrix object from the getting a block from a larger matrix object. I understand that the destructor is called twice through every loop as the objects go out of scope? Is this double deletion and why the error occurs? Below are my constructors and the loops. If anyone can see why I'm getting this error I'd be very happy.
Constructors:
// Matrix constructor creating a new matrix object where all elements are the same number
Matrix::Matrix(int sizeR, int sizeC, double val)
{
//cout << "Matrix(int sizeR, int sizeC, double val) is invoked.." << endl;
M = sizeR;
N = sizeC;
data = new double[M * N];// Initialise space for class array 'data'
for (int i = 0; i < M* N; i++)
{
data[i] = val;// Set each element of the array to the same value passed to the constructor from main
}
}
// Matrix constructor taking pointer to array as input; creates a new matrix object
Matrix::Matrix(int sizeR, int sizeC, double* input_data)
{
//cout << "Matrix::Matrix(int sizeR, int sizeC, double* input_data) is invoked...." << endl;
M = sizeR;
N = sizeC;
data = new double[M * N];// Initialise space for class array 'data'
for (int i = 0; i < M * N; i++)
{
data[i] = input_data[i];// Set elements in data as elements from input_data passed to the constructor from main
}
}
Destructor:
// Matrix destructor
Matrix::~Matrix()
{
//cout << "Matrix::~Matrix() is invoked..." << endl;
delete data;
}
Code in main:
for (int i = 0; i < (768 - 21); i++)
{
for (int j = 0; j < (1024 - 21); j++)
{
counter++;
clutteredBlock = cluttered.getBlock(i, (i + 21), j, (j + 21));
diff = clutteredBlock - wallyBlock;
diff = diff * diff;
tempVal = diff.Sum();
if (i == 0 && j == 0)
{
ssd = tempVal;
}
if (tempVal <= ssd)
{
ssd = tempVal;
co1 = i;
co2 = j;
}
}
}
So M, N and data are all private class members; M and N are int and data is a double*; data being the pointer I'm trying to delete and getting nowhere with.
UPDATE: If I ignore the error I am then given a HEAP CORRUPTION error saying that I am trying to write to the heap after the buffer?
UPDATE: Assignment Operator;
Matrix& Matrix::operator=(const Matrix& input)
{
//cout << "Matrix::operator= is invoked..." << endl;
if (this == &input)
{
return *this;
}
else
{
delete data;
M = input.getR();
N = input.getC();
data = new double[M * N];
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
Set(i, j, input.Get(i, j));
}
}
}
return *this;
}
Any input is greatly appreciated :)
Use a std::vector for your storage. It handles allocation and deallocation automatically. Problem solved.
I have a two dimensional array that I've allocated dynamically using new.
The problem is I want to allocate the memory as one connected block instead of in separated pieces to increase processing speed.
Does anyone know if it's possible to do this with new, or do I have to use malloc?
Here's my code:
A = new double*[m];
for (int i=0;i<m;i++)
{
A[i]= new double[n];
}
This code causes a segmentation fault
phi = new double**[xlength];
phi[0] = new double*[xlength*ylength];
phi[0][0] = new double[xlength*ylength*tlength];
for (int i=0;i<xlength;i++)
{
for (int j=0;j<ylength;j++)
{
phi[i][j] = phi[0][0] + (ylength*i+j)*tlength;
}
phi[i] = phi[0] + ylength*i;
}
You can allocate one big block and use it appropriately, something like this:
double* A = new double[m*n];
for (int i=0; i<m; i++) {
for (int j=0; j<n; j++) {
A[i*n+j] = <my_value>;
}
}
Instead of using new, you can use malloc - there is no much difference, except that new must be released with delete, and malloc() released with free().
UPDATE1:
You can create "true" 2d array as follows:
double** A = new double*[m];
double* B = new double[m*n];
for (int i=0; i<m; i++) {
A[i] = B + n*i;
}
for (int i=0; i<m; i++) {
for (int j=0; j<n; j++) {
A[i][j] = <my_value>;
}
}
Just be sure to release both A and B in the end.
UPDATE2:
By popular request, this is how you can create "true" 3-dimensional array (with dimensions m x n x o):
double*** A = new double**[m];
double** B = new double*[m*n];
double* C = new double[m*n*o];
for (int i=0; i<m; i++) {
for (int j=0; j<n; j++) {
B[n*i+j] = C + (n*i+j)*o;
}
A[i] = B + n*i;
}
for (int i=0; i<m; i++) {
for (int j=0; j<n; j++) {
for (int k=0; k<o; k++) {
A[i][j][k] = <my_value>;
}
}
}
This uses 2 relatively small "index" arrays A and B, and data array C. As usual, all three should be released after use.
Extending this for more dimensions is left as an exercise for the reader.
There is nothing you can do with malloc that you can't do with new (though the converse doesn't hold). However if you've already allocated the memory in separate blocks, you will have to allocate new (contiguous) memory in order to get a connected block (with either malloc or new). The code you show allocates m non-contiguous n-sized blocks. To get an array with contiguous memory from this, you would need
int MN = m*n;
B = new double[MN];
for (int i=0; i<MN; ++i)
B[i] = A[ i/N ] [ i%N ];
Ok, if the task is to maintain a single block of memory, but keep [][] way of addressing it, I'd try a few tricks with classes. The first one is an inside proxy:
class CoordProxy
{
private:
int coordX;
int arrayWidth;
int * dataArray;
public:
CoordProxy(int * newArray, int newArrayWidth, int newCoordX)
{
coordX = newCoordX;
arrayWidth = newArrayWidth;
dataArray = newArray;
}
int & operator [](int newCoordY)
{
return (dataArray[newCoordY * arrayWidth + coordX]);
}
};
class CoordsWrapper
{
private:
int * dataArray;
int width;
int height;
public:
CoordsWrapper(int * newArray, int newWidth, int newHeight)
{
dataArray = newArray;
width = newWidth;
height = newHeight;
}
CoordProxy operator[] (int coordX)
{
return CoordProxy(dataArray, width, coordX);
}
};
int main(int argc, char * argv[])
{
int * a = new int[4 * 4];
ZeroMemory(a, 4 * 4 * sizeof(int));
CoordsWrapper w(a, 4, 4);
w[0][0] = 10;
w[0][1] = 20;
w[3][3] = 30;
std::for_each(&a[0], &a[4 * 4], [](int x) { printf("%d ", x); });
delete[] a;
}
Note, that this is not time-efficient, but extremely memory efficient: uses 4 ints and 2 pointers more than original class.
There's even nicer and a lot faster solution, but you would have to resign from [][] notation in favor of (,) notation:
class CoordsWrapper2
{
private:
int * data;
int width;
int height;
public:
CoordsWrapper2(int * newData, int newWidth, int newHeight)
{
data = newData;
width = newWidth;
height = newHeight;
}
inline int & Data(int x, int y)
{
return data[y * width + x];
}
};
int main(int argc, char * argv[])
{
int * a = new int[4 * 4];
ZeroMemory(a, 4 * 4 * sizeof(int));
CoordsWrapper2 w(a, 4, 4);
w.Data(0, 0) = 10;
w.Data(0, 1) = 20;
w.Data(3, 3) = 30;
std::for_each(&a[0], &a[4 * 4], [](int x) { printf("%d ", x); });
delete[] a;
}
Note the inline directive. It suggests the compiler to replace the method call for actual source code, which make it a little faster. This solution is even more memory efficient and a either a tiny bit less or equally time efficient as classic indexing.