My specific case is a compute shader that performs a matrix multiplication between an n-by-m matrix and a vector of length n (where n and m are not known at compile time).
Either use a 2 dimensional image (see glsl - 8.12. Image Functions):
// The matrix is bound as a 2D image with a single 32-bit float channel (r32f).
layout(r32f) uniform image2D matrixImage;
void main()
{
// [...]
// The dimensions are queried from the bound image at run time,
// so n and m do not have to be known when the shader is compiled.
ivec2 size = ivec2(imageSize(matrixImage));
int n = size.x;
int m = size.y;
// Visit every texel: j walks the y axis (0..m-1), i walks the x axis (0..n-1).
for (int j = 0; j < m; j ++)
{
for (int i = 0; i < n; i ++)
{
// Only the first (x) component of the loaded texel is used.
float val = imageLoad(matrixImage, ivec2(i, j)).x;
// [...]
}
}
// [...]
}
Or write the data to a 1 dimensional open sized array in a Shader Storage Buffer Object:
// Shader storage block holding the matrix: the two dimensions followed by
// an unsized float array whose length is determined by the bound buffer.
layout(std430) buffer TMatrix
{
int n;
int m;
float data[];
} matrix;
// NOTE(review): matrixImage is not referenced in the visible part of this shader;
// confirm whether the elided sections need it.
layout(r32f) uniform image2D matrixImage;
void main()
{
// [...]
// The loop bounds come from the buffer itself, so the size is a run-time value.
for (int j = 0; j < matrix.m; j ++)
{
for (int i = 0; i < matrix.n; i ++)
{
// Flatten (i, j) into the 1D array: j selects a run of matrix.n consecutive elements.
int index = j * matrix.n + i;
float val = matrix.data[index];
// [...]
}
}
// [...]
}
If you know the size at compile time, it is not necessary to use an open sized array (see GLSL - 4.1.9. Arrays):
// Matrix dimensions, fixed at compile time.
const int n = 10;
const int m = 10;

// Shader storage block holding the matrix as a fixed-size n x m float array.
layout(std430) buffer TMatrix
{
    float data[n][m];
} matrix;

void main()
{
    // [...]
    // The bounds are the global constants n and m declared above; the TMatrix
    // block only contains `data`, so there is no matrix.n / matrix.m to read.
    for (int j = 0; j < m; j ++)
    {
        for (int i = 0; i < n; i ++)
        {
            float val = matrix.data[i][j];
            // [...]
        }
    }
    // [...]
}
Related
I don't know why, but my matrix multiplication is very slow and I need to optimize it. Printing the matrix (1000x1000) also takes a long time.
The aim of the program is to calculate the matrix exponential, but my main problem is that these two actions are very slow for large matrices such as 1000x1000.
These two actions are implemented in the powerMat() function and the printResult() function.
Here is the code:
// Dimension of the square matrices handled by this program.
constexpr int M = 1000;
// Euler's number. Declared as a typed constant so it can be used inside any
// expression (the previous macro form expanded with a trailing semicolon).
constexpr double e = 2.71828182845904523536;

//declaration of the functions
void sumMatrices(vector<vector<double> >& mat1, vector<vector<double> >& mat2);
void printResult(vector<vector<double> >&matRes);
void mulMatWithFactorial(long factorialValue);
long factorialCalculate(int n);
void initializeMatrix();
void initializeIdenticalMatrix();
void checkIfTheMatrixIsDiagonal();
void calculateExpoMatrixWithDiagonalMatrix();
void readMatrixFromFile();
void powerMat(vector<vector<double> >& mat, int powNum);

//declaration of the variables
// All work matrices are M x M and start out zero-filled.
vector<vector<double>> inputMatrix(M, vector<double>(M));              // matrix read from the input file
vector<vector<double>> sumMatrixResult(M, vector<double>(M));          // output of sumMatrices()
vector<vector<double>> powerMatrixResult(M, vector<double>(M));        // output of powerMat()
vector<vector<double>> mulFactorialMatrixResult(M, vector<double>(M)); // output of mulMatWithFactorial()
vector<vector<double>> finalMatrixResult(M, vector<double>(M));        // running result that is printed at the end
vector<vector<double>> identicalMatrix(M, vector<double>(M));          // filled by initializeIdenticalMatrix()
// Input matrix followed by every power computed so far (appended by powerMat()).
vector<vector<vector<double>>> listOfMatrices;
// Set by powerMat() when a computed power is entirely zero.
bool matrixIsNilpotent = false;
// Starts at 1; cleared to 0 by checkIfTheMatrixIsDiagonal() when its test fails.
int diagonaMatrixlFlag = 1;
// Reads the input matrix and prints its exponential, using the diagonal
// shortcut when applicable and a 5-term Taylor series otherwise.
int main() {
    initializeIdenticalMatrix();
    readMatrixFromFile();

    // A matrix that passes the diagonal test takes the cheaper dedicated path.
    checkIfTheMatrixIsDiagonal();
    if (diagonaMatrixlFlag == 1) {
        calculateExpoMatrixWithDiagonalMatrix();
    } else {
        // Taylor series terms 0..4: I + A + A^2/2! + A^3/3! + A^4/4!
        for (int term = 0; term < 5; ++term) {
            if (term == 0) {
                // power 0 contributes the identity matrix
                sumMatrices(finalMatrixResult, identicalMatrix);
            } else if (term == 1) {
                // power 1 contributes the input matrix itself
                sumMatrices(finalMatrixResult, inputMatrix);
            } else {
                powerMat(inputMatrix, term);
                if (matrixIsNilpotent) {
                    // the power came out all zero, so no further term contributes
                    break;
                }
                const long factorialValue = factorialCalculate(term);
                mulMatWithFactorial(factorialValue);  // power / term!
                sumMatrices(finalMatrixResult, mulFactorialMatrixResult);
            }
            // sumMatrices() writes into sumMatrixResult; fold it back into the running total
            finalMatrixResult = sumMatrixResult;
        }
    }

    printResult(finalMatrixResult);  // print the final result - e^M
    return 0;
}
//Summarize matrices
// Adds mat1 and mat2 element by element (M x M) and stores the sum in the
// global sumMatrixResult.
void sumMatrices(vector<vector<double> >& mat1, vector<vector<double> >& mat2) {
    for (int row = 0; row < M; ++row) {
        for (int col = 0; col < M; ++col) {
            const double left = mat1[row][col];
            const double right = mat2[row][col];
            sumMatrixResult[row][col] = left + right;
        }
    }
}
//Print matrix
// Prints the M x M matrix to stdout, one row per line, each value followed by a space.
void printResult(vector<vector<double> >& matRes) {
    for (int row = 0; row < M; ++row) {
        const vector<double>& line = matRes[row];
        for (int col = 0; col < M; ++col) {
            printf("%f ", line[col]);
        }
        // end the row once all of its columns have been written
        printf("\n");
    }
}
//Calculate the factorial of n
// Returns n! as a long. For n <= 0 the loop never runs and the result is 1.
long factorialCalculate(int n) {
    long product = 1;
    int factor = 1;
    while (factor <= n) {
        product *= factor;
        ++factor;
    }
    return product;
}
// mutiply the matrix with scalar
// Divides every element of powerMatrixResult by factorialValue and stores the
// quotient in mulFactorialMatrixResult.
void mulMatWithFactorial(long factorialValue) {
    for (int row = 0; row < M; ++row) {
        for (int col = 0; col < M; ++col) {
            const double numerator = powerMatrixResult[row][col] * 1;
            mulFactorialMatrixResult[row][col] = numerator / factorialValue;
        }
    }
}
//initialize matrix
// Resets every element of the global powerMatrixResult (M x M) to zero.
void initializeMatrix() {
    for (int row = 0; row < M; ++row) {
        vector<double>& target = powerMatrixResult[row];
        for (int col = 0; col < M; ++col) {
            target[col] = 0;
        }
    }
}
// Clears diagonaMatrixlFlag (sets it to 0) as soon as inputMatrix fails the test:
// every diagonal entry must be non-zero and every off-diagonal entry must be zero.
// The flag is left untouched when the whole matrix passes.
void checkIfTheMatrixIsDiagonal() {
    for (int row = 0; row < M; ++row) {
        for (int col = 0; col < M; ++col) {
            const bool onDiagonal = (row == col);
            const bool isZero = (inputMatrix[row][col] == 0);
            // A zero on the diagonal or a non-zero off it both fail the test.
            if (onDiagonal == isZero) {
                diagonaMatrixlFlag = 0;
                return;
            }
        }
    }
}
// Fills finalMatrixResult with the exponential of the diagonal inputMatrix:
// off-diagonal entries become 0 and each diagonal entry becomes e raised to
// the corresponding input value, computed by repeated multiplication.
//
// The power is accumulated starting from 1.0 and then stored; multiplying
// the (zero-initialized) result entry in place would always leave it at 0.
//
// NOTE(review): the repeated-multiplication loop is only exact for
// non-negative whole-number diagonal values; negative values yield 1 and
// fractional values are rounded up to the next whole exponent.
void calculateExpoMatrixWithDiagonalMatrix() {
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < M; j++) {
            if (i != j) {
                finalMatrixResult[i][j] = 0;
                continue;
            }
            double power = 1.0;  // e^0
            for (int k = 0; k < inputMatrix[i][j]; ++k) // loop to calculate the pow of e^alpha
            {
                power *= e;
            }
            finalMatrixResult[i][j] = power;
        }
    }
}
// Reads M*M values from "inv_matrix(1000x1000).txt" into inputMatrix, skipping a
// comma that directly follows a value, then records the matrix as the first
// entry appended to listOfMatrices.
void readMatrixFromFile() {
    ifstream input("inv_matrix(1000x1000).txt");
    for (int row = 0; row < M; ++row) {
        for (int col = 0; col < M; ++col) {
            input >> inputMatrix[row][col];
            const bool commaFollows = (input.peek() == ',');
            if (commaFollows) {
                input.ignore();
            }
        }
    }
    listOfMatrices.push_back(inputMatrix);
}
// Turns the global identicalMatrix into the M x M identity:
// 1 on the main diagonal, 0 everywhere else.
void initializeIdenticalMatrix() {
    for (int row = 0; row < M; ++row) {
        for (int col = 0; col < M; ++col) {
            identicalMatrix[row][col] = (row == col) ? 1 : 0;
        }
    }
}
// Multiplies `mat` by the matrix stored at listOfMatrices[powNum - 2] and
// leaves the product in the global powerMatrixResult, which is also appended
// to listOfMatrices. Prints the time spent in the multiplication (whole
// seconds) and sets matrixIsNilpotent when the product is entirely zero.
//
// Precondition: powNum >= 2 and listOfMatrices already holds powNum - 1
// entries (the input matrix at index 0 plus one entry per earlier call).
// `mat` must not be powerMatrixResult itself, since that matrix is cleared
// and written while `mat` is being read.
void powerMat(vector<vector<double> >& mat, int powNum) {
    initializeMatrix();

    // Looked up once instead of on every iteration of the innermost loop.
    const vector<vector<double> >& previousPower = listOfMatrices[powNum - 2];

    auto start = high_resolution_clock::now();
    // i-k-j order keeps the innermost loop walking two rows contiguously.
    for (int i = 0; i < M; i++) {
        vector<double>& resultRow = powerMatrixResult[i];
        const vector<double>& leftRow = mat[i];
        for (int k = 0; k < M; k++) {
            const double factor = leftRow[k];
            const vector<double>& rightRow = previousPower[k];
            for (int j = 0; j < M; j++) {
                resultRow[j] += factor * rightRow[j];
            }
        }
    }
    auto stop = high_resolution_clock::now();
    auto duration = duration_cast<seconds>(stop - start);
    cout << duration.count() << " seconds" << endl; // checking run time

    listOfMatrices.push_back(powerMatrixResult);

    // check if after we did A^i, the matrix is equal to 0;
    // stop scanning at the first entry that is not zero
    bool allZero = true;
    for (int i = 0; i < M && allZero; i++) {
        const vector<double>& row = powerMatrixResult[i];
        for (int j = 0; j < M; j++) {
            if (row[j] != 0) {
                allZero = false;
                break;
            }
        }
    }
    if (allZero) {
        matrixIsNilpotent = true;
    }
}
Going through each element of an array of size "n" will have some computational efficiency of O(n^2), meaning for large arrays it will take a while but won't be "life-time-of-the-universe" lengths of time.
Usually to do operations on massive arrays like this, they're reduced in some form first so that the computation can be closer to O(n) or better using some truths about reduced forms of matrices.
So, a faster implementation for matrix multiplication would start with some rref() function upon both matrices and then only evaluating parts of those matrices that would have objects in the columns and rows.
Here are some great places to review/learn (for free) Linear Algebra:
"3b1b (2016): Essence of Linear Algebra" = https://www.youtube.com/watch?v=kjBOesZCoqc&list=PL0-GT3co4r2y2YErbmuJw2L5tW4Ew2O5B
"MIT OpenCourseWare (2009): Linear Algebra" = https://www.youtube.com/watch?v=ZK3O402wf1c&list=PL49CF3715CB9EF31D&index=1
Use SSE2. It’s not a library. It’s a method to use cpu vector hardware.
You set up operations to run in parallel.
https://en.wikipedia.org/wiki/SSE2
So I have a function which creates a 2D array and fills it with test data.
Now I need to assign the returned pointer to an array.
//Fill matrix with test data
// Declares a local n-by-m int array, fills it with rand()%10 values and
// returns its address converted to int*.
int *testArrData(int m, int n){
// Array whose bounds are the run-time parameters; it is a local variable of this function.
int arr[n][m];
int* ptr;
ptr = &arr[0][0];
// Writes through the flat pointer at offset i*n + j, with i running over m and j over n.
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
*((ptr+i*n)+j) = rand()%10;
}
}
// NOTE(review): the returned pointer refers to the local array declared above.
return (int *) arr;
}
int arr[m][n];
//Algorithm - transpose
for (int i = 0; i < m; i++){
for (int j = 0; j < n; j++){
arrT[j][i] = arr[i][j];
}
}
Is there any way of doing this?
There are at least four problems with the function.
//Fill matrix with test data
// Declares a local n-by-m int array, fills it with rand()%10 values and
// returns its address converted to int*.
int *testArrData(int m, int n){
// Array whose bounds are the run-time parameters; it is a local variable of this function.
int arr[n][m];
int* ptr;
ptr = &arr[0][0];
// Writes through the flat pointer at offset i*n + j, with i running over m and j over n.
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
*((ptr+i*n)+j) = rand()%10;
}
}
// NOTE(review): the returned pointer refers to the local array declared above.
return (int *) arr;
}
First of all you declared a variable length array
int arr[n][m];
Variable length arrays are not a standard C++ feature.
The second problem is that these for loops
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
*((ptr+i*n)+j) = rand()%10;
}
}
are incorrect. It seems you mean
for(int i = 0; i < n; i++){
for(int j = 0; j < m; j++){
*((ptr+i*m)+j) = rand()%10;
}
}
You are returning a pointer to a local array with automatic storage duration that will not be alive after exiting the function. So the returned pointer will be invalid.
And arrays do not have the assignment operator.
Instead use the vector std::vector<std::vector<int>>. For example
// Builds an n-row by m-column matrix and fills it, row by row, with
// rand() % 10 test values. The matrix is returned by value.
std::vector<std::vector<int>> testArrData(int m, int n){
    std::vector<std::vector<int>> matrix( n, std::vector<int>( m ) );
    for ( auto row = matrix.begin(); row != matrix.end(); ++row )
    {
        for ( auto cell = row->begin(); cell != row->end(); ++cell )
        {
            *cell = rand() % 10;
        }
    }
    return matrix;
}
This is how I would accomplish this. I agree with int ** because it is easy to understand if you don't know how to use vectors. Also, rand() can cause trouble if you use its result to index an array, so make sure the value cannot be negative, e.g. with abs(rand() % number). (The standard rand() already returns a value between 0 and RAND_MAX, so the abs() is only a safeguard.)
I've updated the answer due to some vital missing code.
// This method creates the overhead / an array of pointers for each matrix
typedef int* matrix_cells;

// Allocates a num_rows x num_cols int matrix that can be addressed as
// matrix[row][col].
//
// Both dimensions are clamped to the range 1..1000. All cells live in one
// contiguous, zero-initialized block; the returned array holds one pointer
// per row into that block. When init_rnd is true every cell is overwritten
// with a value in 0..9 taken from rand().
//
// The caller owns both allocations and must release them with
//   delete[] matrix[0];   // the cell block
//   delete[] matrix;      // the row-pointer array
int **create_row_col_matrix(int num_rows, int num_cols, bool init_rnd)
{
    num_rows = min(max(num_rows, 1), 1000); // ensure num_rows = 1 - 1000
    num_cols = min(max(num_cols, 1), 1000); // ensure num_cols = 1 - 1000

    // the trailing () zero-initializes, so the cells are readable even when init_rnd is false
    int *matrix_total = new int[num_rows*num_cols]();

    // overhead: create an array that points to each row
    int **matrix_row_col = new matrix_cells[num_rows];

    // initialize the array of row pointers
    for (int a = 0; a < num_rows; ++a)
    {
        matrix_row_col[a] = &matrix_total[num_cols*a];
    }

    // assign the test data
    if (init_rnd)
    {
        for (int run_y = 0; run_y < num_rows; ++run_y)
        {
            for (int run_x = 0; run_x < num_cols; ++run_x)
            {
                matrix_row_col[run_y][run_x] = abs(rand() % 10);
            }
        }
    }
    return matrix_row_col;
}
// Source is src_y rows by src_x columns; the destination has the two swapped.
int src_x = 7, dst_x = 11;
int src_y = 11, dst_y = 7;
int **arr_src = create_row_col_matrix(src_y, src_x, true);
int **arr_dst = create_row_col_matrix(dst_y, dst_x, false);
// Transpose: destination cell (a, b) takes source cell (b, a).
for (int a = 0; a < dst_y; ++a)
{
for (int b = 0; b < dst_x; ++b)
{
arr_dst[a][b] = arr_src[b][a];
}
}
// Both allocations of each matrix were made with new[], so release them with delete[].
delete[] arr_src[0]; // int *matrix_total = new int[src_y*src_x]
delete[] arr_src; // int **matrix_row_col = new matrix_cells[src_y]
delete[] arr_dst[0]; // int *matrix_total = new int[dst_y*dst_x]
delete[] arr_dst; // int **matrix_row_col = new matrix_cells[dst_y]
// the overhead is arr_src and arr_dst which are arrays of row pointers
// the row pointers make it convenient to address the cells as [rown][coln]
First of all, I made this class.
// Matrix of at most 4 x 4 doubles together with the number of rows and columns in use.
class Matrix
{
public:
// Element storage; valid indices are 0..3 in each dimension.
double ele[4][4];
int numOfRow;
int numOfColumns;
public:
// Default constructor: sets both dimensions to 0.
Matrix() {
numOfRow = 0;
numOfColumns = 0;
// NOTE(review): this is an assignment to the single element at index [4][4],
// which lies outside the declared [4][4] bounds; it does not clear the array.
ele[4][4] = 0;
}
// Copies the top-left Row x Col part of mat into ele and records the dimensions.
Matrix(double mat[][4], int Row, int Col) {
numOfRow = Row;
numOfColumns = Col;
for (int i = 0; i < numOfRow; i++) {
// NOTE(review): the loop condition tests i while the loop advances j.
for (int j = 0; i < numOfColumns; j++) {
ele[i][j] = mat[i][j];
}
}
}
// Returns the element-wise sum of this matrix and m over this matrix's
// numOfRow x numOfColumns range.
// NOTE(review): output is default-constructed and its numOfRow / numOfColumns
// are not updated here, so they stay at the 0 set by the default constructor.
Matrix Add(Matrix m) {
Matrix output;
for (int i = 0; i < numOfRow; i++) {
for (int j = 0; j < numOfColumns; j++) {
output.ele[i][j] = ele[i][j] + m.ele[i][j];
}
}
return output;
}
// Declared here; no definitions are visible in this snippet.
Matrix Subtract(Matrix m);
Matrix Multiply(Matrix m);
Matrix Transpose(void);
};
This is part of the main function. Here I read the values from the txt files I have already prepared into matA and matB. It is just a matter of filling in the numbers.
double matA[4][4];
for (int i = 0; i < RowA; i++) {
for (int j = 0; j < ColA; j++) {
fscanf(fpInput, "%lf", &matA[i][j]);
}
}
double matB[4][4];
for (int i = 0; i < RowB; i++) {
for (int j = 0; j < ColB; j++) {
fscanf(fpInput, "%lf", &matB[i][j]);
}
}
And we substitute matrixA and matrixB class objects, respectively.
Matrix matrixA(matA, RowA, ColA);
Matrix matrixB(matB, RowB, ColB);
I tried to store the value returned by the 'Add' function in a class object called matrixO, but the values were not stored correctly. For example, if matrixA contains (1, 2, 3) in order and matrixB contains (4, 5, 6), then after calling 'Add' the array of matrixO should contain (5, 7, 9), but it does not. The values of matrixO.ele are not output at all.
Matrix matrixO = matrixA.Add(matrixB);
for (int i = 0; i < RowA; i++) {
for (int j = 0; j < ColA; j++) {
fprintf(fpOutput, "%lf ", matrixO.ele[i][j]);
printf("%lf", matrixO.ele[i][j]);
}
fprintf(fpOutput, "\n");
}
In the Matrix constructor section, I changed it like this.
public:
Matrix() {
numOfRow = 0;
numOfColumns = 0;
ele[4][4] = ele[0][0];
}
public:
Matrix() {
numOfRow = 0;
numOfColumns = 0;
ele[4][4] = {};
}
But both of these cases are wrong. How do we solve this issue?
You are assigning a value to your matrix out-of-bounds:
ele[4][4] = 0;
The last element of double ele[4][4]; is ele[3][3];
This is undefined behavior, so it makes no sense to analyze what happens after it.
You can 0-initialize your Matrix in its constructor like this:
Matrix(): ele(), numOfRow(), numOfColumns() {}
My task is to generate a square matrix of zeros in a function and return it. There are plenty of ways to do this, but I decided not to return the matrix by value, for efficiency. I went for a pointer approach like in this answer, but since it requires cleaning up the memory manually (and, as far as I know, it's better to use smart pointers), I decided to turn it into std::unique_ptr, but I can't get it to work. This is my code:
#include <iostream>
#include <memory>
// Intended to build an n-by-n matrix of zeros and hand ownership to the caller.
std::unique_ptr<std::unique_ptr<int>[] > GenerateMatrix(const int &n) {
// Allocates an array of n default-constructed std::unique_ptr<int> objects;
// no int storage is allocated for any of them here.
std::unique_ptr<std::unique_ptr<int>[] > matrix(new std::unique_ptr<int>[n]);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
// NOTE(review): matrix[i] has not been given anything to own at this point,
// so get() returns a null pointer and this line indexes through it.
matrix[i].get()[j] = 0;
}
}
return matrix;
}
// Generates a 4 x 4 matrix and prints it, one line per value of i.
int main() {
int n = 4;
auto matrix = GenerateMatrix(n);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
// The indices are used as matrix[j] ... [i]: j selects the outer element, i the inner one.
std::cout<<matrix[j].get()[i]<<" ";
}
std::cout<<std::endl;
}
return 0;
}
What do I do wrong here? Is this approach even correct?
Why not just make your life easier by
// Returns an m-row by n-column matrix whose elements are all zero.
vector<vector<int>> generate (int m, int n)
{
    const vector<int> zero_row(n);             // n value-initialized ints
    vector<vector<int>> matrix(m, zero_row);   // m copies of that row
    return matrix;
}
int main()
{
int m = 3, n = 4;
auto matrix = generate(m, n); // a 3-by-4 matrix of zeros
return 0;
}
Just rely on guaranteed copy elision or return value optimization:
// Returns a flat vector of n*n ints, all zero, representing an n-by-n matrix.
std::vector<int> GenerateMatrix(const int &n) {
    const int cell_count = n * n;
    // the size-only constructor value-initializes every element, i.e. sets it to 0
    std::vector<int> cells(cell_count);
    return cells;
}
You might create and initialize a matrix at compile time. For example:
// Fixed-size RowCount x ColumnCount int matrix whose elements all start out
// equal to DefaultValue. Elements are addressed as matrix.row[r].column[c].
template<int RowCount, int ColumnCount, int DefaultValue = 0>
struct Matrix
{
    // Zero-length arrays are not valid, so both dimensions must be at least 1.
    static_assert(RowCount > 0 && ColumnCount > 0,
        "The number of rows and columns should be positive");
    struct Row
    {
        int column[ColumnCount] = {};
        // A brace initializer `{ DefaultValue }` would set only column[0] and
        // zero the rest, so every element is assigned explicitly instead.
        constexpr Row()
        {
            for (int &cell : column)
            {
                cell = DefaultValue;
            }
        }
    };
    Row row[RowCount];
};
And use it like:
Matrix<2, 2, 33> matrix;
auto val = matrix.row[0].column[0]; // val == 33
matrix.row[0].column[0] = 55;
val = matrix.row[0].column[0]; // val == 55
Be careful to stay within the matrix dimensions when referring to its elements by row and column.
You are not allocating enough memory for your matrix. Change this line:
std::unique_ptr<std::unique_ptr<int>[] > matrix(new std::unique_ptr<int>[n*n]);
Also, I would just use i*n + j for your accesses since you are really dealing with a 1D array:
#include <iostream>
#include <memory>
// Allocates n*n elements and addresses them with the flat index i*n + j.
// NOTE(review): each element is a std::unique_ptr<int>, not an int, so no int
// storage exists in this array; an int matrix would presumably need an array
// of int instead - confirm against the intended design.
std::unique_ptr<std::unique_ptr<int>[] > GenerateMatrix(const int &n) {
std::unique_ptr<std::unique_ptr<int>[] > matrix(new std::unique_ptr<int>[n*n]);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
// The left-hand side is a std::unique_ptr<int>; assigning 0 sets that pointer to null.
matrix.get()[i*n+j] = 0;
}
}
return matrix;
}
// Generates the matrix for n = 4 and streams its n*n elements, n per line.
int main() {
int n = 4;
auto matrix = GenerateMatrix(n);
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
// NOTE(review): the streamed element is a std::unique_ptr<int>, not an int value.
std::cout<<matrix.get()[i*n+j]<<" ";
}
std::cout<<std::endl;
}
return 0;
}
I need to implement a matrix transpose procedure in C++.
The problem is the signature, the function has to be called like this:
transpose(in_mat[0][0], n, m, out_mat[0][0])
where n and m are the dimensions.
All values are doubles, both the matrices and the dimensions.
Since the code is automatically generated, I can't fix this.
My workaround looks like this:
// Transposes a row-major _n x _m matrix into a row-major _m x _n matrix.
// in_mat and out_mat are references to the first element of each matrix;
// the dimensions arrive as doubles and are converted to int.
void transpose(double& in_mat, const double _n, const double _m, double& out_mat)
{
    const int rows = _n;
    const int cols = _m;
    const double* src = &in_mat;
    double* dst = &out_mat;
    for (int r = 0; r < rows; ++r) {
        const double* src_row = src + r * cols;
        for (int c = 0; c < cols; ++c) {
            // source element (r, c) lands at destination element (c, r)
            dst[c * rows + r] = src_row[c];
        }
    }
}
It works fine.
I've constructed a test case with two matrices of different width and height. One is filled with random numbers, the other is filled with zeros. Then the transpose procedure is called and the two matrices are compared.
The functionality is correct.
But it corrupts the stack. When run in Visual Studio 2015 there is a warning
Run-Time Check Failure #2 - Stack around the variable 'in_mat' was corrupted.
What did I do wrong ? Why is the stack corrupted ?
Code after the invocation of transpose works correctly.
EDIT:
Here is the complete setup:
#include <random>
#include <iostream>
// Transposes a row-major _n x _m matrix into a row-major _m x _n matrix.
// in_mat and out_mat are references to the first element of each matrix;
// the dimensions arrive as doubles and are converted to int.
void transpose(double& in_mat, const double _n, const double _m, double& out_mat)
{
    const int rows = _n;
    const int cols = _m;
    const double* src = &in_mat;
    double* dst = &out_mat;
    for (int r = 0; r < rows; ++r) {
        const double* src_row = src + r * cols;
        for (int c = 0; c < cols; ++c) {
            // source element (r, c) lands at destination element (c, r)
            dst[c * rows + r] = src_row[c];
        }
    }
}
// Test driver: fills in_mat with random values and out_mat with zeros,
// transposes in_mat into out_mat, then compares the two element by element.
int main()
{
double in_mat[5][4];
double out_mat[4][5];// assign matrix
// NOTE(review): in_mat is declared [5][4] and out_mat [4][5], while here i runs
// over 0..3 and j over 0..4, with i as the first index of in_mat and j as the
// first index of out_mat.
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 5; j++) {
in_mat[i][j] = std::rand();
out_mat[j][i] = 0;
}
}
// The dimensions are passed as doubles because that is what transpose() takes.
double n = 5;
double m = 4;
transpose(in_mat[0][0], n, m, out_mat[0][0]);
// Verify: element (i, j) of the input must match element (j, i) of the output.
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
// Only a positive difference above 0.0001 is reported; the difference is not taken as an absolute value.
if (in_mat[i][j] - out_mat[j][i]>0.0001) {
std::cout << "code is broken" << std::endl; //never reached
}
}
}
std::cout << "finished" << std::endl;
}
Your subscripts (or loop limits) were backwards where you initialized the matrices.
You have
double in_mat[5][4];
double out_mat[4][5];// assign matrix
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 5; j++) {
in_mat[i][j] = std::rand();
out_mat[j][i] = 0;
}
}
When j==4 you are writing beyond the end of out_mat