My code is really slow and I need to optimize it - C++
#include <iostream>
#include <chrono>
using namespace std;
// Fills two m x n matrices with scaled rand() values, runs a four-level nested
// loop that accumulates products into a third matrix, and prints how long that
// loop took.
int main()
{
const unsigned int m = 200;
const unsigned int n = 200;
// Seed rand() from the clock's time since epoch, converted to seconds (as a
// double) and truncated to unsigned int.
srand(static_cast<unsigned int>(static_cast<std::chrono::duration<double>
>(std::chrono::high_resolution_clock::now().time_since_epoch()).count()));
double** matrixa;
double** matrixb;
double** matrixc;
// Each matrix is an array of m row pointers; the rows are allocated below.
matrixa = new double* [m];
matrixb = new double* [m];
matrixc = new double* [m];
// Divisor used to scale rand() below.
// NOTE(review): 2^31 equals RAND_MAX + 1 only where RAND_MAX is 2^31 - 1 —
// confirm for the target platform.
unsigned int max = static_cast<unsigned int>(1u << 31);
for (unsigned int i = 0; i < m; i++)
matrixa[i] = new double[n];
for (unsigned int i = 0; i < m; i++)
matrixb[i] = new double[n];
// NOTE(review): new double[n] leaves the elements uninitialized, and nothing
// below assigns matrixc[i][j] before the += in the timed loop reads it.
for (unsigned int i = 0; i < m; i++)
matrixc[i] = new double[n];
// Fill both inputs with rand() / max * 10.
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixa[i]
[j] = static_cast<double>(static_cast<double>(rand()) / max * 10);
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixb[i]
[j] = static_cast<double>(static_cast<double>(rand()) / max * 10);
// Timed section.
// NOTE(review): the two inner loops (k, l) do not depend on i or j, so the same
// sum of matrixa[k][l] * matrixb[l][k] is added to every matrixc[i][j]. This is
// four nested loops rather than the three of a standard matrix product. Both
// k and l are bounded by m, which only matches the row length n because m == n.
auto start = std::chrono::high_resolution_clock::now();
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
for (unsigned int k = 0; k < m; k++)
for (unsigned int l = 0; l < m; l++)
matrixc[i][j] += matrixa[k][l] * matrixb[l][k];
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_diff = stop - start;
// The message is Polish for "Program execution time <t> seconds."
cout << "Czas wykonania programu " << time_diff.count() << " sekund." <<
endl;
// Release every row first, then the arrays of row pointers.
for (unsigned int i = 0; i < m; i++)
delete[] matrixa[i];
for (unsigned int i = 0; i < m; i++)
delete[] matrixb[i];
for (unsigned int i = 0; i < m; i++)
delete[] matrixc[i];
delete[] matrixa;
delete[] matrixb;
delete[] matrixc;
return 0;
}
I have this code and I would like to optimize it, but unfortunately I have absolutely no idea how to go about it. Maybe someone has an idea and would like to help me? I got to the point where the program takes 105 seconds to run for 400 arrays, but that is still too long, and I would like to make it run faster. I found the OpenMP library and the thread class, but I don't know how to use them in my program.
Firstly, your matrix multiplication algorithm is more complex than the normal one (or it is simply wrong). You can refer to Wikipedia for the typical algorithm:
Input: matrices A and B
Let C be a new matrix of the appropriate size
For i from 1 to n:
    For j from 1 to p:
        Let sum = 0
        For k from 1 to m:
            Set sum ← sum + $A_{ik} \times B_{kj}$
        Set $C_{ij}$ ← sum
Return C
There is also a critical bug in your code: you never initialize the result matrix.
So the fixed code may look like this:
#include <chrono>
#include <iostream>
using namespace std;
// Multiplies a random m x n matrix A by a random n x p matrix B into the
// m x p matrix C using the textbook i-j-k loop order, and prints how long the
// multiplication took.
int main() {
  const unsigned int m = 200;  // rows of A and of C
  const unsigned int n = 201;  // columns of A == rows of B
  const unsigned int p = 202;  // columns of B and of C

  // Seed rand() with the clock's time since epoch, expressed in seconds and
  // truncated to an unsigned integer.
  const auto since_epoch =
      std::chrono::high_resolution_clock::now().time_since_epoch();
  srand(static_cast<unsigned int>(
      std::chrono::duration<double>(since_epoch).count()));

  // rand() returns a value in [0, RAND_MAX]. Dividing by RAND_MAX + 1.0 maps
  // it to [0, 1) on every platform instead of assuming RAND_MAX == 2^31 - 1.
  // The 1.0 keeps the addition in double so it cannot overflow int.
  const double rand_range = RAND_MAX + 1.0;

  double** matrixa = new double*[m];
  double** matrixb = new double*[n];
  double** matrixc = new double*[m];
  for (unsigned int i = 0; i < m; i++) matrixa[i] = new double[n];
  for (unsigned int i = 0; i < n; i++) matrixb[i] = new double[p];
  // The trailing () value-initializes every element to 0.0. C must start at
  // zero because the product below is accumulated with +=.
  for (unsigned int i = 0; i < m; i++) matrixc[i] = new double[p]();

  // Fill A and B with pseudo-random values in [0, 10).
  for (unsigned int i = 0; i < m; i++)
    for (unsigned int j = 0; j < n; j++)
      matrixa[i][j] = rand() / rand_range * 10;
  for (unsigned int i = 0; i < n; i++)
    for (unsigned int j = 0; j < p; j++)
      matrixb[i][j] = rand() / rand_range * 10;

  // Timed section: C[i][j] += A[i][k] * B[k][j] in i-j-k order. The innermost
  // loop walks down a column of B, touching a different row array each step.
  auto start = std::chrono::high_resolution_clock::now();
  for (unsigned int i = 0; i < m; i++)
    for (unsigned int j = 0; j < p; j++)
      for (unsigned int k = 0; k < n; k++)
        matrixc[i][j] += matrixa[i][k] * matrixb[k][j];
  auto stop = std::chrono::high_resolution_clock::now();

  std::chrono::duration<double> time_diff = stop - start;
  // The message is Polish for "Program execution time <t> seconds."
  std::cout << "Czas wykonania programu " << time_diff.count() << " sekund."
            << '\n';

  // Release every row first, then the arrays of row pointers.
  for (unsigned int i = 0; i < m; i++) delete[] matrixa[i];
  for (unsigned int i = 0; i < n; i++) delete[] matrixb[i];
  for (unsigned int i = 0; i < m; i++) delete[] matrixc[i];
  delete[] matrixa;
  delete[] matrixb;
  delete[] matrixc;
  return 0;
}
Now it is much faster than the code in the question.
It can be made faster still with a slight modification (swapping the two inner loops):
#include <chrono>
#include <iostream>
using namespace std;
// Multiplies a random m x n matrix A by a random n x p matrix B into the
// m x p matrix C using the i-k-j loop order, and prints how long the
// multiplication took.
int main() {
  const unsigned int m = 200;  // rows of A and of C
  const unsigned int n = 201;  // columns of A == rows of B
  const unsigned int p = 202;  // columns of B and of C

  // Seed rand() with the clock's time since epoch, expressed in seconds and
  // truncated to an unsigned integer.
  const auto since_epoch =
      std::chrono::high_resolution_clock::now().time_since_epoch();
  srand(static_cast<unsigned int>(
      std::chrono::duration<double>(since_epoch).count()));

  // rand() returns a value in [0, RAND_MAX]. Dividing by RAND_MAX + 1.0 maps
  // it to [0, 1) on every platform instead of assuming RAND_MAX == 2^31 - 1.
  // The 1.0 keeps the addition in double so it cannot overflow int.
  const double rand_range = RAND_MAX + 1.0;

  double** matrixa = new double*[m];
  double** matrixb = new double*[n];
  double** matrixc = new double*[m];
  for (unsigned int i = 0; i < m; i++) matrixa[i] = new double[n];
  for (unsigned int i = 0; i < n; i++) matrixb[i] = new double[p];
  // The trailing () value-initializes every element to 0.0. C must start at
  // zero because the product below is accumulated with +=.
  for (unsigned int i = 0; i < m; i++) matrixc[i] = new double[p]();

  // Fill A and B with pseudo-random values in [0, 10).
  for (unsigned int i = 0; i < m; i++)
    for (unsigned int j = 0; j < n; j++)
      matrixa[i][j] = rand() / rand_range * 10;
  for (unsigned int i = 0; i < n; i++)
    for (unsigned int j = 0; j < p; j++)
      matrixb[i][j] = rand() / rand_range * 10;

  // Timed section: same arithmetic as the i-j-k version, but with the k and j
  // loops swapped so the innermost loop walks along row i of C and row k of B
  // instead of down a column of B.
  auto start = std::chrono::high_resolution_clock::now();
  for (unsigned int i = 0; i < m; i++)
    for (unsigned int k = 0; k < n; k++)
      for (unsigned int j = 0; j < p; j++)
        matrixc[i][j] += matrixa[i][k] * matrixb[k][j];
  auto stop = std::chrono::high_resolution_clock::now();

  std::chrono::duration<double> time_diff = stop - start;
  // The message is Polish for "Program execution time <t> seconds."
  std::cout << "Czas wykonania programu " << time_diff.count() << " sekund."
            << '\n';

  // Release every row first, then the arrays of row pointers.
  for (unsigned int i = 0; i < m; i++) delete[] matrixa[i];
  for (unsigned int i = 0; i < n; i++) delete[] matrixb[i];
  for (unsigned int i = 0; i < m; i++) delete[] matrixc[i];
  delete[] matrixa;
  delete[] matrixb;
  delete[] matrixc;
  return 0;
}
This code is more cache-friendly; the explanation can be found here.
The code can be improved further by parallelizing it. With OpenMP, speeding up the previous code takes only a one-line change:
We also need to add the build option -fopenmp to compile it.
#include <chrono>
#include <iostream>
using namespace std;
// Multiplies a random m x n matrix A by a random n x p matrix B into the
// m x p matrix C using the i-k-j loop order, with the outer loop parallelized
// by OpenMP, and prints how long the multiplication took.
// Build with OpenMP enabled (e.g. -fopenmp for GCC/Clang).
int main() {
  const unsigned int m = 200;  // rows of A and of C
  const unsigned int n = 201;  // columns of A == rows of B
  const unsigned int p = 202;  // columns of B and of C

  // Seed rand() with the clock's time since epoch, expressed in seconds and
  // truncated to an unsigned integer.
  const auto since_epoch =
      std::chrono::high_resolution_clock::now().time_since_epoch();
  srand(static_cast<unsigned int>(
      std::chrono::duration<double>(since_epoch).count()));

  // rand() returns a value in [0, RAND_MAX]. Dividing by RAND_MAX + 1.0 maps
  // it to [0, 1) on every platform instead of assuming RAND_MAX == 2^31 - 1.
  // The 1.0 keeps the addition in double so it cannot overflow int.
  const double rand_range = RAND_MAX + 1.0;

  double** matrixa = new double*[m];
  double** matrixb = new double*[n];
  double** matrixc = new double*[m];
  for (unsigned int i = 0; i < m; i++) matrixa[i] = new double[n];
  for (unsigned int i = 0; i < n; i++) matrixb[i] = new double[p];
  // The trailing () value-initializes every element to 0.0. C must start at
  // zero because the product below is accumulated with +=.
  for (unsigned int i = 0; i < m; i++) matrixc[i] = new double[p]();

  // Fill A and B with pseudo-random values in [0, 10).
  for (unsigned int i = 0; i < m; i++)
    for (unsigned int j = 0; j < n; j++)
      matrixa[i][j] = rand() / rand_range * 10;
  for (unsigned int i = 0; i < n; i++)
    for (unsigned int j = 0; j < p; j++)
      matrixb[i][j] = rand() / rand_range * 10;

  // Timed section. Iteration i of the outer loop writes only to row i of C and
  // only reads A and B, so the iterations are independent and can be split
  // across threads without further synchronization.
  auto start = std::chrono::high_resolution_clock::now();
#pragma omp parallel for
  for (unsigned int i = 0; i < m; i++)
    for (unsigned int k = 0; k < n; k++)
      for (unsigned int j = 0; j < p; j++)
        matrixc[i][j] += matrixa[i][k] * matrixb[k][j];
  auto stop = std::chrono::high_resolution_clock::now();

  std::chrono::duration<double> time_diff = stop - start;
  // The message is Polish for "Program execution time <t> seconds."
  std::cout << "Czas wykonania programu " << time_diff.count() << " sekund."
            << '\n';

  // Release every row first, then the arrays of row pointers.
  for (unsigned int i = 0; i < m; i++) delete[] matrixa[i];
  for (unsigned int i = 0; i < n; i++) delete[] matrixb[i];
  for (unsigned int i = 0; i < m; i++) delete[] matrixc[i];
  delete[] matrixa;
  delete[] matrixb;
  delete[] matrixc;
  return 0;
}
It would be better to use std::vector rather than dynamically allocated arrays; that work is left to you.
Related
Assigning value to pointer with multithreading generates wrong result
void assign(short *inPtr, int ext[4], int sz[2]) { for (int j = ext[2]; j < ext[3]; j++) for (int i = ext[0]; i < ext[1]; i++) { inPtr[i + j * sz[1]] = 100; } } int main(int, char **) { int size[2] = {16, 16}; short ptr[256]; std::vector<std::thread> pool; int block = 2; int row = size[0] / block; int col = size[1] / block; for (size_t j = 0; j < row; j++) for (size_t i = 0; i < col; i++) { int ext[4] = {i * block, (i + 1) * block, j * block, (j + 1) * block}; pool.push_back(std::thread(assign, ptr, ext, size)); } for (size_t i = 0; i < row * col; i++) { pool[i].join(); } for (size_t i = 0; i < 256; i++) { std::cout << (double)ptr[i] << ", "; } std::cout << std::endl; } I'm trying to assign value to pointer ptr by using multithreading. The code above is supposed to assign 100 to all element of ptr, however, there are many 0 in the output. Why does it happen and how to fix it ?
dijkstra's algorithm using matrix in c++. what's wrong in this code?
#include <iostream> int n, m, v1, v2, weight; cin >> n >> m; int** graph = new int*[n]; int* distance = new int[n]; int* s = new int[n]; for (int i = 0; i < n; ++i) graph[i] = new int[n]; for (int i = 0; i < n; ++i) for (int j = 0; j < n; ++j) graph[i][j] = INT_MAX; for (int i = 0; i < m; ++i) { cin >> v1 >> v2 >> weight; graph[v1][v2] = weight; graph[v2][v1] = weight; } for (int i = 0; i < n; ++i) distance[i] = INT_MAX; for (int i = 0; i < n; ++i) distance[i] = graph[0][i]; for (int i = 0; i < n; ++i) s[i] = 0; distance[0] = 0; int min = INT_MAX; int vertex = 0; for (int j = 0; j < n-1; ++j){ min = INT_MAX; for (int i = 0; i < n; ++i) if (s[i] == 0 && min >= distance[i]) { vertex = i; min = distance[i]; } s[vertex] = 1; cout << vertex << " "; for (int i = 0; i < n; ++i) if (distance[i]>distance[vertex] + graph[vertex][i]) distance[i] = distance[vertex] + graph[vertex][i]; } for (int i = 0; i < n; ++i) cout << distance[i] << " "; cout << endl; return 0; } Hi. I'm making Dijkstra's algorithm using two-dimentional matrix.. but this code doesn't work. and i don't know why! Can you fix my problem?? i want to make output all distance of graph. but output is looks like array point garbage value like -2345... Can you help me??
There are some problems in this loop: for (int i = 0; i < n; ++i) if (distance[i]>distance[vertex] + graph[vertex][i]) distance[i] = distance[vertex] + graph[vertex][i]; It should be changed to the code below: for (int i = 0; i < n; ++i) if (s[i] == 0 && graph[vertex][i] != INT_MAX && distance[i]>distance[vertex] + graph[vertex][i]) distance[i] = distance[vertex] + graph[vertex][i]; because if graph[vertex][i] == INT_MAX, the sum distance[vertex] + graph[vertex][i] overflows. Another problem is that the update must only be applied to a vertex i that has not already been marked.
dynamic array in c++
I need to implement a 5x5 dynamic array where every element in it is equal to the sum of its two indices. For example, the first element, at (0,0), has the value 0+0=0. Here is my code: # include<iostream> using namespace std; int main() { int size =5; int *array=new int[size]; for (int i = 0; i < size; i++) delete [] array; return 0; } I need help to implement sum of index.
You need at first to implement a two-dimensional array.:) Here you are. #include <iostream> int main() { const size_t N = 5; int ( *array )[N] = new int[N][N]; for ( size_t i = 0; i < N; i++ ) { for ( size_t j = 0; j < N; j++ ) array[i][j] = i + j; } for ( size_t i = 0; i < N; i++ ) { for ( size_t j = 0; j < N; j++ ) std::cout << array[i][j] << ' '; std::cout << std::endl; } delete [] array; return 0; } And do not pay attention that the answer is down voted. There is nothing wrong with the answer. :)
Firstly, you should create a 2d-array, not just a array. void foo() { int **a = new int*[5]; for (int i = 0; i < 5; i++) a[i] = new int[5]; } for (int i = 0; i < 5; i++) for (int j = 0; j < 5; j++) a[i][j] = i + j; for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) cout << a[i][j] << " "; cout << endl; } for (int i = 0; i < 5; i++) delete[] a[i]; delete[] a; } And, of course, don't forget to clear your memory)
Printing to a STL(stereolithography) file in binary mode
I am trying to print to a stl file and can't print correctly. There are lot of samples for Hex format printing in C++ but no sample program for binary format. My program is as follow. What is wrong with my program? string name = "Create by stlwrite.m "; name = name + currentDateTime(); pFile.setf(ios::left); pFile.width(sizeof(unsigned char)*80); //header pFile << name; unsigned int size = faces.rows; //size pFile.write((char*)&size,sizeof(size)); int height = 25; unsigned short ** data= new unsigned short *[height]; for(int i = 0; i < height; i++) { data[i] = new unsigned short[142000]; } for(int j = 0; j < 142000; j++) { int i = 0; //for one facets for(int k = 0; k < (*facets[j]).cols; k++) { for(int l = 0; l < (*facets[j]).rows; l++) { float f = (*facets[j]).at<float>(l,k); data[i][j] = *reinterpret_cast<unsigned int *>(&f); data[i+1][j] = *reinterpret_cast<unsigned int *>(&f)>>16; i = i + 2; } } //then for the last row data[height-1][j] = (unsigned short)0; } for (int i = 0; i < height; i++) for (int j = 0; j < faces.rows; j++) pFile.write ((char*)&data[i][j], sizeof(unsigned short) ); pFile.close(); EDIT1: I follow the idea of Matlab stlwrite.mat program.link
This is the C code to print in binary mode. The calling function is: stl_write_binary(X, Y, path) Where X is 3xM Vertices matrix and Y is 3xN faces matrix. Both are in OpenCV's Mat structure. Path is the path to store the STL file you can modify it according to your requirement. The whole code is: int stl_write_binary(Mat &vertemp, Mat &faces, PathToFolders &path) { Mat vertices(vertemp.cols,vertemp.rows,vertemp.type()); vertices = vertemp.t(); cv::Mat** facets = new cv::Mat*[faces.rows]; for(int i = 0; i < faces.rows; i++) { facets[i] = new cv::Mat(3,4,CV_32F); for(int j = 0; j < 3; j++) { vertices(Range::all(),Range(faces.at<int>(i,j),faces.at<int>(i,j)+1)).convertTo((*(facets[i]))(Range::all(),Range(j+1,j+2)), CV_32F); } } //Compute their normals Mat V1(3,faces.rows,CV_32F); Mat V2(3,faces.rows,CV_32F); for(int i = 0; i < faces.rows; i++) { V1(Range::all(),Range(i,i+1)) = ((*(facets[i]))(Range::all(),Range(2,3))) - ((*(facets[i]))(Range::all(),Range(1,2))); V2(Range::all(),Range(i,i+1)) = ((*(facets[i]))(Range::all(),Range(3,4))) - ((*(facets[i]))(Range::all(),Range(1,2))); } Mat V3(3,faces.rows,CV_32F); Mat V4(3,faces.rows,CV_32F); Mat V5(3,faces.rows,CV_32F); Mat V6(3,faces.rows,CV_32F); V1(Range(1,2),Range::all()).copyTo(V3(Range(0,1),Range::all())); V1(Range(2,3),Range::all()).copyTo(V3(Range(1,2),Range::all())); V1(Range(0,1),Range::all()).copyTo(V3(Range(2,3),Range::all())); V2(Range(2,3),Range::all()).copyTo(V4(Range(0,1),Range::all())); V2(Range(0,1),Range::all()).copyTo(V4(Range(1,2),Range::all())); V2(Range(1,2),Range::all()).copyTo(V4(Range(2,3),Range::all())); V2(Range(1,2),Range::all()).copyTo(V5(Range(0,1),Range::all())); V2(Range(2,3),Range::all()).copyTo(V5(Range(1,2),Range::all())); V2(Range(0,1),Range::all()).copyTo(V5(Range(2,3),Range::all())); V1(Range(2,3),Range::all()).copyTo(V6(Range(0,1),Range::all())); V1(Range(0,1),Range::all()).copyTo(V6(Range(1,2),Range::all())); V1(Range(1,2),Range::all()).copyTo(V6(Range(2,3),Range::all())); Mat 
normals(3,faces.rows,CV_32F); normals = V3.mul(V4) - V5.mul(V6); V1.release(); V2.release(); V3.release(); V4.release(); V5.release(); V6.release(); Mat normalsqu(3,faces.rows,CV_32F); Mat normalsqu_colm_summed(1,faces.rows,CV_32F); normalsqu = normals.mul(normals); cv::reduce(normalsqu, normalsqu_colm_summed, 0, CV_REDUCE_SUM, CV_32F); cv::sqrt(normalsqu_colm_summed,normalsqu_colm_summed);//check mem leak normalsqu.release(); normalsqu_colm_summed = 1/normalsqu_colm_summed; for(int i = 0; i < faces.rows; i++) { normals(Range::all(),Range(i,i+1)) = normals(Range::all(),Range(i,i+1)) * normalsqu_colm_summed.at<float>(i); } normalsqu_colm_summed.release(); for(int i = 0; i < faces.rows; i++) { normals(Range::all(),Range(i,i+1)).copyTo(((*(facets[i]))(Range::all(),Range(0,1)))); } normals.release(); //write to stl file //get file paths std::vector<std::string> masktoken; split(path.pathtoimages, '/', masktoken); path.pathtoSTL = path.pathtoSTL+masktoken[0]; for(int i = 1; i < masktoken.size()-1; i++) { path.pathtoSTL = path.pathtoSTL+"/"+masktoken[i]; } path.pathtoSTL = path.pathtoSTL+"/"+"mesh.stl"; cout<<"Saving file to "<<path.pathtoSTL<<endl; const char * c = path.pathtoSTL.c_str(); ofstream pFile; pFile.open (c, ios::out | ios::binary); if (pFile.is_open()) { string name = "Create by stlwrite.m "; name = name + currentDateTime(); pFile.setf(ios::left); pFile.width(sizeof(unsigned char)*80); //header pFile << name; unsigned int size = faces.rows; //size pFile.write((char*)&size,sizeof(size)); int height = (*facets[0]).rows * (*facets[0]).cols * 2 + 1; unsigned short ** data= new unsigned short *[height]; for(int i = 0; i < height; i++) { data[i] = new unsigned short[faces.rows]; } for(int j = 0; j < faces.rows; j++) { int i = 0; //for one facets for(int k = 0; k < (*facets[j]).cols; k++) { for(int l = 0; l < (*facets[j]).rows; l++) { float f = (*facets[j]).at<float>(l,k); data[i][j] = *reinterpret_cast<unsigned int *>(&f); data[i+1][j] = *reinterpret_cast<unsigned 
int *>(&f)>>16; i = i + 2; } } //then for the last row data[height-1][j] = (unsigned short)0; } for (int i = 0; i < faces.rows; i++) for (int j = 0; j < height; j++) pFile.write ((char*)&data[j][i], sizeof(unsigned short) ); pFile.close(); //fclose (pFile); //delete table for(int i = 0; i < height; i++) { delete[] data[i]; } delete[] data; }else{ cout<<"stlwrite:cannotWriteFile"<<" "<<"Unable to write to"<<" "<<path.pathtoSTL<<endl; } for(int i = 0; i < faces.rows; i++) { (*facets[i]).release(); } delete[] facets; return 1; }
Error in double pointer to array parameter
int main (void) { int** arr = new int*[4]; for (int i = 0; i < 4; i++) arr[i] = new int[4] {1, 0, 0, 1}; const int* p = &(arr[0][0]); TFigure* test = new TFigure(arr, 4, 4); test->resolve(); for (int i = 0; i < 4; i++) delete[] arr[i]; delete[] arr; return 0; } where constructor declaration is line 57: TFigure(int **ia, int n, int m) N = n; M =m; landscape = new int*[n]; puddles = new int*[n]; for (int i = 0; i < n; i++){ landscape[i] = new int[m]; puddles[i] = new int[n]; for (int j = 0; j < m; j++) landscape[i][j] = *ia[i][j]; } for (int i = 0; i < n; i++) for (int j = 0; j < 0; j++) if (i == 0 || i == N || j == 0 || j == M) puddles[i][j] = 0; else puddles[i][j] = 1; for (int i = 0; i < N; i++){ for (int j = 0; j < M; j++) std::cout << puddles[i][j] << ' '; std::cout << std::endl; } for (int i = 0; i < N; i++){ for (int j = 0; j < M; j++) std::cout << landscape[i][j] << ' '; std::cout << std::endl; } }; but I have an error 57:43: error: invalid type argument of unary «*» (have «int») I don't understand what causes this.
The problem is with this line: landscape[i][j] = *ia[i][j]; ia[i][j] gives you an int which you then try to dereference. It seems like you really just want: landscape[i][j] = ia[i][j]; I'm not sure if this was a mistake when copy and pasting or not, but your constructor definition is missing an opening {. TFigure(int **ia, int n, int m) { // Here ^