My code is realy slow and i need optimization problem - c++

#include <iostream>
#include <chrono>
using namespace std;
int main()
{
const unsigned int m = 200;
const unsigned int n = 200;
srand(static_cast<unsigned int>(static_cast<std::chrono::duration<double>
>(std::chrono::high_resolution_clock::now().time_since_epoch()).count()));
double** matrixa;
double** matrixb;
double** matrixc;
matrixa = new double* [m];
matrixb = new double* [m];
matrixc = new double* [m];
unsigned int max = static_cast<unsigned int>(1u << 31);
for (unsigned int i = 0; i < m; i++)
matrixa[i] = new double[n];
for (unsigned int i = 0; i < m; i++)
matrixb[i] = new double[n];
for (unsigned int i = 0; i < m; i++)
matrixc[i] = new double[n];
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixa[i]
[j] = static_cast<double>(static_cast<double>(rand()) / max * 10);
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixb[i]
[j] = static_cast<double>(static_cast<double>(rand()) / max * 10);
auto start = std::chrono::high_resolution_clock::now();
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
for (unsigned int k = 0; k < m; k++)
for (unsigned int l = 0; l < m; l++)
matrixc[i][j] += matrixa[k][l] * matrixb[l][k];
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_diff = stop - start;
cout << "Czas wykonania programu " << time_diff.count() << " sekund." <<
endl;
for (unsigned int i = 0; i < m; i++)
delete[] matrixa[i];
for (unsigned int i = 0; i < m; i++)
delete[] matrixb[i];
for (unsigned int i = 0; i < m; i++)
delete[] matrixc[i];
delete[] matrixa;
delete[] matrixb;
delete[] matrixc;
return 0;
}
I have this code and I would like to optimize it, unfortunately I have absolutely no idea how to go about it. Maybe someone has an idea and would like to help me? I got to the point where the program for 400 arrays executes 105 seconds but it is still too much, I would like to optimize this code to run faster. I found OpenMP library and thread class but I don't know how to use it in my program.

Firstly, your matrix multiply algorithm is over complex than a normal one(Or it's just wrong), you may reference the wiki for a typical algorithm:
Input: matrices A and B
Let C be a new matrix of the appropriate size
For i from 1 to n:
For j from 1 to p:
Let sum = 0
For k from 1 to m:
Set sum ← sum + Aik × Bkj
Set Cij ← sum
Return C
There is a critical bug in your code, you haven't initialized the result matrix.
So the fixed code may like this:
#include <chrono>
#include <iostream>
using namespace std;
int main() {
const unsigned int m = 200;
const unsigned int n = 201;
const unsigned int p = 202;
srand(static_cast<unsigned int>(
static_cast<std::chrono::duration<double> >(
std::chrono::high_resolution_clock::now().time_since_epoch())
.count()));
double** matrixa;
double** matrixb;
double** matrixc;
matrixa = new double*[m];
matrixb = new double*[n];
matrixc = new double*[m];
unsigned int max = static_cast<unsigned int>(1u << 31);
for (unsigned int i = 0; i < m; i++) matrixa[i] = new double[n];
for (unsigned int i = 0; i < n; i++) matrixb[i] = new double[p];
for (unsigned int i = 0; i < m; i++) {
matrixc[i] = new double[p];
std::fill(matrixc[i], matrixc[i] + p, 0.0);
}
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixa[i][j] =
static_cast<double>(static_cast<double>(rand()) / max * 10);
for (unsigned int i = 0; i < n; i++)
for (unsigned int j = 0; j < p; j++)
matrixb[i][j] =
static_cast<double>(static_cast<double>(rand()) / max * 10);
auto start = std::chrono::high_resolution_clock::now();
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < p; j++)
for (unsigned int k = 0; k < n; k++)
matrixc[i][j] += matrixa[i][k] * matrixb[k][j];
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_diff = stop - start;
cout << "Czas wykonania programu " << time_diff.count() << " sekund." << endl;
for (unsigned int i = 0; i < m; i++) delete[] matrixa[i];
for (unsigned int i = 0; i < n; i++) delete[] matrixb[i];
for (unsigned int i = 0; i < m; i++) delete[] matrixc[i];
delete[] matrixa;
delete[] matrixb;
delete[] matrixc;
return 0;
}
Now it's much faster than the one in question.
It still can be faster with slightly modification:
#include <chrono>
#include <iostream>
using namespace std;
int main() {
const unsigned int m = 200;
const unsigned int n = 201;
const unsigned int p = 202;
srand(static_cast<unsigned int>(
static_cast<std::chrono::duration<double> >(
std::chrono::high_resolution_clock::now().time_since_epoch())
.count()));
double** matrixa;
double** matrixb;
double** matrixc;
matrixa = new double*[m];
matrixb = new double*[n];
matrixc = new double*[m];
unsigned int max = static_cast<unsigned int>(1u << 31);
for (unsigned int i = 0; i < m; i++) matrixa[i] = new double[n];
for (unsigned int i = 0; i < n; i++) matrixb[i] = new double[p];
for (unsigned int i = 0; i < m; i++) {
matrixc[i] = new double[p];
std::fill(matrixc[i], matrixc[i] + p, 0.0);
}
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixa[i][j] =
static_cast<double>(static_cast<double>(rand()) / max * 10);
for (unsigned int i = 0; i < n; i++)
for (unsigned int j = 0; j < p; j++)
matrixb[i][j] =
static_cast<double>(static_cast<double>(rand()) / max * 10);
auto start = std::chrono::high_resolution_clock::now();
for (unsigned int i = 0; i < m; i++)
for (unsigned int k = 0; k < n; k++)
for (unsigned int j = 0; j < p; j++)
matrixc[i][j] += matrixa[i][k] * matrixb[k][j];
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_diff = stop - start;
cout << "Czas wykonania programu " << time_diff.count() << " sekund." << endl;
for (unsigned int i = 0; i < m; i++) delete[] matrixa[i];
for (unsigned int i = 0; i < n; i++) delete[] matrixb[i];
for (unsigned int i = 0; i < m; i++) delete[] matrixc[i];
delete[] matrixa;
delete[] matrixb;
delete[] matrixc;
return 0;
}
This code is more cache-friendly, the explanation can be found here.
The code can still be improved by a parallel algorithm, to speed up the previous code with OpenMP, with only one line change:
Add we need to add the build option -fopenmp to compile it.
#include <chrono>
#include <iostream>
using namespace std;
int main() {
const unsigned int m = 200;
const unsigned int n = 201;
const unsigned int p = 202;
srand(static_cast<unsigned int>(
static_cast<std::chrono::duration<double> >(
std::chrono::high_resolution_clock::now().time_since_epoch())
.count()));
double** matrixa;
double** matrixb;
double** matrixc;
matrixa = new double*[m];
matrixb = new double*[n];
matrixc = new double*[m];
unsigned int max = static_cast<unsigned int>(1u << 31);
for (unsigned int i = 0; i < m; i++) matrixa[i] = new double[n];
for (unsigned int i = 0; i < n; i++) matrixb[i] = new double[p];
for (unsigned int i = 0; i < m; i++) {
matrixc[i] = new double[p];
std::fill(matrixc[i], matrixc[i] + p, 0.0);
}
for (unsigned int i = 0; i < m; i++)
for (unsigned int j = 0; j < n; j++)
matrixa[i][j] =
static_cast<double>(static_cast<double>(rand()) / max * 10);
for (unsigned int i = 0; i < n; i++)
for (unsigned int j = 0; j < p; j++)
matrixb[i][j] =
static_cast<double>(static_cast<double>(rand()) / max * 10);
auto start = std::chrono::high_resolution_clock::now();
#pragma omp parallel for
for (unsigned int i = 0; i < m; i++)
for (unsigned int k = 0; k < n; k++)
for (unsigned int j = 0; j < p; j++)
matrixc[i][j] += matrixa[i][k] * matrixb[k][j];
auto stop = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_diff = stop - start;
cout << "Czas wykonania programu " << time_diff.count() << " sekund." << endl;
for (unsigned int i = 0; i < m; i++) delete[] matrixa[i];
for (unsigned int i = 0; i < n; i++) delete[] matrixb[i];
for (unsigned int i = 0; i < m; i++) delete[] matrixc[i];
delete[] matrixa;
delete[] matrixb;
delete[] matrixc;
return 0;
}
It would be better to use std::vector rather than dynamically allocated arrays, the work is left for you.

Related

Assigning value to pointer with multithreading generates wrong result

void assign(short *inPtr, int ext[4], int sz[2])
{
for (int j = ext[2]; j < ext[3]; j++)
for (int i = ext[0]; i < ext[1]; i++)
{
inPtr[i + j * sz[1]] = 100;
}
}
int main(int, char **)
{
int size[2] = {16, 16};
short ptr[256];
std::vector<std::thread> pool;
int block = 2;
int row = size[0] / block;
int col = size[1] / block;
for (size_t j = 0; j < row; j++)
for (size_t i = 0; i < col; i++)
{
int ext[4] = {i * block, (i + 1) * block, j * block, (j + 1) * block};
pool.push_back(std::thread(assign, ptr, ext, size));
}
for (size_t i = 0; i < row * col; i++)
{
pool[i].join();
}
for (size_t i = 0; i < 256; i++)
{
std::cout << (double)ptr[i] << ", ";
}
std::cout << std::endl;
}
I'm trying to assign value to pointer ptr by using multithreading.
The code above is supposed to assign 100 to all element of ptr, however, there are many 0 in the output. Why does it happen and how to fix it ?

dijkstra's algorithm using matrix in c++. what's wrong in this code?

#include <iostream>
int n, m, v1, v2, weight;
cin >> n >> m;
int** graph = new int*[n];
int* distance = new int[n];
int* s = new int[n];
for (int i = 0; i < n; ++i)
graph[i] = new int[n];
for (int i = 0; i < n; ++i)
for (int j = 0; j < n; ++j)
graph[i][j] = INT_MAX;
for (int i = 0; i < m; ++i)
{
cin >> v1 >> v2 >> weight;
graph[v1][v2] = weight;
graph[v2][v1] = weight;
}
for (int i = 0; i < n; ++i)
distance[i] = INT_MAX;
for (int i = 0; i < n; ++i)
distance[i] = graph[0][i];
for (int i = 0; i < n; ++i)
s[i] = 0;
distance[0] = 0;
int min = INT_MAX;
int vertex = 0;
for (int j = 0; j < n-1; ++j){
min = INT_MAX;
for (int i = 0; i < n; ++i)
if (s[i] == 0 && min >= distance[i])
{
vertex = i;
min = distance[i];
}
s[vertex] = 1;
cout << vertex << " ";
for (int i = 0; i < n; ++i)
if (distance[i]>distance[vertex] + graph[vertex][i])
distance[i] = distance[vertex] + graph[vertex][i];
}
for (int i = 0; i < n; ++i)
cout << distance[i] << " ";
cout << endl;
return 0;
}
Hi. I'm making Dijkstra's algorithm using two-dimentional matrix..
but this code doesn't work. and i don't know why! Can you fix my problem??
i want to make output all distance of graph. but output is looks like array point garbage value like -2345...
Can you help me??
There are some problems in this loop:
for (int i = 0; i < n; ++i)
if (distance[i]>distance[vertex] + graph[vertex][i])
distance[i] = distance[vertex] + graph[vertex][i];
should change to the blew code:
for (int i = 0; i < n; ++i)
if (s[i] == 0 && graph[vertex][i] != INT_MAX && distance[i]>distance[vertex] + graph[vertex][i])
distance[i] = distance[vertex] + graph[vertex][i];
because if the graph[vertex][i] == INT_MAX, the sum of distance[vertex] + graph[vertex][i] is overflow. Another problem is that the vertex i should not be marked before.

dynamic array in c++

I need to implement a 5x5 dynamic array where
every element in it is equal to the sum of its two indices. For example, the first element, at (0,0), has the value 0+0=0.
Here is my code:
# include<iostream>
using namespace std;
int main()
{
int size =5;
int *array=new int[size];
for (int i = 0; i < size; i++)
delete [] array;
return 0;
}
I need help to implement sum of index.
You need at first to implement a two-dimensional array.:)
Here you are.
#include <iostream>
int main()
{
const size_t N = 5;
int ( *array )[N] = new int[N][N];
for ( size_t i = 0; i < N; i++ )
{
for ( size_t j = 0; j < N; j++ ) array[i][j] = i + j;
}
for ( size_t i = 0; i < N; i++ )
{
for ( size_t j = 0; j < N; j++ ) std::cout << array[i][j] << ' ';
std::cout << std::endl;
}
delete [] array;
return 0;
}
And do not pay attention that the answer is down voted. There is nothing wrong with the answer. :)
Firstly, you should create a 2d-array, not just a array.
void foo() {
int **a = new int*[5];
for (int i = 0; i < 5; i++)
a[i] = new int[5];
}
for (int i = 0; i < 5; i++)
for (int j = 0; j < 5; j++)
a[i][j] = i + j;
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++)
cout << a[i][j] << " ";
cout << endl;
}
for (int i = 0; i < 5; i++)
delete[] a[i];
delete[] a;
}
And, of course, don't forget to clear your memory)

Printing to a STL(stereolithography) file in binary mode

I am trying to print to a stl file and can't print correctly.
There are lot of samples for Hex format printing in C++ but no sample program for binary format. My program is as follow. What is wrong with my program?
string name = "Create by stlwrite.m ";
name = name + currentDateTime();
pFile.setf(ios::left);
pFile.width(sizeof(unsigned char)*80);
//header
pFile << name;
unsigned int size = faces.rows;
//size
pFile.write((char*)&size,sizeof(size));
int height = 25;
unsigned short ** data= new unsigned short *[height];
for(int i = 0; i < height; i++)
{
data[i] = new unsigned short[142000];
}
for(int j = 0; j < 142000; j++)
{
int i = 0;
//for one facets
for(int k = 0; k < (*facets[j]).cols; k++)
{
for(int l = 0; l < (*facets[j]).rows; l++)
{
float f = (*facets[j]).at<float>(l,k);
data[i][j] = *reinterpret_cast<unsigned int *>(&f);
data[i+1][j] = *reinterpret_cast<unsigned int *>(&f)>>16;
i = i + 2;
}
}
//then for the last row
data[height-1][j] = (unsigned short)0;
}
for (int i = 0; i < height; i++)
for (int j = 0; j < faces.rows; j++)
pFile.write ((char*)&data[i][j], sizeof(unsigned short) );
pFile.close();
EDIT1: I follow the idea of Matlab stlwrite.mat program.link
This is the C code to print in binary mode.
The calling function is:
stl_write_binary(X, Y, path)
Where X is 3xM Vertices matrix and Y is 3xN faces matrix. Both are in OpenCV's Mat structure.
Path is the path to store the STL file you can modify it according to your requirement.
The whole code is:
int stl_write_binary(Mat &vertemp, Mat &faces, PathToFolders &path)
{
Mat vertices(vertemp.cols,vertemp.rows,vertemp.type());
vertices = vertemp.t();
cv::Mat** facets = new cv::Mat*[faces.rows];
for(int i = 0; i < faces.rows; i++)
{
facets[i] = new cv::Mat(3,4,CV_32F);
for(int j = 0; j < 3; j++)
{
vertices(Range::all(),Range(faces.at<int>(i,j),faces.at<int>(i,j)+1)).convertTo((*(facets[i]))(Range::all(),Range(j+1,j+2)), CV_32F);
}
}
//Compute their normals
Mat V1(3,faces.rows,CV_32F);
Mat V2(3,faces.rows,CV_32F);
for(int i = 0; i < faces.rows; i++)
{
V1(Range::all(),Range(i,i+1)) = ((*(facets[i]))(Range::all(),Range(2,3))) - ((*(facets[i]))(Range::all(),Range(1,2)));
V2(Range::all(),Range(i,i+1)) = ((*(facets[i]))(Range::all(),Range(3,4))) - ((*(facets[i]))(Range::all(),Range(1,2)));
}
Mat V3(3,faces.rows,CV_32F);
Mat V4(3,faces.rows,CV_32F);
Mat V5(3,faces.rows,CV_32F);
Mat V6(3,faces.rows,CV_32F);
V1(Range(1,2),Range::all()).copyTo(V3(Range(0,1),Range::all()));
V1(Range(2,3),Range::all()).copyTo(V3(Range(1,2),Range::all()));
V1(Range(0,1),Range::all()).copyTo(V3(Range(2,3),Range::all()));
V2(Range(2,3),Range::all()).copyTo(V4(Range(0,1),Range::all()));
V2(Range(0,1),Range::all()).copyTo(V4(Range(1,2),Range::all()));
V2(Range(1,2),Range::all()).copyTo(V4(Range(2,3),Range::all()));
V2(Range(1,2),Range::all()).copyTo(V5(Range(0,1),Range::all()));
V2(Range(2,3),Range::all()).copyTo(V5(Range(1,2),Range::all()));
V2(Range(0,1),Range::all()).copyTo(V5(Range(2,3),Range::all()));
V1(Range(2,3),Range::all()).copyTo(V6(Range(0,1),Range::all()));
V1(Range(0,1),Range::all()).copyTo(V6(Range(1,2),Range::all()));
V1(Range(1,2),Range::all()).copyTo(V6(Range(2,3),Range::all()));
Mat normals(3,faces.rows,CV_32F);
normals = V3.mul(V4) - V5.mul(V6);
V1.release();
V2.release();
V3.release();
V4.release();
V5.release();
V6.release();
Mat normalsqu(3,faces.rows,CV_32F);
Mat normalsqu_colm_summed(1,faces.rows,CV_32F);
normalsqu = normals.mul(normals);
cv::reduce(normalsqu, normalsqu_colm_summed, 0, CV_REDUCE_SUM, CV_32F);
cv::sqrt(normalsqu_colm_summed,normalsqu_colm_summed);//check mem leak
normalsqu.release();
normalsqu_colm_summed = 1/normalsqu_colm_summed;
for(int i = 0; i < faces.rows; i++)
{
normals(Range::all(),Range(i,i+1)) = normals(Range::all(),Range(i,i+1)) * normalsqu_colm_summed.at<float>(i);
}
normalsqu_colm_summed.release();
for(int i = 0; i < faces.rows; i++)
{
normals(Range::all(),Range(i,i+1)).copyTo(((*(facets[i]))(Range::all(),Range(0,1))));
}
normals.release();
//write to stl file
//get file paths
std::vector<std::string> masktoken;
split(path.pathtoimages, '/', masktoken);
path.pathtoSTL = path.pathtoSTL+masktoken[0];
for(int i = 1; i < masktoken.size()-1; i++)
{
path.pathtoSTL = path.pathtoSTL+"/"+masktoken[i];
}
path.pathtoSTL = path.pathtoSTL+"/"+"mesh.stl";
cout<<"Saving file to "<<path.pathtoSTL<<endl;
const char * c = path.pathtoSTL.c_str();
ofstream pFile;
pFile.open (c, ios::out | ios::binary);
if (pFile.is_open())
{
string name = "Create by stlwrite.m ";
name = name + currentDateTime();
pFile.setf(ios::left);
pFile.width(sizeof(unsigned char)*80);
//header
pFile << name;
unsigned int size = faces.rows;
//size
pFile.write((char*)&size,sizeof(size));
int height = (*facets[0]).rows * (*facets[0]).cols * 2 + 1;
unsigned short ** data= new unsigned short *[height];
for(int i = 0; i < height; i++)
{
data[i] = new unsigned short[faces.rows];
}
for(int j = 0; j < faces.rows; j++)
{
int i = 0;
//for one facets
for(int k = 0; k < (*facets[j]).cols; k++)
{
for(int l = 0; l < (*facets[j]).rows; l++)
{
float f = (*facets[j]).at<float>(l,k);
data[i][j] = *reinterpret_cast<unsigned int *>(&f);
data[i+1][j] = *reinterpret_cast<unsigned int *>(&f)>>16;
i = i + 2;
}
}
//then for the last row
data[height-1][j] = (unsigned short)0;
}
for (int i = 0; i < faces.rows; i++)
for (int j = 0; j < height; j++)
pFile.write ((char*)&data[j][i], sizeof(unsigned short) );
pFile.close();
//fclose (pFile);
//delete table
for(int i = 0; i < height; i++)
{
delete[] data[i];
}
delete[] data;
}else{
cout<<"stlwrite:cannotWriteFile"<<" "<<"Unable to write to"<<" "<<path.pathtoSTL<<endl;
}
for(int i = 0; i < faces.rows; i++)
{
(*facets[i]).release();
}
delete[] facets;
return 1;
}

Error in double pointer to array parameter

int main (void)
{
int** arr = new int*[4];
for (int i = 0; i < 4; i++) arr[i] = new int[4] {1, 0, 0, 1};
const int* p = &(arr[0][0]);
TFigure* test = new TFigure(arr, 4, 4);
test->resolve();
for (int i = 0; i < 4; i++) delete[] arr[i];
delete[] arr;
return 0;
}
where constructor declaration is
line 57:
TFigure(int **ia, int n, int m)
N = n;
M =m;
landscape = new int*[n];
puddles = new int*[n];
for (int i = 0; i < n; i++){
landscape[i] = new int[m];
puddles[i] = new int[n];
for (int j = 0; j < m; j++)
landscape[i][j] = *ia[i][j];
}
for (int i = 0; i < n; i++)
for (int j = 0; j < 0; j++)
if (i == 0 || i == N || j == 0 || j == M)
puddles[i][j] = 0;
else
puddles[i][j] = 1;
for (int i = 0; i < N; i++){
for (int j = 0; j < M; j++)
std::cout << puddles[i][j] << ' ';
std::cout << std::endl;
}
for (int i = 0; i < N; i++){
for (int j = 0; j < M; j++)
std::cout << landscape[i][j] << ' ';
std::cout << std::endl;
}
};
but I have an error
57:43: error: invalid type argument of unary «*» (have «int»)
I don't understand what causes this.
The problem is with this line:
landscape[i][j] = *ia[i][j];
ia[i][j] gives you an int which you then try to dereference. It seems like you really just want:
landscape[i][j] = ia[i][j];
I'm not sure if this was a mistake when copy and pasting or not, but your constructor definition is missing an opening {.
TFigure(int **ia, int n, int m) {
// Here ^