I am writing a code to send fixed number of rows (split along first dimension in case of 2D) and a slab (for 3D, again split along the first dimension). For example for a 100x100 I would like to send 25x100 to each of the 4 processors and in case of 3D (100x100x100) I would like to send 25x100x100 to each processors.
I have a class file and I create instance of class in the main program. My question is how 1) How can I create class objects for each process (mpi rank) and be able to use the member functions (example my_average) in the code. 2) If I create instance of class inside enclosing parenthesis the I get scope issues. 3) In the output attached below I see that send is not working. What am I missing? Is this the right way to do things.
Please help.
Pardon my lingo - c++ is not my first language.
Attached sample output.
each process gets 2
First Dimension 4
Number of processors 2
Displacement and blocklen 0 8
Displacement and blocklen 8 8
Displacement and blocklen 0 8
Displacement and blocklen 8 8
( 0.0000 + i -0.0000)( 1.0000 + i -1.1315)( 2.0000 + i -2.7556)( 3.0000 + i -3.4587)
( 0.5328 + i 0.4672)( 1.2190 + i -0.2190)( 2.0470 + i -1.0470)( 3.6789 + i -2.6789)
( 1.3586 + i 1.3207)( 2.8694 + i 0.0653)( 2.7670 + i -0.3835)( 4.0388 + i -1.5194)
( 2.4929 + i 2.1690)( 1.1037 + i 1.9654)( 2.1604 + i 0.9465)( 4.5891 + i -0.5297)
end of 2D matrix
rank=0 Rows : 2 Width : 4 local_n0 size 2
( 0.0000 + i -0.0000)( 1.0000 + i -1.1315)( 2.0000 + i -2.7556)( 3.0000 + i -3.4587)
( 0.5328 + i 0.4672)( 1.2190 + i -0.2190)( 2.0470 + i -1.0470)( 3.6789 + i -2.6789)
rank=1 Rows : 2 Width : 4 local_n0 size 2
( 0.0000 + i 0.0000)( 0.0000 + i 0.0000)( 0.0000 + i 0.0000)( 0.0000 + i 0.0000)
( 0.0000 + i 0.0000)( 0.0000 + i 0.0000)( 0.0000 + i 0.0000)( 0.0000 + i 0.0000)
main program
#include <fftw3-mpi.h>
#include <cstdlib>
#include <iostream>
#include <iomanip> // needed for setfill
#include "cmplx_2d.hh"
#include "random.hh"
using namespace std;
int main(int argc, char **argv){
const int N0 = 4, N1 = 4;
int i,j;
int rank,size;
double rx;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int local_n0=N0/size;
if(rank==0){
cout << " each process gets " << local_n0 << endl;
cout << " First Dimension " << N0 << endl;
cout << " Number of processors " << size << endl;
}
/* compute start and size of rows */
int n_rows[size];
int send_counts[size];
int displs[size];
displs[0]=0;
for (i = 0; i < size; ++i) {
n_rows[i]=N0/size;
if(i<N0%size){
n_rows[i]=n_rows[i]+1;
}
send_counts[i]=n_rows[i]*N1;
if(i>0){
displs[i]=displs[i-1]+send_counts[i-1];
}
cout << " Displacement and blocklen " << displs[i] << " " << send_counts[i] << endl;
}
int rows=n_rows[rank];
/* create 2d complex objects */
cmplx_2d arr_cmplx(N0,N1);
if(rank==0) {
/* initialize data -- random number */
for (i = 0; i < N0; ++i) {
for (j = 0; j < N1; ++j){
rx =((double)rand()/(double)RAND_MAX);
arr_cmplx(i,j)=complex<double>(rx*i+j,i-j-rx);
printf("(%8.4f + i%8.4f)", real(arr_cmplx(i,j)), imag(arr_cmplx(i,j)));
}
printf(" \n ");
}
printf(" \n end of 2D matrix \n \n ");
} // end of master
if(rank==0) {
MPI_Scatterv(&arr_cmplx(0,0),send_counts,displs,MPI_DOUBLE_COMPLEX,MPI_IN_PLACE,0,MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD);
} else {
/* I should allocate local array on each processor to receive data but if I do it here I get scope error */
//cmplx_2d arr_cmplx(rows,N1);
MPI_Scatterv(NULL,send_counts,displs,MPI_DOUBLE_COMPLEX,&arr_cmplx(0,0),rows*N1,MPI_DOUBLE_COMPLEX,0,MPI_COMM_WORLD);
}
//printing, one proc at a time
if(rank>0){
MPI_Status status;
MPI_Recv(NULL,0,MPI_DOUBLE_COMPLEX,rank-1,0,MPI_COMM_WORLD,&status);
}
cout<<"rank="<< rank<<" Rows : "<<rows<<" Width : "<<N1<< " local_n0 size " << local_n0 << endl;
for(i=0; i<rows; i++)
{
for(j=0; j<N1; j++)
printf("(%8.4f + i%8.4f)", real(arr_cmplx(i,j)), imag(arr_cmplx(i,j)));
cout<<endl;
}
if(rank<size-1){
MPI_Send(NULL,0,MPI_DOUBLE_COMPLEX,rank+1,0,MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
header file
#include <cassert> // for assert()
#include <complex>
using namespace std;
class cmplx_2d
{
private:
int nx;
int ny;
complex<double>** array_2d;
public:
// constructor
cmplx_2d(int x, int y) {
array_2d = new complex<double>*[x];
for(int i(0); i < x; ++i) {
array_2d[i] = new complex<double>[y];
for(int j(0); j < y; ++j) {
array_2d[i][j]= complex<double>(0.,0.);
}}
nx=x;
ny=y;
}
// deconstructor
~cmplx_2d() {
delete [] array_2d;
}
// overload () operator
complex<double>& operator()(const int i, const int j) const;
// prototype of overload constructor - just a dummy function
void my_avg( const cmplx_2d& in, int* ir, int* il, int* jr, int* jl, const int flag );
int getNx() const {
return nx;
}
int getNy() const {
return ny;
}
}; // end of class
header file
#include <iostream>
#include "cmplx_2d.hh"
#include "main_header.hh"
// overload constructor to - just a dummy for now
void cmplx_2d::my_avg( const cmplx_2d& in, int* ir, int* il, int* jr, int* jl, const int flag ) {
nx=in.getNx();
ny=in.getNy();
if ( flag == DX )
{
// derivative with respect to x
for(int i(0); i < nx; ++i) {
for(int j(0); j < ny; ++j) {
array_2d[i][j] = 0.5*(in(ir[i],j)+in(il[i],j));
}}
}
else
{
std::cout << "NOT a valid flag for derivative!" << std::endl;
}
} // end
// overloading of () parenthesis operator
complex<double>& cmplx_2d::operator()(const int i, const int j) const
{
assert(i >= 0 && i < nx);
assert(j >= 0 && j < ny);
return array_2d[i][j];
}
main header
const double PI=4.0*atan(1.0);
const double TWO_PI=2.0*PI;
enum flag {DX, DY, LAP};
Related
Making Mandelbrot with MPI
So I've made a Mandelbrot generator and everything worked fine. Now I'm throwing in a speedup from MPI. Process 0 generates a file name mbrot.ppm and adds the appropriate metadata, then divides up the workload into chunks.
Each process receives the chunk's starting and ending positions and gets to work calculating its portion of the Mandelbrot set. To write to the mbrot.ppm file, each process saves its data in an array so it doesn't write to the file before the previous process finishes.
My Problem
Its a runtime error that says:
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node Lenovo exited on signal 11 (Segmentation fault).
I believe it comes from the line int data[3][xrange][yrange]; (line 120) since the print statement after this line never executes. Would there be an obvious reason I'm missing why this multi-dimensional array is causing me problems?
Full Code
#include <iostream>
#include <mpi.h>
#include <unistd.h>
#include <stdlib.h>
#include <math.h>
#include <fstream>
#define MCW MPI_COMM_WORLD
using namespace std;
struct Complex {
double r;
double i;
};
Complex operator + (Complex s, Complex t) {
Complex v;
v.r = s.r + t.r;
v.i = s.i + t.i;
return v;
};
Complex operator * (Complex s, Complex t) {
Complex v;
v.r = s.r * t.r - s.i * t.i;
v.i = s.r * t.i + s.i * t.r;
return v;
};
int rcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int gcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int bcolor(int iters) {
if (iters == 255) return 0;
return 32 * (iters % 8);
};
int mbrot(Complex c, int maxIters) {
int i = 0;
Complex z;
z = c;
while (i < maxIters && z.r * z.r + z.i * z.i < 4) {
z = z * z + c;
i++;
}
return i;
};
int main(int argc, char * argv[]) {
int rank, size;
MPI_Init( & argc, & argv);
MPI_Comm_rank(MCW, & rank);
MPI_Comm_size(MCW, & size);
if (size < 2) {
printf("Not an MPI process if only 1 process runs.\n");
exit(1);
}
if (size % 2 != 0) {
printf("Please use a even number\n");
exit(1);
}
Complex c1, c2, c;
char path[] = "brot.ppm";
int DIM;
int chunk[4];
c1.r = -1;
c1.i = -1;
c2.r = 1;
c2.i = 1;
if (rank == 0) { //start the file
ofstream fout;
fout.open(path);
DIM = 2000; // pixel dimensions
fout << "P3" << endl; // The file type .ppm
fout << DIM << " " << DIM << endl; // dimensions of the image
fout << "255" << endl; // color depth
fout.close();
// making dimesions marks
for (int i = 0; i < size; i++) {
chunk[0] = 0; // startX
chunk[1] = DIM; // endX
chunk[2] = (DIM / size) * i; // startY
chunk[3] = (DIM / size) * (i + 1); // endY
MPI_Send(chunk, 4, MPI_INT, i, 0, MCW);
};
};
MPI_Recv(chunk, 4, MPI_INT, 0, 0, MCW, MPI_STATUS_IGNORE);
printf("Process %d recieved chunk\n\t StartX: %d, EndX: %d\n\t StartY: %d, EndY: %d\n", rank, chunk[0], chunk[1], chunk[2], chunk[3]);
// do stuff save in array
// data[3 elements][Xs][Ys]
int xrange = chunk[1] - chunk[0];
int yrange = chunk[3] - chunk[2];
printf("Process %d, x: %d, y: %d\n", rank, xrange, yrange);
int data[3][xrange][yrange];
printf("done\n");
// generate data for mandlebrot
for (int j = chunk[2]; j < chunk[3]; ++j) {
for (int i = chunk[0]; i < chunk[1]; ++i) {
// calculate one pixel of the DIM x DIM image
c.r = (i * (c1.r - c2.r) / DIM) + c2.r;
c.i = (j * (c1.i - c2.i) / DIM) + c2.i;
int iters = mbrot(c, 255);
data[0][i][j] = rcolor(iters);
data[1][i][j] = gcolor(iters);
data[2][i][j] = bcolor(iters);
}
}
printf("here2\n");
// taking turns to write their data to file
for (int k = 0; k < size; k++) {
if (rank == k) {
ofstream fout;
fout.open(path, ios::app);
fout << rank << " was here" << endl;
for (int j = chunk[2]; j < chunk[3]; ++j) {
for (int i = chunk[0]; i < chunk[1]; ++i) {
fout << data[0][i][j] << " " << data[1][i][j] << " " << data[2][i][j] << " ";
}
fout << endl;
}
printf("Process %d done and waiting\n", rank);
} else {
MPI_Barrier(MCW);
}
}
MPI_Finalize();
};
How to Run
$ mpic++ -o mbrot.out mbrot.cpp
$ mpirun -np 4 mbrot.out
I'm trying to solve this problem by bruteforce, but it seems to run very slow when given 7 (which is 2*7 points).
Note: I only need to run it to maximum 2*8 points
Problem statement:
Given 2*N points in a 2d plane, connect them in pairs to form N line segments. Minimize the total length of all the line segments.
Example:
Input: 5 10 10 20 10 5 5 1 1 120 3 6 6 50 60 3 24 6 9 0 0
Output: 118.4
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
#include <iomanip>
using namespace std;
class point{
public:
double x, y;
};
double getLength(point a, point b){
return hypot((a.x - b.x), (a.y - b.y));
}
static double mini = INT_MAX;
void solve(vector <point> vec, double sum){
double prevSum = sum;
if(sum > mini){
return;
}
if(vec.size() == 2){
sum += getLength(vec[0], vec[1]);
mini = min(mini, sum);
return;
}
for(int i = 0; i < vec.size() - 1; i++){
for(int j = i + 1; j < vec.size(); j++){
sum = prevSum;
vector <point> temp = vec;
sum += getLength(temp[i], temp[j]);
temp.erase(temp.begin() + j);
temp.erase(temp.begin() + i);
solve(temp, sum);
}
}
}
int main(){
point temp;
int input;
double sum = 0;
cin >> input;
vector<point> vec;
for(int i = 0; i < 2 * input; i++){
cin >> temp.x >> temp.y;
vec.push_back(temp);
}
solve(vec, sum);
cout << fixed << setprecision(2) << mini << endl;
}
How can I speed up this code ?
I don't think this is what you are looking for but I mention it for completeness sake anyway. The problem can be formulated as a Mixed Integer Programming (MIP) problem.
We have distances:
d(i,j) = distance between point i and j (only needed for i<j)
and decision variables
x(i,j) = 1 if points i and j are connected (only needed for i<j)
0 otherwise
Then we can write:
Solving this problem can be done with widely available MIP solvers and leads to proven optimal solutions. A small example with 50 points:
You can solve this iteratively by using next_permutation() to go through all the permutations one by one. Apologies for the messy code, but this should show you how to do it:
struct Point {
Point(int x, int y) : x(x), y(y) {
}
bool operator< (const Point& rhs) {
const int key1 = y * 1000 + x;
const int key2 = rhs.y * 1000 + rhs.x;
return key1 < key2;
}
double dist(const Point& next) {
const double h = (double)(next.x - x);
const double v = (double)(next.y - y);
return sqrt(h*h + v*v);
}
int x, y;
};
You need the operator so you have some sort of sorting key for your points, so next_permutation can go through them in lexicographical increasing order.
double getShortestDist(std::vector p) {
double min = 200000;
std::sort(p.begin(), p.end());
while(std::next_permutation(p.begin(), p.end())) {
double sum = 0.0;
for (int i = 0; i < p.size(); i+= 2) {
sum += p[i].dist(p[i+1]);
}
if (sum < min) {
min = sum;
}
}
return min;
}
int main(int argc, char*argv[]) {
static const int arr[] = {
10, 10, 20, 10, 5, 5, 1, 1, 120, 3, 6, 6, 50, 60, 3, 24, 6, 9, 0, 0
};
std::vector<Point> test;
for (int i = 0; i < 20; i += 2) {
test.push_back(Point(arr[i], arr[i+1]));
printf("%d %d\n", arr[i], arr[i+1]);
}
printf("Output: %d, %f", test.size(), getShortestDist(test));
}
I made a function that makes the inverse and then another multithreaded, as long I have to make inverse of arrays >2000 x 2000.
A 1000x1000 array unthreated takes 2.5 seconds (on a i5-4460 4 cores 2.9ghz)
and multithreaded takes 7.25 seconds
I placed the multithreads in the part that most time consumption is taken. Whai is wrong?
Is due vectors are used instead of 2 dimensions arrays?
This is the minimum code to test both versions:
#include<iostream>
#include <vector>
#include <stdlib.h>
#include <time.h>
#include <chrono>
#include <thread>
const int NUCLEOS = 8;
#ifdef __linux__
#include <unistd.h> //usleep()
typedef std::chrono::system_clock t_clock; //try to use high_resolution_clock on new linux x64 computer!
#else
typedef std::chrono::high_resolution_clock t_clock;
#pragma warning(disable:4996)
#endif
using namespace std;
std::chrono::time_point<t_clock> start_time, stop_time = start_time; char null_char = '\0';
void timer(char *title = 0, int data_size = 1) { stop_time = t_clock::now(); double us = (double)chrono::duration_cast<chrono::microseconds>(stop_time - start_time).count(); if (title) printf("%s time = %7lgms = %7lg MOPs\n", title, (double)us*1e-3, (double)data_size / us); start_time = t_clock::now(); }
//makes columns 0
void colum_zero(vector< vector<double> > &x, vector< vector<double> > &y, int pos0, int pos1,int dim, int ord);
//returns inverse of x, x is not modified, not threaded
vector< vector<double> > inverse(vector< vector<double> > x)
{
if (x.size() != x[0].size())
{
cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
}
size_t dim = x.size();
int i, j, ord;
vector< vector<double> > y(dim,vector<double>(dim,0));//initializes output = 0
//init_2Dvector(y, dim, dim);
//1. Unity array y:
for (i = 0; i < dim; i++)
{
y[i][i] = 1.0;
}
double diagon, coef;
double *ptrx, *ptry, *ptrx2, *ptry2;
for (ord = 0; ord<dim; ord++)
{
//2 Hacemos diagonal de x =1
int i2;
if (fabs(x[ord][ord])<1e-15) //If that element is 0, a line that contains a non zero is added
{
for (i2 = ord + 1; i2<dim; i2++)
{
if (fabs(x[i2][ord])>1e-15) break;
}
if (i2 >= dim)
return{};//error, returns null
for (i = 0; i<dim; i++)//added a line without 0
{
x[ord][i] += x[i2][i];
y[ord][i] += y[i2][i];
}
}
diagon = 1.0/x[ord][ord];
ptry = &y[ord][0];
ptrx = &x[ord][0];
for (i = 0; i < dim; i++)
{
*ptry++ *= diagon;
*ptrx++ *= diagon;
}
//uses the same function but not threaded:
colum_zero(x,y,0,dim,dim,ord);
}//end ord
return y;
}
//threaded version
vector< vector<double> > inverse_th(vector< vector<double> > x)
{
if (x.size() != x[0].size())
{
cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
}
int dim = (int) x.size();
int i, ord;
vector< vector<double> > y(dim, vector<double>(dim, 0));//initializes output = 0
//init_2Dvector(y, dim, dim);
//1. Unity array y:
for (i = 0; i < dim; i++)
{
y[i][i] = 1.0;
}
std::thread tarea[NUCLEOS];
double diagon;
double *ptrx, *ptry;// , *ptrx2, *ptry2;
for (ord = 0; ord<dim; ord++)
{
//2 Hacemos diagonal de x =1
int i2;
if (fabs(x[ord][ord])<1e-15) //If a diagonal element=0 it is added a column that is not 0 the diagonal element
{
for (i2 = ord + 1; i2<dim; i2++)
{
if (fabs(x[i2][ord])>1e-15) break;
}
if (i2 >= dim)
return{};//error, returns null
for (i = 0; i<dim; i++)//It is looked for a line without zero to be added to make the number a non zero one to avoid later divide by 0
{
x[ord][i] += x[i2][i];
y[ord][i] += y[i2][i];
}
}
diagon = 1.0 / x[ord][ord];
ptry = &y[ord][0];
ptrx = &x[ord][0];
for (i = 0; i < dim; i++)
{
*ptry++ *= diagon;
*ptrx++ *= diagon;
}
int pos0 = 0, N1 = dim;//initial array position
if ((N1<1) || (N1>5000))
{
cout << "It is detected out than 1-5000 simulations points=" << N1 << " ABORT or press enter to continue" << endl; getchar();
}
//cout << "Initiation of " << NUCLEOS << " threads" << endl;
for (int thread = 0; thread<NUCLEOS; thread++)
{
int pos1 = (int)((thread + 1)*N1 / NUCLEOS);//next position
tarea[thread] = std::thread(colum_zero, std::ref(x), std::ref(y), pos0, pos1, dim, ord);//ojo, coil current=1!!!!!!!!!!!!!!!!!!
pos0 = pos1;//next thread will work at next point
}
for (int thread = 0; thread<NUCLEOS; thread++)
{
tarea[thread].join();
//cout << "Thread num: " << thread << " end\n";
}
}//end ord
return y;
}
//makes columns 0
void colum_zero(vector< vector<double> > &x, vector< vector<double> > &y, int pos0, int pos1,int dim, int ord)
{
double coef;
double *ptrx, *ptry, *ptrx2, *ptry2;
//Hacemos '0' la columna ord salvo elemento diagonal:
for (int i = pos0; i<pos1; i++)//Begin to end for every thread
{
if (i == ord) continue;
coef = x[i][ord];//element to make 0
if (fabs(coef)<1e-15) continue; //If already zero, it is avoided
ptry = &y[i][0];
ptry2 = &y[ord][0];
ptrx = &x[i][0];
ptrx2 = &x[ord][0];
for (int j = 0; j < dim; j++)
{
*ptry++ = *ptry - coef * (*ptry2++);//1ª matriz
*ptrx++ = *ptrx - coef * (*ptrx2++);//2ª matriz
}
}
}
void test_6_inverse(int dim)
{
vector< vector<double> > vec1(dim, vector<double>(dim));
for (int i=0;i<dim;i++)
for (int j = 0; j < dim; j++)
{
vec1[i][j] = (-1.0 + 2.0*rand() / RAND_MAX) * 10000;
}
vector< vector<double> > vec2,vec3;
double ini, end;
ini = (double)clock();
vec2 = inverse(vec1);
end = (double)clock();
cout << "=== Time inverse unthreaded=" << (end - ini) / CLOCKS_PER_SEC << endl;
ini=end;
vec3 = inverse_th(vec1);
end = (double)clock();
cout << "=== Time inverse threaded=" << (end - ini) / CLOCKS_PER_SEC << endl;
cout<<vec2[2][2]<<" "<<vec3[2][2]<<endl;//to make the sw to do de inverse
cout << endl;
}
int main()
{
test_6_inverse(1000);
cout << endl << "=== END ===" << endl; getchar();
return 1;
}
After looking deeper in the code of the colum_zero() function I have seen that one thread rewrites in the data to be used by another threads, so the threads are not INDEPENDENT from each other. Fortunately the compiler detect it and avoid it.
Conclusions:
It is not recommended to try Gauss-Jordan method alone to make multithreads
If somebody detects that in multithread is slower and the initial function is spreaded correctly for every thread, perhaps is due one thread results are used by another
The main function inverse() works and can be used by other programmers, so this question should not be deleted
Non answered question:
What is a matrix inverse method that could be spreaded in a lot of independent threads to be used in a gpu?
I'm currently studying for an exam and I'm trying to deal with dynamical matrix. I've come across a problem regarding calculating the sum of every diagonal of a matrix whose values and size are chosen by the user.
The intent of my program is to print, thanks to a function, whose parameters are the matrix and its size, the value of every diagonal sum. I'll show you the code and describe it in depth.
----------------
| 52 | 35 | 5 | Example of matrix.
---------------- Imagine the first diagonal to be the one which goes right-to-left
| 2 | 71 | 1 | and only consists in the number "47".
---------------- The second diagonal would be the one which goes right-to-left and
| 3 | 60 | 25 | consists in the number "15" and "79".
---------------- So to get the sum of the second diagonal it would be:
| 79 | 55 | 98 |
---------------- sum = m[n_rows - 1][diag - 2] + m[n_rows - 2][diag - 1]
| 47 | 15 | 66 |
---------------- When diag > columns, in order to avoid error regarding matrix size,
I should lower the quantity "n_rows - 1" by the quantity "diag - n_columns".
This is what I thought to do, according to my description:
void diag_matrix(int** m, int righe, int colonne){//righe = rows, colonne = columns.
//M is the matrix.
// diag is the number of the diagonal I'm considering.
for(int diag = 1; diag < (righe + colonne); diag++){
int sum = 0;// the sum
int i = 0;// the counter of the cicle
int l = 0;// this is the value to riallign the row in case diag > column
int temp = diag;//I use this variable not to modify the value of diag.
// What I want is: when the column-index/row-index of the matrix reaches 0, the cicle will interrupt (after final iteration);
while(righe - i - l - 1 > 0 || diag - 1 - i > 0){
if (diag > colonne){//this condition changes l-value only if diag value is greater than column. Explanation outside the code
l = diag - colonne;//this is the value to subtract to row-index
temp = colonne;//this position is necessary to set column-index to its maxium.
}
sum = sum + m[righe - 1 - l - i][temp -1 - i];//pretty clear I think.
i++;//the i is incremented by one.
}// end of while-statement
cout << "Somma Diagonale " << diag << " = " << sum << ".\n";
}// end of for-statement
}//end of function declaration
Obviously it does not work, but I can't figure out the problem.
(There used to be a paragraph here, but on a second look, you didn’t make the mistake it was talking about.)
Since you didn’t post to Code Reviews, here’s a solution instead of a detailed code review. (If you want to make the original approach work, I’d suggest single-stepping through it in a debugger and checking where your variables first get the wrong value.) It’s got a lot of boilerplate to make it compile and run, but the part you’ll be most interested in is diag_sums() and its comments.
One idea here is to use OOP to automatically check the bounds of your array accesses. The latter is very important for catching off-by-one errors and the like. You can turn it off in production if you want, but you really don’t want to silence warnings when your program has a buffer overrun. Other optimizations here include locality of access for the data, and strength reduction on the operations: rather than check on each iteration whether we’ve hit the right edge and the bottom edge, we can simply calculate the length of each diagonal in advance.
Since the definition of diagonal number k of matrix a with M rows is equivalent to: all elements a[i][j] such that such that M - k = i - j, the algorithm ensures correctness by maintaining the invariant, which holds whenever we add 1 to both i and j, starting when either i or j is 0, and stopping whenever i = M or j = N, that is, traversing each step of the diagonal from the left or top edge to the right or bottom edge, whichever comes first.
#include <assert.h>
#include <iostream>
#include <stddef.h>
#include <stdlib.h>
#include <utility>
#include <vector>
using std::cin;
using std::cout;
template <typename T>
class matrix {
public:
matrix( const ptrdiff_t rows,
const ptrdiff_t cols,
std::vector<T>&& elems )
: rows_(rows), cols_(cols), elems_(elems)
{
assert( rows_ > 0 );
assert( cols_ > 0 );
assert( elems_.size() == static_cast<size_t>(rows_*cols_) );
}
matrix( const ptrdiff_t rows,
const ptrdiff_t cols,
const std::vector<T>& elems )
: matrix( rows, cols, std::move(std::vector<T>(elems)) )
{}
matrix( const matrix<T>& ) = default;
matrix( matrix<T>&& ) = default;
matrix& operator= ( const matrix<T>& ) = default;
matrix& operator= ( matrix<T>&& ) = default;
T& operator() ( const ptrdiff_t m, const ptrdiff_t n )
{
assert( m >= 0 && m < rows_ );
assert( n >= 0 && n < cols_ );
return elems_[static_cast<size_t>(m*cols_ + n)];
}
const T& operator() ( const ptrdiff_t m, const ptrdiff_t n ) const
{
/* Because this call does not modify any data, and the only reason the
* member function above cannot be const is that it returns a non-const
* reference to an element of elems, casting away the const qualifier
* internally and then returning a const reference is a safe way to
* re-use the code.
*/
matrix<T>& nonconst = *const_cast<matrix<T>*>(this);
return nonconst(m,n);
}
ptrdiff_t rows() const { return rows_; }
ptrdiff_t cols() const { return cols_; }
private:
ptrdiff_t rows_;
ptrdiff_t cols_;
std::vector<T> elems_;
};
template<typename T>
std::ostream& operator<< ( std::ostream& out, const matrix<T>& x )
/* Boilerplate to print a matrix. */
{
const ptrdiff_t m = x.rows(), n = x.cols();
for ( ptrdiff_t i = 0; i < m; ++i ) {
out << x(i,0);
for ( ptrdiff_t j = 1; j < n; ++j )
out << ' ' << x(i,j);
out << '\n';
} // end for
return out;
}
using elem_t = int;
std::vector<elem_t> diag_sums( const matrix<elem_t>& a )
/* Return a vector of all the diagonal sums of a.
*
* The first diagonal sum is a(rows-1,0)
* The second is a(rows-2,0) + a(rows-1,1)
* The third is a(rows-3,0) + a(rows-2,1) + a(rows-1,2)
* And so on. I.e., the kth diagonal is the sum of all elements a(i,j) such
* that i - j == rows - k.
*
* If a is a M×N matrix, there are M diagonals starting in column zero, and
* N-1 diagonals (excluding the one containing a(0,0) so we don't count it
* twice) starting in row 0. We process them bottom to top, then left to
* right.
*
* The number of elements in a diagonal starting at a(i,0) is min{M-i, N}. The
* number of elements in a diagonal starting at a(0,j) is min{M, N-j}. This is
* because a diagonal stops at either the bottom edge or the left edge of a.
*/
{
const ptrdiff_t m = a.rows(), n = a.cols();
std::vector<elem_t> result;
result.reserve( static_cast<size_t>(m + n - 1) );
for ( ptrdiff_t i = m-1; i > 0; --i ) {
elem_t sum = 0;
const ptrdiff_t nk = (m-i) < n ? (m-i) : n;
for ( ptrdiff_t k = 0; k < nk; ++k )
sum += a(i+k, k);
result.emplace_back(sum);
} // end for i
for ( ptrdiff_t j = 0; j < n; ++j ) {
elem_t sum = 0;
const ptrdiff_t nk = m < (n-j) ? m : (n-j);
for ( ptrdiff_t k = 0; k < nk; ++k )
sum += a(k, j+k);
result.emplace_back(sum);
} // end for j
return result;
}
matrix<elem_t> read_input_matrix( const int row, const int column )
/* Reads in row*column consecutive elements from cin and packs them into a
* matrix<elem_t>.
*/
{
assert(row > 0);
assert(column > 0);
const ptrdiff_t nelements = row*column;
assert(nelements > 0); // Check for overflow.
std::vector<elem_t> result;
result.reserve(static_cast<size_t>(nelements));
for ( ptrdiff_t i = nelements; i > 0; --i ) {
int x;
cin >> x;
assert(cin.good());
result.push_back(x);
}
return matrix<elem_t>( row,
column,
std::move(result) );
}
template<typename T>
bool print_sequence( const T& container )
/* Prints the contents of a container in the format
* "{47, 94, 124, 160, 148, 36, 5}".
*/
{
cout << "{";
if ( container.begin() != container.end() )
cout << *container.begin();
for ( auto it = container.begin() + 1; it < container.end(); ++it )
cout << ", " << *it;
cout << "}\n";
return cout.good();
}
/* A simple test driver that reads in the number of rows, the number of
* columns, and then row*columns int values, from standard input. It
* then passes the result to diag_matrix(), E.g.:
*
* 5 3
* 52 35 5
* 2 71 1
* 3 60 25
* 79 55 98
* 47 15 66
*/
int main()
{
int rows, columns;
cin >> rows;
cin >> columns;
assert(cin.good());
const matrix<elem_t> input_matrix = read_input_matrix( rows, columns );
// cout << input_matrix; // Instrumentation.
const std::vector<elem_t> sums = diag_sums(input_matrix);
print_sequence(sums);
return EXIT_SUCCESS;
}
You could also just do print_sequence(diag_sums(read_input_matrix( rows, columns ))).
You can simplify your code finding the starting position of each diagonal and then stepping through the matrix as long as the coordinates stay inside the matrix.
Something like this:
#include <iostream>
using namespace std;
void diag_matrix(int** m, int rows, int cols)
{
for (int diag = 1; diag < rows + cols; diag++)
{
int x, y;
if (diag < rows)
{
y = rows - diag;
x = 0;
}
else
{
y = 0;
x = diag - rows;
}
int sum = 0;
cout << "Summing diagonal #" << diag << ":";
while ((x < cols) && (y < rows))
{
sum += m[y][x];
cout << " " << m[y][x];
x++;
y++;
}
cout << " result: " << sum << "." << endl;
}
}
int main(int argc, char* argv[])
{
int rows = 5, cols = 3;
int **m = new int*[rows];
for (int i = 0; i < rows; i++)
m[i] = new int[cols];
m[0][0] = 52; m[0][1] = 35; m[0][2] = 5;
m[1][0] = 2; m[1][1] = 71; m[1][2] = 1;
m[2][0] = 3; m[2][1] = 60; m[2][2] = 25;
m[3][0] = 79; m[3][1] = 55; m[3][2] = 98;
m[4][0] = 47; m[4][1] = 15; m[4][2] = 66;
diag_matrix(m, rows, cols);
for (int i = 0; i < rows; i++)
delete[] m[i];
delete[] m;
return 0;
}
I am trying to write an efficient code to perform circular shift which I need to implement it on multiple times on big matrices during my data processing.
On my first trial, compiler throws some exception and it seems that I may be trying to access matrix element outside its size and I have no idea what is going on wrong.
1) I am also using Armadillo lib which has "mat" definition.
2) I intend to shift it by row and/ or column.
Here is my try:
#include "stdafx.h"
#include <vector>
#include <iostream>
#include "C:\Users\kumar\Documents\Visual Studio 2012\UserLibs\armadillo-3-910-0\include\armadillo"
#include <stdlib.h> /* srand, rand */
using namespace arma;
template<class ty>
void circshift(ty *out, const ty *in, int xdim, int ydim, int xshift, int yshift)
{
int iOutputInd, iInputInd, ii, jj;
for (int i =0; i < xdim; i++)
{
ii = (i + xshift) % xdim;
for (int j = 0; j < ydim; j++)
{
jj = (j + yshift) % ydim;
iOutputInd = ii * ydim + jj;
iInputInd = i * ydim + j;
std::cout << " iOutputInd --> " << iOutputInd << " ; iInputInd -->" << iInputInd << "\n";
out[iOutputInd] = in[iInputInd]; // EXCEPTION BEING THROWN HERE
}
}
}
int _tmain(int argc, _TCHAR* argv[])
{
//a = [1 2 3; 4 5 6; 7 8 9];
mat a, a_out; // "mat" defined in C++ lib Armadillo
a << 1 << 2 << 3 << endr
<< 4 << 5 << 6 << endr
<< 7 << 8 << 9 <<endr;
a.reshape(3,3);
//a.print();
a_out = a;
int xdim = 3; int ydim = 3; int xshift = 1; int yshift = 0;
circshift(&a_out, &a, xdim, ydim, xshift, yshift);
a_out.print();
return 0;
}
It compiles fine. However, when I try to run, Visual studio throws following error:
Unhandled exception at 0x3FF00000 in Circshift_Example.exe: 0xC0000005: Access violation (parameters: 0x00000008).
I get another error in visual studio console, which complains:
error: Mat::init(): requested size is too large
Update: FINAL SOLUTION
I am posting my code as it may be useful for some users.
Please note that I am using "Armadillo" library to create matrix. One can replace Armadillo "mat" class wwith their own matrix class.
Please up-vote if you use this code.
#include "stdafx.h"
#include "armadillo-3-910-0\include\armadillo"
using namespace arma;
template<class ty>
void circshift(ty& out, const ty& in, int xshift, int yshift)
{
int iOutputInd, iInputInd, ii, jj;
int ydim = in.n_cols;
int xdim = in.n_rows;
for (int j =0; j < ydim; j++)
{
jj = (j + yshift) % ydim;
if (jj <0) jj = jj + ydim;
for (int i = 0; i < xdim; i++)
{
ii = (i + xshift) % xdim;
if (ii <0) ii = ii + xdim;
out[jj * xdim + ii] = in[j * xdim + i];
}
}
}
int _tmain(int argc, _TCHAR* argv[])
{
//a = [1 2 3; 4 5 6; 7 8 9];
mat a, a_out;
a << 1 << 2 << 3 << endr
<< 4 << 5 << 6 << endr
<< 7 << 8 << 9 <<endr;
a.reshape(3,3);
a_out = a;
int xshift = 1; int yshift = 0;
circshift(a_out, a, xshift, yshift);
a_out.print();
xshift = 1; yshift = -1;
circshift(a_out, a, xshift, yshift);
a_out.print();
return 0;
}
The main error here is that you pass pointers to mat type objects to the circshift() function (the out and in argument, but then use these arguments as arrays to mat. The following line is not interpreted as you think
out[iOutputInd] = in[iInputInd];
because out and in are not mat objects. They are pointers to mat objects, so the compiler will interpret in and out as being pointer to arrays of mat and index these arrays, copying a non-existant mat from in[...] to another non-existant location.
One simple way to fix that is to use references instead of pointers to pass the mat objects, i.e.:
template<class ty> void circshift(ty& out, const ty& in, int xdim, int ydim, int xshift, int yshift)
{
...
}
and call it in _tmain using:
circshift(a_out, a, xdim, ydim, xshift, yshift);