I have an array a[10][50] and I created an array b[10][35] that stores the elements of a but I don't know how to create the array c[10][15] where the elements of array c are the elements left over from "a", can you help me?
int main(){
//Variables iniciales
float min = -32.768; //limite inferior
float max = 32.768; //limite superior
const int dim = 10; //dimension
const int N = 50; //poblacion inicial
//inicializar A
float a[dim][N];
for (int i = 0; i < dim; i++)
{
for (int j = 0; j < N; j++)
{
float aleatorio;
aleatorio = (float)rand() / (float)RAND_MAX;
a[i][j] = min + abs(max - min) * aleatorio;
cout << i<<" - " << j << ":" << a[i][j] << endl;
}
}
//Dividir A en presas y depredadores
float preys_rate[] = { 0.7,0.9 };
float aleatorio = (float)0 + rand() % 1 / (float)RAND_MAX;
float rate = preys_rate[0] + (preys_rate[1] - preys_rate[0]) * aleatorio;
float N_h = round(N * rate); //Numero de presas
float N_p = N - N_h; //Numero de depredadores
//Asignar los miembros de A a H
float H[dim][35] ;
float fitness_value_h[35];
for (int i = 0; i < dim; i++)
{
for (int j = 0; j < N_h; j++)
{
H[i][j] = a[i][j];
}
}
//Evaluar la funcion en el elemento N_h
for (int i = 0; i < N_h; i++)
{
fitness_value_h[i] = funcion_obj(H,35, 10 ,dim);
}
//Asignar los miembros restantes de A a P
float P[dim][15];
//float fitness_value_p[15];
//Si pertenecen a H[J], se ignoran
for (int i = 0; i < dim; i++)
{
for (int j = 35; j< N; j--)
{
if (H[i][j]=a[i][j])
{
}else{
P[i][j] = a[i][j];
}
}
}
}
As you can see, array a[10][50] has some random data, and array H[10][35] has the values of "a" and I need a third array (in this example, P) P[10][15] that has the other "a" elements
Why not create the new array and simply add the remaining elements at the end of whatever loop you're using? If you know exactly how long the arrays are supposed to be, you can do this very easily.
It would be much easier if you included code in your questions so that we could see where the issue is.
EDIT: You are doing the loop in a more complicated way than necessary. Never set the initial in in a for loop to be the number of repetitions and subtracting.
for (int j = 35; j < N; j--) //BAD
for (int j = 0; j <= 35; j++) //GOOD
To fix your issue, simply add a condition to your loop to do something past the 35th repitition.
//Asignar los miembros de A a H
float H[dim][35] ;
float fitness_value_h[35];
float K[dim][15] ;
float fitness_value_k[15];
for (int i = 0; i < dim; i++)
{
for (int j = 0; j < N_h; j++)
{
if (j < 35) {
H[i][j] = a[i][j];
}
else {
K[i][j] = a[i][j];
}
}
}
Related
I am using gcc compiler. (g++ -o test testfile.cpp)
I want to use Openacc to parallelize my code but I am a bit confused about using #pragma correctly.
Below is the part where I used parallelization.
Even after using Openacc the code is not faster than before.
I guess this is related to 'data-moving' thing.
So I think I need to use #pragma acc data copy here. But I am not sure how to use this properly.
Any help?
Thanks in advance.
#include <iostream>
#include <cstdio>
#include <chrono>
#include <vector>
#include <math.h> // power
#include <cmath> // abs
#include <fstream>
using namespace std;
using namespace chrono;
// Dynamically allocation with values(float)
void dallo_fn(float**** pMat, int Na, int Nd, int Ny) {
float*** Mat = new float** [Na];
for (int i = 0; i < Na; i++) {
Mat[i] = new float* [Nd];
for (int j = 0; j < Nd; j++) {
Mat[i][j] = new float[Ny];
fill_n(Mat[i][j], Ny, 1);
}
}
*pMat = Mat;
}
// Dynamically allocation without values(float)
void dallo_fn0(float**** pMat, int Na, int Nd, int Ny) {
float*** Mat = new float** [Na];
for (int i = 0; i < Na; i++) {
Mat[i] = new float* [Nd];
for (int j = 0; j < Nd; j++) {
Mat[i][j] = new float[Ny];
}
}
*pMat = Mat;
}
// Dynamically allocation without values(int)
void dallo_fn1(int**** pMat, int Na, int Nd, int Ny) {
int*** Mat = new int** [Na];
for (int i = 0; i < Na; i++) {
Mat[i] = new int* [Nd];
for (int j = 0; j < Nd; j++) {
Mat[i][j] = new int[Ny];
}
}
*pMat = Mat;
}
// Utility function
float utility(float a, float a_f, float d, float d_f, float y, double sig, double psi, double delta, double R) {
float C;
C = y + a - a_f / R - (d_f - (1 - delta) * d);
float result;
if (C > 0) {
result = 1 / (1 - 1 / sig) * pow(pow(C, psi) * pow(d_f, 1 - psi), (1 - 1 / sig));
}
else {
result = -999999;
}
return result;
}
int main()
{
float duration;
// Iteration Parameters
double tol = 0.000001;
int itmax = 200;
int H = 15;
// Model Parameters and utility function
double sig = 0.75;
double beta = 0.95;
double psi = 0.5;
double delta = 0.1;
double R = 1 / beta - 0.00215;
// =============== 2. Discretizing the state space =========================
// Size of arrays
const int Na = 1 * 91;
const int Nd = 1 * 71;
const int Ny = 3;
// Variables for discretization of state space
const float amin = -2;
const float amax = 7;
const float dmin = 0.01;
const float dmax = 7;
const float ymin = 0.5;
const float ymax = 1.5;
const float Ptrans[3] = { 0.2, 0.6, 0.2 };
// Discretization of state space
float ca = (amax - amin) / (Na - 1.0);
float cd = (dmax - dmin) / (Nd - 1.0);
float cy = (ymax - ymin) / (Ny - 1.0);
float* A = new float[Na];
float* Y = new float[Ny];
float* D = new float[Nd];
for (int i = 0; i < Na; i++) {
A[i] = amin + i * ca;
}
for (int i = 0; i < Nd; i++) {
D[i] = dmin + i * cd;
}
for (int i = 0; i < Ny; i++) {
Y[i] = ymin + i * cy;
}
// === 3. Initial guesses, Variable initialization and Transition matrix ===
// Initial guess for value function
float*** V;
dallo_fn(&V, Na, Nd, Ny);
float*** Vnew;
dallo_fn(&Vnew, Na, Nd, Ny);
// Initialization of other variables
float Val[Na][Nd];
float** Vfuture = new float* [Na];
for (int i = 0; i < Na; i++)
{
Vfuture[i] = new float[Nd];
}
float** temphoward = new float* [Na];
for (int i = 0; i < Na; i++)
{
temphoward[i] = new float[Nd];
}
float*** Vhoward;
dallo_fn0(&Vhoward, Na, Nd, Ny);
float*** tempdiff;
dallo_fn0(&tempdiff, Na, Nd, Ny);
int*** maxposition_a;
dallo_fn1(&maxposition_a, Na, Nd, Ny);
int*** maxposition_d;
dallo_fn1(&maxposition_d, Na, Nd, Ny);
float** mg_A_v = new float* [Na];
for (int i = 0; i < Na; i++)
{
mg_A_v[i] = new float[Nd];
}
for (int j = 0; j < Nd; j++) {
for (int i = 0; i < Na; i++) {
mg_A_v[i][j] = A[i];
}
}
float** mg_D_v = new float* [Na];
for (int i = 0; i < Na; i++)
{
mg_D_v[i] = new float[Nd];
}
for (int j = 0; j < Nd; j++) {
for (int i = 0; i < Na; i++) {
mg_D_v[i][j] = D[j];
}
}
float***** Uvec = new float**** [Na];
for (int i = 0; i < Na; i++) {
Uvec[i] = new float*** [Nd];
for (int j = 0; j < Nd; j++) {
Uvec[i][j] = new float** [Ny];
for (int k = 0; k < Ny; k++) {
Uvec[i][j][k] = new float* [Na];
for (int l = 0; l < Na; l++) {
Uvec[i][j][k][l] = new float[Nd];
}
}
}
}
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
for (int l = 0; l < Na; l++) {
for (int m = 0; m < Nd; m++) {
Uvec[i][j][k][l][m] = utility(A[i], mg_A_v[l][m], D[j], mg_D_v[l][m], Y[k], sig, psi, delta, R);
}
}
}
}
}
// Value function iteration
int it;
float dif;
float max;
it = 0;
dif = 1;
// ================ 4. Value function iteration ============================
while (dif >= tol && it <= itmax) {
system_clock::time_point start = system_clock::now();
it = it + 1;
// V = Vnew;
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
V[i][j][k] = Vnew[i][j][k];
}
}
}
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
Vfuture[i][j] = 0;
for (int k = 0; k < Ny; k++) {
Vfuture[i][j] += beta * Ptrans[k] * Vnew[i][j][k]; // + beta * Ptrans[1] * Vnew[i][j][1] + beta * Ptrans[2] * Vnew[i][j][2]; // Why is this different from Vfuture[i][j] += beta * Vnew[i][j][k] * Ptrans[k]; with for k
}
}
}
#pragma acc kernels
for (int a = 0; a < Na; a++) {
for (int b = 0; b < Nd; b++) {
for (int c = 0; c < Ny; c++) {
max = -99999;
for (int d = 0; d < Na; d++) {
for (int e = 0; e < Nd; e++) {
Val[d][e] = Uvec[a][b][c][d][e] + Vfuture[d][e];
if (max < Val[d][e]) {
max = Val[d][e];
maxposition_a[a][b][c] = d;
maxposition_d[a][b][c] = e;
}
}
}
Vnew[a][b][c] = max;
}
}
}
// Howard improvement
for (int h = 0; h < H; h++) {
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
Vhoward[i][j][k] = Vnew[i][j][k];
}
}
}
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
temphoward[i][j] = beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][0] * Ptrans[0]
+ beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][1] * Ptrans[1]
+ beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][2] * Ptrans[2];
Vnew[i][j][k] = temphoward[i][j] + Uvec[i][j][k][maxposition_a[i][j][k]][maxposition_d[i][j][k]];
}
}
}
}
// Calculate Diff
dif = -100000;
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
tempdiff[i][j][k] = abs(V[i][j][k] - Vnew[i][j][k]);
if (tempdiff[i][j][k] > dif) {
dif = tempdiff[i][j][k];
}
}
}
}
system_clock::time_point end = system_clock::now();
std::chrono::duration<float> sec = end - start;
cout << dif << endl;
cout << it << endl;
cout << sec.count() << endl;
}
}
Are you using the flag to enable OpenACC, i.e. "-fopenacc"? If not the OpenACC directives will be ignored.
Note that you'll want to use a newer GNU version, 10.2 preferable, as GNU support for OpenACC has gotten better over the years. I believe their compiler loop dependency analysis is still lacking so will run "kernels" compute regions sequentially on the device. Hence, for now, you'll want to stick to using "parallel" regions. If you do really want to use "kernels", I'd suggest switching to the NVIDIA HPC compilers (full disclosure, I work for NVIDIA)
Now I think the initial problem is just that you're not enabling OpenACC and why it's the same speed. Actually here I'd expect this case to be extremely slow if you tried to offload it. Besides running the "kernels" region sequentially on the device, the data would need to be transferred back and forth between the host and device each timestep.
The optimal strategy is to have a data region outside of the while loop, use an "update" directive when needed to synchronize the device and host copies of the arrays, and then ensure all the compute has been offload to the device.
Since you didn't post a complete reproducer, I can't test this code and hence verify that it's correct. But to give you an idea of this strategy, I modified your code below:
#pragma acc enter data copyin(Vnew[:Na][:Nd][:Ny], Ptrans[:Ny]) \
create(Vfuture[:Na][Nd], V[:Na][:Nd][:Ny], maxposition_a[:Na][:Nd][:Ny], maxposition_b[:Na][:Nd][:Ny]) \
create(Vhoward[:Na][:Nd][:Ny]) // add others here as needed
while (dif >= tol && it <= itmax) {
system_clock::time_point start = system_clock::now();
it = it + 1;
// V = Vnew;
#pragma acc parallel loop collapse(3) default(present)
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
V[i][j][k] = Vnew[i][j][k];
}
}
}
#pragma acc parallel loop collapse(2) default(present)
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
Vfuture[i][j] = 0;
for (int k = 0; k < Ny; k++) {
Vfuture[i][j] += beta * Ptrans[k] * Vnew[i][j][k]; // + beta * Ptrans[1] * Vnew[i][j][1] + beta * Ptrans[2] * Vnew[i][j][2]; // Why is this different from Vfuture[i][j] += beta * Vnew[i][j][k] * Ptrans[k]; with for k
}
}
}
#pragma acc parallel loop collapse(3) default(present)
for (int a = 0; a < Na; a++) {
for (int b = 0; b < Nd; b++) {
for (int c = 0; c < Ny; c++) {
max = -99999;
for (int d = 0; d < Na; d++) {
for (int e = 0; e < Nd; e++) {
Val[d][e] = Uvec[a][b][c][d][e] + Vfuture[d][e];
if (max < Val[d][e]) {
max = Val[d][e];
maxposition_a[a][b][c] = d;
maxposition_d[a][b][c] = e;
}
}
}
Vnew[a][b][c] = max;
}
}
}
// Howard improvement
for (int h = 0; h < H; h++) {
#pragma acc parallel loop collapse(3) default(present)
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
Vhoward[i][j][k] = Vnew[i][j][k];
}
}
}
#pragma acc parallel loop collapse(2) default(present)
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
// I'm unclear why your using a 2D array for temphoward. It's preventing
// parallelzation of the inner loop and could be replaced with a scalar.
temphoward[i][j] = beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][0] * Ptrans[0]
+ beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][1] * Ptrans[1]
+ beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][2] * Ptrans[2];
Vnew[i][j][k] = temphoward[i][j] + Uvec[i][j][k][maxposition_a[i][j][k]][maxposition_d[i][j][k]];
}
}
}
}
// Calculate Diff
dif = -100000;
#pragma acc parallel loop collapse(3) reduction(max:dif)
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
// Again, why aren't you using a scalar here for tempdiff?
tempdiff[i][j][k] = abs(V[i][j][k] - Vnew[i][j][k]);
if (tempdiff[i][j][k] > dif) {
dif = tempdiff[i][j][k];
}
}
}
}
system_clock::time_point end = system_clock::now();
std::chrono::duration<float> sec = end - start;
cout << dif << endl;
cout << it << endl;
cout << sec.count() << endl;
}
#pragma acc update self(Vnew[:Na][:Nd][:Ny])
for (int k = 0; k < Ny; k++) {
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
cout << Vnew[i][j][k];
}
cout << '\n';
}
}
}
#pragma acc exit data delete(Vnew, Ptrans, Vfuture, V, maxposition_a, maxposition_b, Vhoward)
// add others here as needed
I am using Visual studio now and I have a question about an error message that I got.
It says 'std::bad_alloc at memory location'.
I guess this means that I don't have enough memory. I am using arrays for computation and I think the problem arises from the size of each array.
First I used stack instead of heap so I allocate every array dynamically. But still, I got the same error message.
If I reduce the size of the array, I could run this code but I need to use this with a bigger size of arrays.
If I use a better computer, can I run this code? I am not sure how to reduce the memory required to run this code.... should I use fewer arrays?
Below, I put the entire code just in case....
Thanks in advance.
include <iostream>
#include <cstdio>
#include <chrono>
#include <vector>
#include <math.h> // power
#include <cmath> // abs
#include <fstream>
#include <omp.h>
using namespace std;
using namespace chrono;
// Dynamically allocation with values(double)
void dallo_fn(double**** pMat, int Na, int Nd, int Ny) {
double*** Mat = new double** [Na];
for (int i = 0; i < Na; i++) {
Mat[i] = new double* [Nd];
for (int j = 0; j < Nd; j++) {
Mat[i][j] = new double[Ny];
fill_n(Mat[i][j], Ny, 1);
}
}
*pMat = Mat;
}
// Dynamically allocation without values(double)
void dallo_fn0(double**** pMat, int Na, int Nd, int Ny) {
double*** Mat = new double** [Na];
for (int i = 0; i < Na; i++) {
Mat[i] = new double* [Nd];
for (int j = 0; j < Nd; j++) {
Mat[i][j] = new double[Ny];
}
}
*pMat = Mat;
}
// Dynamically allocation without values(int)
void dallo_fn1(int**** pMat, int Na, int Nd, int Ny) {
int*** Mat = new int** [Na];
for (int i = 0; i < Na; i++) {
Mat[i] = new int* [Nd];
for (int j = 0; j < Nd; j++) {
Mat[i][j] = new int[Ny];
}
}
*pMat = Mat;
}
// Utility function
double utility(double a, double a_f, double d, double d_f, double y, double sig, double psi, double delta, double R) {
double C;
C = y + a - a_f / R - (d_f - (1 - delta) * d);
double result;
if (C > 0) {
result = 1 / (1 - 1 / sig) * pow(pow(C, psi) * pow(d_f, 1 - psi), (1 - 1 / sig));
}
else {
result = -999999;
}
return result;
}
int main()
{
#if defined _OPENMP
omp_set_num_threads(8);
#endif
double duration;
// Iteration Parameters
double tol = 0.000001;
double itmax = 200;
double H = 15;
// Model Parameters and utility function
double sig = 0.75;
double beta = 0.95;
double psi = 0.5;
double delta = 0.1;
double R = 1 / beta - 0.00215;
// =============== 2. Discretizing the state space =========================
// Size of arrays
const int Na = 2 * 91;
const int Nd = 1 * 71;
const int Ny = 3;
// Variables for discretization of state space
const double amin = -2;
const double amax = 7;
const double dmin = 0.01;
const double dmax = 7;
const double ymin = 0.5;
const double ymax = 1.5;
const double Ptrans[3] = { 0.2, 0.6, 0.2 };
// Discretization of state space
double ca = (amax - amin) / (Na - 1.0);
double cd = (dmax - dmin) / (Nd - 1.0);
double cy = (ymax - ymin) / (Ny - 1.0);
double* A = new double[Na];
double* Y = new double[Ny];
double* D = new double[Nd];
for (int i = 0; i < Na; i++) {
A[i] = amin + i * ca;
}
for (int i = 0; i < Nd; i++) {
D[i] = dmin + i * cd;
}
for (int i = 0; i < Ny; i++) {
Y[i] = ymin + i * cy;
}
// === 3. Initial guesses, Variable initialization and Transition matrix ===
// Initial guess for value function
double*** V;
dallo_fn(&V, Na, Nd, Ny);
double*** Vnew;
dallo_fn(&Vnew, Na, Nd, Ny);
// Initialization of other variables
double Val[Na][Nd];
double Vfuture[Na][Nd];
double** temphoward = new double* [Na];
for (int i = 0; i < Na; i++)
{
temphoward[i] = new double[Nd];
}
double*** Vhoward;
dallo_fn0(&Vhoward, Na, Nd, Ny);
double*** tempdiff;
dallo_fn0(&tempdiff, Na, Nd, Ny);
int*** maxposition_a;
dallo_fn1(&maxposition_a, Na, Nd, Ny);
int*** maxposition_d;
dallo_fn1(&maxposition_d, Na, Nd, Ny);
double** mg_A_v = new double* [Na];
for (int i = 0; i < Na; i++)
{
mg_A_v[i] = new double[Nd];
}
for (int j = 0; j < Nd; j++) {
for (int i = 0; i < Na; i++) {
mg_A_v[i][j] = A[i];
}
}
double** mg_D_v = new double* [Na];
for (int i = 0; i < Na; i++)
{
mg_D_v[i] = new double[Nd];
}
for (int j = 0; j < Nd; j++) {
for (int i = 0; i < Na; i++) {
mg_D_v[i][j] = D[j];
}
}
double***** Uvec = new double**** [Na];
for (int i = 0; i < Na; i++) {
Uvec[i] = new double*** [Nd];
for (int j = 0; j < Nd; j++) {
Uvec[i][j] = new double** [Ny];
for (int k = 0; k < Ny; k++) {
Uvec[i][j][k] = new double* [Na];
for (int l = 0; l < Na; l++) {
Uvec[i][j][k][l] = new double[Nd];
}
}
}
}
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
for (int l = 0; l < Na; l++) {
for (int m = 0; m < Nd; m++) {
Uvec[i][j][k][l][m] = utility(A[i], mg_A_v[l][m], D[j], mg_D_v[l][m], Y[k], sig, psi, delta, R);
}
}
}
}
}
// Value function iteration
int it;
double dif;
double max;
it = 0;
dif = 1;
// ================ 4. Value function iteration ============================
while (dif >= tol && it <= itmax) {
system_clock::time_point start = system_clock::now();
it = it + 1;
// V = Vnew;
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
V[i][j][k] = Vnew[i][j][k];
}
}
}
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
Vfuture[i][j] = 0;
for (int k = 0; k < Ny; k++) {
Vfuture[i][j] += beta * Ptrans[k] * Vnew[i][j][k]; // + beta * Ptrans[1] * Vnew[i][j][1] + beta * Ptrans[2] * Vnew[i][j][2]; // Why is this different from Vfuture[i][j] += beta * Vnew[i][j][k] * Ptrans[k]; with for k
}
}
}
#pragma omp parallel for private(Val)
for (int a = 0; a < Na; a++) {
for (int b = 0; b < Nd; b++) {
for (int c = 0; c < Ny; c++) {
max = -99999;
for (int d = 0; d < Na; d++) {
for (int e = 0; e < Nd; e++) {
Val[d][e] = Uvec[a][b][c][d][e] + Vfuture[d][e];
if (max < Val[d][e]) {
max = Val[d][e];
maxposition_a[a][b][c] = d;
maxposition_d[a][b][c] = e;
}
}
}
Vnew[a][b][c] = max;
}
}
}
// Howard improvement
for (int h = 0; h < H; h++) {
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
Vhoward[i][j][k] = Vnew[i][j][k];
}
}
}
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
temphoward[i][j] = beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][0] * Ptrans[0]
+ beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][1] * Ptrans[1]
+ beta * Vhoward[maxposition_a[i][j][k]][maxposition_d[i][j][k]][2] * Ptrans[2];
Vnew[i][j][k] = temphoward[i][j] + Uvec[i][j][k][maxposition_a[i][j][k]][maxposition_d[i][j][k]];
}
}
}
}
// Calculate Diff
dif = -100000;
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
for (int k = 0; k < Ny; k++) {
tempdiff[i][j][k] = abs(V[i][j][k] - Vnew[i][j][k]);
if (tempdiff[i][j][k] > dif) {
dif = tempdiff[i][j][k];
}
}
}
}
system_clock::time_point end = system_clock::now();
std::chrono::duration<double> sec = end - start;
cout << dif << endl;
cout << it << endl;
cout << sec.count() << endl;
}
for (int k = 0; k < Ny; k++) {
for (int i = 0; i < Na; i++) {
for (int j = 0; j < Nd; j++) {
cout << Vnew[i][j][k];
}
cout << '\n';
}
}
cout << omp_get_max_threads() << endl;
}
Yes, with enough memory you can run this code. I've tested it just now on my machine with 64GBs of RAM and it ran just fine.
One way to reduce the amount of memory required is to use floats instead of doubles, as they take up half the size.
Moreover, as others suggested in the comments already you could represent the multidimensional structures as 1d arrays and compute the indices into the array instead of actually having nested arrays. The benefit is that you get rid of a huge number of pointers.
The code is admittedly pretty memory hungry, but works on my 8GB machine. Please make sure you are targetting x64 platform, otherwise you reach the 4GB inherent limit.
I wrote the code according to the algorithm, but the result is incorrect. According to the algorithm, we must indicate the dimension of the matrix and manually fill in the main matrix A and vector B. We need to generate an LU matrix. It is generated, but with the wrong numbers. And in the end we have to get the vector X with solutions. And this is in windowed mode.
https://imgur.com/TSsjMXp
int N = 1; // matrix dimension
double R = 0;
typedef double Matrix [6][6];
typedef double Vec [6];
.
.
.
void Decomp (Matrix A, int N, int &Change)
{
int i, j, k ;
double R, L, U;
Change = 1;
R = Math::Abs(A[1][1]);
for(j=2; j<=N; j++)
if (Math::Abs(A[j][1])>= R)
{
Change = j;
R = Math::Abs(A[j][1]);
}
if (R<= 1E-7)
{
MessageBox::Show("The system is degenerate");
}
if (k!=1)
{
for(i=1; i<=N; i++)
{
R = A[Change][i];
A[Change][i] = A[1][i];
A[1][i] = R;
}
}
for(i=2; i<=N; i++)
A[1][i] = A[1][i]/A[1][1];
for(i=2; i<=N; i++)
{
for(k=i; k<=N; k++);
{
R = 0;
for ( j=1; j<=(i-1); j++)
R = R + A[k][j] * A[j][i];
A[k][i] = A[k][i] - R;
}
if (A[i][i]<= 1E-7)
{
MessageBox::Show("The system is degenerate[enter image description here][1]");
}
for(k = i+1; k<=N; k++)
{
R = 0;
for (j=1; j<=(i-1); j++)
R = R + A[i][j] * A[j][k];
A[i][k] = (A[i][k] - R) / A[i][i];
}
}
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
{
C_matrix_dgv->Rows[i]->Cells[j] -> Value = Convert::ToString(A[i+1][j+1]);
}
}
void Solve (Matrix A, Vec b, Vec x, int Change, int N)
{
int i = 0,j = 0;
double R;
if (Change!=1)
{
R = b[Change];
b[Change] = b[1];
b[1] = R;
}
b[1] = b[1]/A[1][1];
for(i=2; i<=N; i++)
{
R = 0;
for( j=1; j<=(i-1); j++)
R = R + A[i][j] * b[j];
b[i] = (b[i] - R) / A[i][i];
}
x[N] = b[N];
for( i=1; i<=(N-1); i++)
{
R = 0;
for(j = (N+1-i); j<=N; j++)
R = R + A[N - i][j] * x[j];
x[N - i] = b[N - i] - R;
}
}
int N = 1; // matrix dimension
If you use this in the rest of the code you cannot get correct results. The dimension of the matrix is 6x6. Use a std::array or std::vector so that you dont need to keep the size in a seperate variable.
Here is my driver
int square[2][2] = {
{1,2},
{3,4}
};
matrixMulti(square, 2);
Here is my function
void matrixMulti(const int a[][2], const int rows) {
int b[2][2];
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
for (int k = 0; k < 2; k++) {
b[i][j] += a[i][k] * a[k][j];
}
}
}
cout << "new matrix " << endl;
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
cout << b[i][j] << " ";
}
cout << endl;
}
}
The output is:
new matrix
-858993453 -858993450
-858993445 -858993438
I am confused as to why it is printing the memory addresses rather then the values stored in them and what can be done to get it to print the values rather than the memory address.
You wrote:
int b[2][2];
and then
b[i][j] += ...
Where did you initialize b ?
int b[2][2] = {};
or
int b[2][2] = {0,0};
Should do the job.
Garbage values are being printed, not the address of the array.
Here is why.
In this snippet of code
void matrixMulti(const int a[][2], const int rows) {
int b[2][2];
for (int i = 0; i < 2; i++) {
for (int j = 0; j < 2; j++) {
for (int k = 0; k < 2; k++) {
b[i][j] += a[i][k] * a[k][j];
}
}
}
You allocated space for int b[2][2], but you do not initialize it to a value. This would be fine except in the for-loop body.
b[i][j] += a[i][k] * a[k][j]; <=> b[i][j] = b[i][j] + a[i][k] * a[k][j];
you are referencing the value of b[i][j] before it is initialized. In other words you say b[i][j] is equal to b[i][j] + a[i][k] * a[k][j], but you can't do this as, the value of b[i][j] is unknown.
The solution to this is to initialize int b[2][2] = {{0,0},{0,0}};
I want make any matrix[n][n+1] become an upper triangular matrix[n][n+1].
I did this code but that causes a segmentation fault.
void diagonalizarMatriz(float** Matriz, int n){
for(int i = 0; i < n-1; i++)
for(int k = 0; k < n; k ++)
for(int j = n; j >= i; j++)
Matriz[k][j] = Matriz[k][j] - ((Matriz[k][i] * Matriz[i][j]) / Matriz[i][i]);
}
int main(){
float** Matriz = new float* [3];
for(int i = 0; i < 3 ; i++)
Matriz[i] = new float [4];
//test matrix
Matriz[0][0] = 1;
Matriz[0][1] = 4;
Matriz[0][2] = 52;
Matriz[0][3] = 57;
Matriz[1][0] = -27;
Matriz[1][1] = -110;
Matriz[1][2] = -3;
Matriz[1][3] = -134;
Matriz[2][0] = 22;
Matriz[2][1] = 2;
Matriz[2][2] = 14;
Matriz[2][3] = 38;
diagonalizarMatriz(Matriz, 3);
Here
for(int j = n; j >= i; j++)
you start with n at the upper border of the dimensions of your array and the count up,
very soon you therefor access beyond your array, which gets you a segfault if you are lucky.
At a guess you want
for(int j = n; j >= i; j--)
to count down.
UPDATE
Assuming your question is about diagonalizing a matrix (as Eugene claims in the comment section):
In addition to what Yunnosch pointed out, diagonalizing a matrix means that the matrix must be a square n x n. However, in main you're initializing it to be a 3 x 4 matrix.
Original code
float** Matriz = new float* [3];
for(int i = 0; i < 3 ; i++)
Matriz[i] = new float [4];
To get rid of the segfault, change the following part in main (set matrix to 3 x 3):
float** Matriz = new float* [3];
for(int i = 0; i < 3 ; i++)
Matriz[i] = new float [3];
//test matrix
Matriz[0][0] = 1;
Matriz[0][1] = 4;
Matriz[0][2] = 52;
Matriz[1][0] = -27;
Matriz[1][1] = -110;
Matriz[1][2] = -3;
Matriz[2][0] = 22;
Matriz[2][1] = 2;
Matriz[2][2] = 14;
Finally, to get the following matrix (lower-triangular):
1 0 0
4 5 0
7 8 9
Remove the equal sign from your third nested loop:
for(int j = n; j > i; j--)
I assume from here you can work your way through this, to make it an upper-triangular matrix.
OLD ANSWER
Try this:
int matrix[3][3] = {{1,2,3},{4,5,6},{7,8,9}};
int row = 3;
int col = 3;
int i, j;
for (i = 0; i < row; i++) {
for (j = 0; j < col; j++) {
if (i > j) {
cout << "0" << " ";
} else
cout << matrix[i][j] << " ";
}
cout << endl;
}
Will give you this matrix
1 2 3
0 5 6
0 0 9