I'm trying to run the following code on an Ubuntu machine:
/**
For compiling -->
export GOMP_CPU_AFFINITY='0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64,68,72,76,80,84,88,92,96,100,104,108,112,116,120,124,128,132,136,140,144,148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224,228,232,236,240,244,248,252,256,260,264,268'
export OMP_NUM_THREADS=68
set MKL_NUM_THREADS = 68
icc -std=gnu++98 -O3 -qopenmp -xhost -ansi-alias -ipo -AVX512 mkl_2d_heat_fftw_P.cpp -o mkl_2d_heat_fftw_P -lm -mkl
For running -->
* ./mkl_2d_heat_fftw_P N T numThreads
* Example: ./mkl_2d_heat_fftw_P 1000 100000 1
*/
#include <iostream>
#include <vector>
#include <algorithm>
#include <cstring>
#include <complex.h>
#include "mkl_service.h"
#include "mkl_dfti.h"
#include <string>
#include <cstdlib>
#include <cmath>
#include <ctime>
#include <sys/time.h>
#include <cstdio>
#include <omp.h>
// #include <cilk/cilk.h>
// #include <cilk/cilk_api.h>
// #include "cilktime.h"
#ifdef USE_PAPI
#include <papi.h>
#include "papilib.h"
#endif
#ifdef POLYBENCH
#include <polybench.h>
#endif
using namespace std;
typedef vector<double> vd;
typedef vector<vector<double> > vvd;
#define PB push_back
#define SZ(x) (int)x.size()
#define MAXN 8010
int T, N, N_THREADS;
const int BASE = 1024;
double a1[MAXN][MAXN], a2[MAXN][MAXN];
// double *forward_input_buffer, *backward_output_buffer;
// double complex *forward_output_buffer, *backward_input_buffer;
double complex *a_complex, *odd_mults, *input_complex;
double *mkl_forward_input_buffer, *mkl_backward_output_buffer;
double complex *mkl_forward_output_buffer, *mkl_backward_input_buffer;
template<class T> void out(const vector<T> &a) { cout<<"array: "; for (int i=0;i<SZ(a);i++) cout<<a[i]<<" "; cout<<endl; cout.flush(); }
long getTime(){
struct timeval tp;
gettimeofday(&tp, NULL);
long int ms = tp.tv_sec * 1000 + tp.tv_usec / 1000;
return ms;
}
void pad_matrix(vvd &v, int rows, int cols){
int pad_rows = rows - SZ(v);
int pad_cols = cols - SZ(v[0]);
for (int i = 0; i < pad_rows; i++)
v.PB(vd(SZ(v[0]), 0.0));
for (int i = 0; i < SZ(v); i++){
for (int j = 0; j < pad_cols; j++)
v[i].PB(0.0);
}
}
// Resizing matrices to [r1+r2, c1+c2] for circular convolution
void pad_vectors(vd &input, vd &formula)
{
int n = SZ(input);
vd tmp = vd(n*3, 0);
for (int i = 0; i < n; i++)
tmp[i] = tmp[n + i] = tmp[n + n + i] = input[i];
input = tmp;
int diff = abs(SZ(input) - SZ(formula));
for (int i = 0; i < diff; i++)
if (SZ(input) < SZ(formula))
input.PB(0.0);
else
formula.PB(0.0);
}
void print_matrix(vvd v, string msg){
cout << msg << ": " << endl;
for (int i = 0; i < SZ(v); i++){
for (int j = 0; j < SZ(v[i]); j++)
cout << v[i][j] << " ";
cout << endl;
}
cout << endl;
}
void print_matrix_arr(double *v, int n, string msg){
cout << msg << ": " << endl;
for (int i = 0; i < n; i++){
for (int j = 0; j < n; j++)
cout << v[i*n + j] << " ";
cout << endl;
}
cout << endl;
}
void print_complex_matrix(double complex* input_buffer1, double complex* input_buffer2, int n, string msg){
cout << msg << ": " << endl;
for (int i = 0; i < n * n; i++){
if (i % n == 0)
cout << endl;
printf("ratio:%f\t%f%+fi\t \t%f%+fi\n", crealf(input_buffer1[i])/crealf(input_buffer2[i]), crealf(input_buffer1[i]), cimagf(input_buffer1[i]), crealf(input_buffer2[i]), cimagf(input_buffer2[i]));
// cout << (*input_buffer[i]).real() << " " << (*input_buffer[i]).imag() << ",\t";
}
cout << endl;
}
void print_vector(vd v, string msg){
cout << msg << ": ";
for (int i = 0; i < SZ(v); i++)
cout << v[i] << " ";
cout << endl;
}
// fftw_plan plan_forward, plan_backward;
DFTI_DESCRIPTOR_HANDLE my_desc1_handle = NULL, my_desc2_handle = NULL;
// double mkl_forward_input_buffer[MAXN * MAXN], mkl_backward_output_buffer[MAXN * MAXN];
// double complex mkl_forward_output_buffer[MAXN * MAXN], mkl_backward_input_buffer[MAXN * MAXN];
// DFT of real valued matrix. CAUTION: initialize the input array after creating the plan
void mkl_fft_forward(vvd &v, double complex *output_buffer, int n)
{
int sz_i = SZ(v), sz_j = SZ(v[0]);
#pragma omp parallel for
for (int i = 0; i < sz_i; i++)
for (int j = 0; j < sz_j; j++){
mkl_forward_input_buffer[i*n + j] = v[i][j];
}
#pragma omp parallel for
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
mkl_forward_output_buffer[i*n + j] = 0.0;
// print_matrix_arr(mkl_forward_input_buffer, n, "input bufer");
DftiComputeForward(my_desc1_handle, mkl_forward_input_buffer, mkl_forward_output_buffer);
#pragma omp parallel for
for (int i = 0; i < n; i++){
for (int j = 0; j < n; j++){
output_buffer[i*n + j] = mkl_forward_output_buffer[i*n + j];
// printf("%f+%f\n", crealf(output_buffer[i*n + j]), cimagf(output_buffer[i*n+j]));
}
}
}
// Inverse DFT of complex input array
void mkl_fft_backward(double complex* input_buffer, vvd &output, int n)
{
#pragma omp parallel for
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
{
mkl_backward_input_buffer[i*n + j] = input_buffer[i*n + j];
mkl_backward_output_buffer[i*n + j] = 0.0;
}
DftiComputeBackward(my_desc2_handle, mkl_backward_input_buffer, mkl_backward_output_buffer);
#pragma omp parallel for
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
output[i][j] = mkl_backward_output_buffer[i*n + j]/(n * n * 1.0);
}
// Takes two array(a_real and b_real) as input and writes the output to "res"
void convolution_fftw_2d(vvd &a_real, vvd &input, vvd &result)
{
if (T == 0)
return ;
int n_formula = N;
mkl_fft_forward(a_real, a_complex, n_formula);
// double complex* odd_mults = fftw_alloc_complex(n_formula * n_formula); // Do not need to allocate new space, we can use the space of a_complex or b_complex
bool is_initialized = false; // if odd_mult array is initialized
int t = T;
// ############# Repeated squaring - start ############
while (t > 1){
if (t & 1){
if (is_initialized == false){
#pragma omp parallel for
for(int i = 0; i < n_formula * n_formula; i++)
odd_mults[i] = a_complex[i];
is_initialized = true;
} else {
#pragma omp parallel for
for(int i = 0; i < n_formula * n_formula; i++)
odd_mults[i] = odd_mults[i] * a_complex[i];
}
}
#pragma omp parallel for
for(int i = 0; i < n_formula * n_formula; i++)
a_complex[i] = a_complex[i] * a_complex[i];
t /= 2;
}
if (is_initialized){
#pragma omp parallel for
for(int i = 0; i < n_formula * n_formula; i++)
a_complex[i] = a_complex[i] * odd_mults[i];
}
// ############# Repeated squaring - end ############
// while (--t > 0){
// cout << "t: " << t << endl;
// for(int i = 0; i < n_formula * n_formula; i++)
// pointwise_mult[i] = pointwise_mult[i] * a_complex[i];
// }
// fft_backward(a_complex, formula, n_formula);
// // Scale the output array according to number of samples
// #pragma omp parallel for
// for (int i = 0; i < SZ(formula); i++)
// for (int j = 0; j < SZ(formula[0]); j++){
// double r = formula[i][j] / (n_formula * n_formula);
// formula[i][j] = r;
// // formula[i][j] = (abs(r) < 1e-8? 0:r);
// }
// print_matrix(formula, "Formula");
// vvd input(N, vd(N, 0.0));
// #pragma omp parallel for
// for (int i = 0; i < N; i++)
// for (int j = 0; j < N; j++){
// input[i][j] = a1[i][j];
// }
// print_matrix(input, "Input");
// reverse(input.begin(), input.end());
// double complex* formula_complex = fftw_alloc_complex(N * N);
// fft_forward(formula, formula_complex, N);
// double complex* input_complex = fftw_alloc_complex(N * N);
mkl_fft_forward(input, input_complex, N);
// fft_forward(input, input_complex, N);
// double complex* result_complex = fftw_alloc_complex(n * n); // Do not need to allocate new space, we can use the space of a_complex or b_complex
#pragma omp parallel for
for (int i = 0; i < N * N; i++){
a_complex[i] = input_complex[i] * a_complex[i];
}
mkl_fft_backward(a_complex, result, N);
// fft_backward(a_complex, result, N);
// print_matrix(result, "Result (needs to be rotated)");
return ;
}
void mkl_init(int n)
{
MKL_LONG status;
MKL_LONG len[2] = {n, n};
len[0] = n; len[1] = n;
status = DftiCreateDescriptor(&my_desc1_handle, DFTI_DOUBLE, DFTI_REAL, 2, len);
status = DftiSetValue(my_desc1_handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
status = DftiSetValue(my_desc1_handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
status = DftiSetValue( my_desc1_handle, DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT );
status = DftiCommitDescriptor(my_desc1_handle);
status = DftiCreateDescriptor(&my_desc2_handle, DFTI_DOUBLE, DFTI_REAL, 2, len);
status = DftiSetValue(my_desc2_handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
status = DftiSetValue(my_desc2_handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
status = DftiSetValue( my_desc2_handle, DFTI_PACKED_FORMAT, DFTI_CCE_FORMAT );
status = DftiCommitDescriptor(my_desc2_handle);
}
void initialize(){
mkl_init(N);
// forward_input_buffer = fftw_alloc_real(N * N);
// backward_output_buffer = fftw_alloc_real(N * N);
// forward_output_buffer = fftw_alloc_complex(N * N);
// backward_input_buffer = fftw_alloc_complex(N * N);
a_complex = (double complex *)malloc(sizeof(double complex) * N * N); //fftw_alloc_complex(N);
odd_mults = (double complex *)malloc(sizeof(double complex) * N * N); //fftw_alloc_complex(N);
input_complex = (double complex *)malloc(sizeof(double complex) * N * N); //fftw_alloc_complex(N);
mkl_forward_input_buffer = (double *)malloc(sizeof(double) * N * N);
mkl_backward_output_buffer = (double *)malloc(sizeof(double) * N * N);
mkl_forward_output_buffer = (double complex *)malloc(sizeof(double complex) * N * N);
mkl_backward_input_buffer = (double complex *)malloc(sizeof(double complex) * N * N);
for (int i = 0; i < N+2; ++i)
for (int j = 0; j < N+2; j++)
a1[i][j] = a2[i][j] = 1.0 * (rand() % BASE);
}
void mkl_destroy(){
MKL_LONG status;
status = DftiFreeDescriptor(&my_desc1_handle);
status = DftiFreeDescriptor(&my_desc2_handle);
free(a_complex);
free(odd_mults);
free(input_complex);
free(mkl_forward_input_buffer);
free(mkl_backward_output_buffer);
free(mkl_forward_output_buffer);
free(mkl_backward_input_buffer);
}
#define getIdx(i, N) ((i + N) % N)
bool verify(vvd result){
for (int t = 0; t < T; ++t) {
// cout << "t: " << t << endl;
for (int i = 0; i < N; ++i)
for (int j = 0; j < N; j++){
// a2[i] = 0.125 * (a1[i+1] - 2.0 * a1[i] + a1[i-1]);
// cout << i << " " << j << " : " << getIdx(i -1, N) << " " << getIdx(i + 1, N) << " " << getIdx(j - 1, N) << " " << getIdx(j + 1, N) << endl;
// a2[i][j] = a1[getIdx(i - 1, N)][getIdx(j, N)] + a1[getIdx(i, N)][getIdx(j + 1, N)]
// + a1[getIdx(i + 1, N)][getIdx(j, N)] + a1[getIdx(i, N)][getIdx(j - 1, N)];
a2[i][j] = 0.125*a1[getIdx(i - 1, N)][getIdx(j, N)] + 0.125*a1[getIdx(i, N)][getIdx(j + 1, N)]
+ 0.125*a1[getIdx(i + 1, N)][getIdx(j, N)] + 0.125*a1[getIdx(i, N)][getIdx(j - 1, N)]
+ (-2.0*(0.125*2.0) + 1.0)*a1[i][j];
}
for (int i = 0; i < N; ++i)
for (int j = 0; j < N; j++)
a1[i][j] = a2[i][j];
}
// cout << "Final Answer (iter): ";
// for (int i = 0; i < N; i++){
// for (int j = 0; j < N; j++)
// cout << a1[i][j] << " ";
// cout << endl;
// }
// cout << endl;
int cnt = 0;
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
if (fabs (a1[i][j] - result[i][j]) > 1e-8)
cnt++;
cout << "Number of Mismatched Cell: " << cnt << endl;
return 0;
}
int main(int argc, char *argv[])
{
double x;
int t, n, numThreads;
// vvd a, b;
if (argc < 4){
cout << "Enter: N T numThreads" << endl;
return 1;
}
if (argc > 1){
n = atoi(argv[1]);
}
if (argc > 2)
t = atoi(argv[2]);
numThreads = 1;
if (argc > 3){
numThreads = atoi(argv[3]);
omp_set_num_threads(numThreads);
}
N = n; T = t; N_THREADS = numThreads;
initialize();
#ifdef USE_PAPI
papi_init();
#endif
int sz_formula = 3;
// double formula[3][3] = {{0, 1, 0},
// {1, 0, 1},
// {0, 1, 0}};
double formula[3][3] = {{0, 0.125, 0},
{0.125, (-2.0*(0.125*2.0) + 1.0), 0.125},
{0, 0.125, 0}};
// double formula[3][3] = {{1, 0, 1},
// {0, 0, 0},
// {0, 0, 0}};
vvd a(sz_formula, vd(sz_formula));
for (int i = 0; i < sz_formula; i++)
for (int j = 0; j < sz_formula; j++)
a[i][j] = formula[i][j];
vvd input(n, vd(n)), result(n, vd(n,0.0));
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
input[i][j] = a1[i][j];
long start = getTime();
#ifdef POLYBENCH
/* Start timer. */
polybench_start_instruments;
#endif
convolution_fftw_2d(a, input, result);
// Result must be rotated (T mod N) indices
#ifdef POLYBENCH
/* Stop and print timer. */
polybench_stop_instruments;
polybench_print_instruments;
#endif
long end = getTime();
vvd rotated_result(n, vd(n, 0.0));
int k = 0;
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
rotated_result[i][j] = result[(i+(t%n)) % n][(j+(t%n)) % n];
// print_matrix(rotated_result, "rotated");
cout << N << "," << T << "," << numThreads << "," << (end - start) / 1000.0 << endl;
mkl_destroy();
#ifdef USE_PAPI
countTotalMiss(p);
PAPI_shutdown();
delete threadcounter;
for (int i = 0; i < p; i++) delete l2miss[i];
delete l2miss;
delete errstring;
delete EventSet;
delete eventCode;
#endif
long start_iter = getTime();
// verify(rotated_result);
long end_iter = getTime();
// cout << "Time (Iter): " << end_iter - start_iter << endl;
return 0;
}
I'm pretty new to running C++ overall (I'm most familiar with Python, which is an interpreted language), so I'm not sure how it all works. This was pre-made code that I have to test before I make my own. I'm on Ubuntu 20.04 and to begin with, I'm not sure how to work everything. I have Visual Studio Code installed, and I'm trying to run the command icc -std=gnu++98 -O3 -qopenmp -xhost -ansi-alias -ipo -AVX512 mkl_2d_heat_fftw_P.cpp -o mkl_2d_heat_fftw_P -lm -mkl through the terminal. I'm not even sure if this is right, but I'm getting the following error messages:
ipo: warning #11021 (6 times): unresolved DftiFreeDescriptor, DftiCommitDescriptor, DftiSetValue, DftiCreateDescriptor_d_md, DftiComputeBackward, DftiComputeForward
ld: cannot find (3 times): -lmkl_intel_lp64, -lmkl_intel_thread, -lmkl_core.
These are eventually meant to run on supercomputers at my institution, but is the compiling command wrong for my local computer? If it helps, I'm running this on a VirtualBox on Windows 10 Pro (I tried running Ubuntu 20.04 LTS on Hyper-V but I never figured out how to successfully connect it to the Internet because whenever I made the VM's switch my WiFi one, it made my computer overall have no Internet access).
Looking at it, it seems like the Math Kernel Library is the problem, though I'm not too sure. Did I mess up the installation of the oneAPI Base Toolkit? I have every oneAPI Toolkit installed (and all of their features too, including FPGA support).
Or am I just compiling it wrong as the script described listed in the top comment is for a supercomputer (I'm not sure what to do with the first three lines, like export GOMP_CPU_AFFINITY, export OMP_NUM_THREADS, and set MKL_NUM_THREADS).
Thanks for any help in advance!
this is the code that is have i am trying to get the take the for loops that i have and condense them
into a loop but i am not sure how. If there is anyone that can help me out i would appreciate it. The fist set of for loops are taking the inverse and then it is muiltiplying through the positions of the array. Right now the for loops are going through on position for each loop and i know there is a better way but i cant think of how to do it.
using namespace std;
#include<iostream>
#include<fstream>
#include<iomanip>
#include<Windows.h>
// User-defined function declarations (prototypes)
void readit();
void calcit(float[5][6], float[5][6]);
void writeit(float [5][6], float[5][6], float[5]);
// Declaration and definition of the main()
int main()
{
readit();
return 0;
}
void readit()
{
// Local variable declarations
float origarray[5][6], reducedarray[5][6];
// Filestream declaration and error trap
ifstream infile("C:\\EGR111\\rowechelondata.txt");
if(!infile)
{
cout << "There is no file, or the filestream is corrupted. Correct the problem and "
<< "try again!";
Sleep(2000);
exit(0);
}
// File read. 'i' is row index and 'j' is column index.
for(int i = 0; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
infile >> origarray[i][j];
reducedarray[i][j] = origarray[i][j];
}
}
calcit(origarray, reducedarray);
}
void calcit(float origarray[5][6], float reducedarray[5][6])
{
// Local variable declaration
float roots[5] = {};
cout << setprecision(4) << endl;
// Multiply first row by its leading coefficient, such that the result is '1'
for(int i = 0; i < 1; i++)
{
for(int j = 0; j < 6; j++)
{
reducedarray[i][j] = origarray[i][j] * (1.0 / origarray[0][0]);
}
}
// Multiply second row by its leading coefficient, such that the result is '1'
for(int i = 1; i < 2; i++)
{
for(int j = 0; j < 6; j++)
{
reducedarray[i][j] = origarray[i][j] * (1.0 / origarray[1][0]);
}
}
// Multiply third row by its leading coefficient, such that the result is '1'
for(int i = 2; i < 3; i++)
{
for(int j = 0; j < 6; j++)
{
reducedarray[i][j] = origarray[i][j] * (1.0 / origarray[2][0]);
}
}
// Multiply fourth row by its leading coefficient, such that the result is '1'
for(int i = 3; i < 4; i++)
{
for(int j = 0; j < 6; j++)
{
reducedarray[i][j] = origarray[i][j] * (1.0 / origarray[3][0]);
}
}
// Multiply fifth row by its leading coefficient, such that the result is '1'
for(int i = 4; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
reducedarray[i][j] = origarray[i][j] * (1.0 / origarray[4][0]);
}
}
// Subtract the first row of the reduced array into the subsequent rows
for(int i = 1; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] - reducedarray[0][j];
}
}
// Multiply array position [1][1] by its leading coefficient, such that the result is '1'
for(int i = 1; i < 2; i++)
{
for(int j = 1; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[1][1]);
}
}
// Multiply array position [2][1] by its leading coefficient, such that the result is '1'
for(int i = 2; i < 3; i++)
{
for(int j = 1; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[2][1]);
}
}
// Multiply array position [3][1] by its leading coefficient, such that the result is '1'
for(int i = 3; i < 4; i++)
{
for(int j = 1; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[3][1]);
}
}
// Multiply array position [4][1] by its leading coefficient, such that the result is '1'
for(int i = 4; i < 5; i++)
{
for(int j = 1; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[4][1]);
}
}
// Subtract the second row of the reduced array into the subsequent rows
for(int i = 2; i < 5; i++)
{
for(int j = 1; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] - reducedarray[1][j];
}
}
// Multiply array position [2][2] by its leading coefficient, such that the result is '1'
for(int i = 2; i < 3; i++)
{
for(int j = 2; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[2][2]);
}
}
// Multiply array position [3][2] by its leading coefficient, such that the result is '1'
for(int i = 3; i < 4; i++)
{
for(int j = 2; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[3][2]);
}
}
// Multiply array position [4][2] by its leading coefficient, such that the result is '1'
for(int i = 4; i < 5; i++)
{
for(int j = 2; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[4][2]);
}
}
// Subtract the third row of the reduced array into the subsequent rows
for(int i = 3; i < 5; i++)
{
for(int j = 2; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] - reducedarray[2][j];
}
}
// Multiply array position [3][3] by its leading coefficient, such that the result is '1'
for(int i = 3; i < 4; i++)
{
for(int j = 3; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[3][3]);
}
}
// Multiply array position [4][3] by its leading coefficient, such that the result is '1'
for(int i = 4; i < 5; i++)
{
for(int j = 3; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[4][3]);
}
}
// Subtract the fourth row of the reduced array into the subsequent row
for(int i = 4; i < 5; i++)
{
for(int j = 3; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] - reducedarray[3][j];
}
}
// Multiply array position [4][4] by its leading coefficient, such that the result is '1'
for(int i = 4; i <= 4; i++)
{
for(int j = 4; j < 6; j++)
{
reducedarray[i][j] = reducedarray[i][j] * (1.0 / reducedarray[4][4]);
}
}
// Back solve to assign roots for each row
for(int i = 4; i >= 0; i--)
{
for(int j = 5; j >= 0; j--)
{
roots[i] = reducedarray[4][5];
roots[4] = roots[i];
}
}
for(int i = 3; i >= 0; i--)
{
for(int j = 5; j >= 0; j--)
{
roots[i] = reducedarray[3][5] - (reducedarray[3][4] * roots[4]);
roots[3] = roots[i];
}
}
for(int i = 2; i >= 0; i--)
{
for(int j = 5; j >= 0; j--)
{
roots[i] = reducedarray[2][5] - (reducedarray[2][4] * roots[4]) -
(reducedarray[2][3] * roots[3]);
roots[2] = roots[i];
}
}
for(int i = 1; i >= 0; i--)
{
for(int j = 5; j >= 0; j--)
{
roots[i] = reducedarray[1][5] - (reducedarray[1][4] * roots[4]) -
(reducedarray[1][3] * roots[3]) - (reducedarray[1][2] * roots[2]);
roots[1] = roots[i];
}
}
for(int i = 0; i >= 0; i--)
{
for(int j = 5; j >= 0; j--)
{
roots[i] = reducedarray[0][5] - (reducedarray[0][4] * roots[4]) -
(reducedarray[0][3] * roots[3]) - (reducedarray[0][2] * roots[2]) -
(reducedarray[0][1] * roots[1]);
roots[0] = roots[i];
}
}
writeit(origarray, reducedarray, roots);
}
void writeit(float origarray[5][6], float reducedarray[5][6], float roots[5])
{
cout << "The Original Array" << endl << endl;
for(int i = 0; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
cout << setw(6) << origarray[i][j] << setw(4) << "";
}
cout << endl << endl;
}
cout << endl << endl;
cout << "The Reduced Array" << endl << endl;
for(int i = 0; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
cout << setw(6) << reducedarray[i][j] << setw(4) << "";
}
cout << endl << endl;
}
cout << endl << endl;
cout << "The Roots of the equations are: " << endl << endl;
cout << setw(6) << "A = " << setw(6) << roots[0] << endl << endl;
cout << setw(6) << "B = " << setw(6) << roots[1] << endl << endl;
cout << setw(6) << "C = " << setw(6) << roots[2] << endl << endl;
cout << setw(6) << "D = " << setw(6) << roots[3] << endl << endl;
cout << setw(6) << "E = " << setw(6) << roots[4] << endl << endl;
cout << endl << endl;
}
Where m=number of columns in matrix and n=number of rows in matrix.
Just the needed segment:
void calcit(float origarray[5][6], float reducedarray[5][6])
{
float roots[5] = {};
int n=5,m=6;
for(int i=0;i<n;i++){
for(int j=0;j<m;j++){
reducedarray[i][j]=origarray[i][j]*(1.0/origarray[i][0]);
}
}
for(int i = 1; i < 5; i++){
for(int j = 0; j < 6; j++){
reducedarray[i][j] -= reducedarray[0][j];
}
}
int num=1;
for(int i=1;i<m;i++){
for(int j=num;j<n;j++){
for(int k=i;k<m;k++)
reducedarray[j][k] = reducedarray[j][k] * (1.0 / reducedarray[j][i]);
}
for(int j=num+1;j<n;j++){
for(int k=num;k<m;k++){
reducedarray[j][k] -= reducedarray[i][k];
}
}
num++;
}
for(int i=n-1;i>=0;i--){
float ans=reducedarray[i][m-1];
for(int j=m-2;j>=i+1;j--){
ans-=(reducedarray[i][j]*roots[j]);
}
roots[i]=ans;
}
writeit(origarray, reducedarray, roots);
}
Whole code:
#include<iostream>
#include<fstream>
#include<iomanip>
#include<Windows.h>
using namespace std;
// User-defined function declarations (prototypes)
void readit();
void calcit(float[5][6], float[5][6]);
void writeit(float [5][6], float[5][6], float[5]);
// Declaration and definition of the main()
int main()
{
readit();
return 0;
}
void readit()
{
// Local variable declarations
float origarray[5][6], reducedarray[5][6];
// Filestream declaration and error tra
cout << setprecision(4) << endl;
// File read. 'i' is row index and 'j' is column index.
for(int i = 0; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
cin >> origarray[i][j];
reducedarray[i][j] = origarray[i][j];
}
}
calcit(origarray, reducedarray);
}
void calcit(float origarray[5][6], float reducedarray[5][6])
{
float roots[5] = {};
int n=5,m=6;
for(int i=0;i<n;i++){
for(int j=0;j<m;j++){
reducedarray[i][j]=origarray[i][j]*(1.0/origarray[i][0]);
}
}
for(int i = 1; i < 5; i++){
for(int j = 0; j < 6; j++){
reducedarray[i][j] -= reducedarray[0][j];
}
}
int num=1;
for(int i=1;i<m;i++){
for(int j=num;j<n;j++){
for(int k=i;k<m;k++)
reducedarray[j][k] = reducedarray[j][k] * (1.0 / reducedarray[j][i]);
}
for(int j=num+1;j<n;j++){
for(int k=num;k<m;k++){
reducedarray[j][k] -= reducedarray[i][k];
}
}
num++;
}
for(int i=n-1;i>=0;i--){
float ans=reducedarray[i][m-1];
for(int j=m-2;j>=i+1;j--){
ans-=(reducedarray[i][j]*roots[j]);
}
roots[i]=ans;
}
writeit(origarray, reducedarray, roots);
}
void writeit(float origarray[5][6], float reducedarray[5][6], float roots[5])
{
cout << "The Original Array" << endl << endl;
for(int i = 0; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
cout << setw(6) << origarray[i][j] << setw(4) << "";
}
cout << endl << endl;
}
cout << endl << endl;
cout << "The Reduced Array" << endl << endl;
for(int i = 0; i < 5; i++)
{
for(int j = 0; j < 6; j++)
{
cout << setw(6) << reducedarray[i][j] << setw(4) << "";
}
cout << endl << endl;
}
cout << endl << endl;
cout << "The Roots of the equations are: " << endl << endl;
for(int i=0;i<5;i++){
cout<<(char)('A'+i)<<" = "<<setw(6)<<roots[i]<<endl;
}
}
Please, kindly, check if it is working. If it does not work, please notify me in the comments of this post.
Also, i'm not sure why are you using double endl (cout<<endl<<endl;),there is an alternative to this: cout<<"\n\n"; (note that this doesn't flush the output buffer)
I would like to kill two birds with one stone, as the questions are very similiar:
1:
I followed this code on github Smith Waterman Alignment to create the smith-waterman in C++. After some research I understood that implementing
double H[N_a+1][N_b+1]; is not possible (anymore) for the "newer" C++ versions. So to create a constant variable I changed this line to:
double **H = new double*[nReal + 1];
for (int i = 0; i < nReal + 1; i++)
H[i] = new double[nSynth + 1];
and also the same scheme for int I_i[N_a+1][N_b+1], I_j[N_a+1][N_b+1]; and so one (well, everywhere, where a two dimensional array exists). Now I'm getting the exception:
Unhandled exception at 0x00007FFF7B413C58 in Smith-Waterman.exe: Microsoft C
++ exception: std :: bad_alloc at location 0x0000008FF4F9FA50.
What is wrong here? Already debugged, and the program throws the exceptions above the for (int i = 0; i < nReal + 1; i++).
2: This code uses std::strings as parameters. Would it be also possible to create a smith waterman algortihm for cv::Mat?
For maybe more clarification, my full code looks like this:
#include "BinaryAlignment.h"
#include "WallMapping.h"
//using declarations
using namespace cv;
using namespace std;
//global variables
std::string bin;
cv::Mat temp;
std::stringstream sstrMat;
const int maxMismatch = 2;
const float mu = 0.33f;
const float delta = 1.33;
int ind;
BinaryAlignment::BinaryAlignment() { }
BinaryAlignment::~BinaryAlignment() { }
/**
*** Convert matrix to binary sequence
**/
std::string BinaryAlignment::matToBin(cv::Mat src, std::experimental::filesystem::path path) {
cv::Mat linesMat = WallMapping::wallMapping(src, path);
for (int i = 0; i < linesMat.size().height; i++) {
for (int j = 0; j < linesMat.size().width; j++) {
if (linesMat.at<Vec3b>(i, j)[0] == 0
&& linesMat.at<Vec3b>(i, j)[1] == 0
&& linesMat.at<Vec3b>(i, j)[2] == 255) {
src.at<int>(i, j) = 1;
}
else {
src.at<int>(i, j) = 0;
}
sstrMat << src.at<int>(i, j);
}
}
bin = sstrMat.str();
return bin;
}
double BinaryAlignment::similarityScore(char a, char b) {
double result;
if (a == b)
result = 1;
else
result = -mu;
return result;
}
double BinaryAlignment::findArrayMax(double array[], int length) {
double max = array[0];
ind = 0;
for (int i = 1; i < length; i++) {
if (array[i] > max) {
max = array[i];
ind = i;
}
}
return max;
}
/**
*** Smith-Waterman alignment for given sequences
**/
int BinaryAlignment::watermanAlign(std::string seqSynth, std::string seqReal, bool viableAlignment) {
const int nSynth = seqSynth.length(); //length of sequences
const int nReal = seqReal.length();
//H[nSynth + 1][nReal + 1]
double **H = new double*[nReal + 1];
for (int i = 0; i < nReal + 1; i++)
H[i] = new double[nSynth + 1];
cout << "passt";
for (int m = 0; m <= nSynth; m++)
for (int n = 0; n <= nReal; n++)
H[m][n] = 0;
double temp[4];
int **Ii = new int*[nReal + 1];
for (int i = 0; i < nReal + 1; i++)
Ii[i] = new int[nSynth + 1];
int **Ij = new int*[nReal + 1];
for (int i = 0; i < nReal + 1; i++)
Ij[i] = new int[nSynth + 1];
for (int i = 1; i <= nSynth; i++) {
for (int j = 1; j <= nReal; j++) {
temp[0] = H[i - 1][j - 1] + similarityScore(seqSynth[i - 1], seqReal[j - 1]);
temp[1] = H[i - 1][j] - delta;
temp[2] = H[i][j - 1] - delta;
temp[3] = 0;
H[i][j] = findArrayMax(temp, 4);
switch (ind) {
case 0: // score in (i,j) stems from a match/mismatch
Ii[i][j] = i - 1;
Ij[i][j] = j - 1;
break;
case 1: // score in (i,j) stems from a deletion in sequence A
Ii[i][j] = i - 1;
Ij[i][j] = j;
break;
case 2: // score in (i,j) stems from a deletion in sequence B
Ii[i][j] = i;
Ij[i][j] = j - 1;
break;
case 3: // (i,j) is the beginning of a subsequence
Ii[i][j] = i;
Ij[i][j] = j;
break;
}
}
}
//Print matrix H to console
std::cout << "**********************************************" << std::endl;
std::cout << "The scoring matrix is given by " << std::endl << std::endl;
for (int i = 1; i <= nSynth; i++) {
for (int j = 1; j <= nReal; j++) {
std::cout << H[i][j] << " ";
}
std::cout << std::endl;
}
//search H for the moaximal score
double Hmax = 0;
int imax = 0, jmax = 0;
for (int i = 1; i <= nSynth; i++) {
for (int j = 1; j <= nReal; j++) {
if (H[i][j] > Hmax) {
Hmax = H[i][j];
imax = i;
jmax = j;
}
}
}
std::cout << Hmax << endl;
std::cout << nSynth << ", " << nReal << ", " << imax << ", " << jmax << std::endl;
std::cout << "max score: " << Hmax << std::endl;
std::cout << "alignment index: " << (imax - jmax) << std::endl;
//Backtracing from Hmax
int icurrent = imax, jcurrent = jmax;
int inext = Ii[icurrent][jcurrent];
int jnext = Ij[icurrent][jcurrent];
int tick = 0;
char *consensusSynth = new char[nSynth + nReal + 2];
char *consensusReal = new char[nSynth + nReal + 2];
while (((icurrent != inext) || (jcurrent != jnext)) && (jnext >= 0) && (inext >= 0)) {
if (inext == icurrent)
consensusSynth[tick] = '-'; //deletion in A
else
consensusSynth[tick] = seqSynth[icurrent - 1]; //match / mismatch in A
if (jnext == jcurrent)
consensusReal[tick] = '-'; //deletion in B
else
consensusReal[tick] = seqReal[jcurrent - 1]; //match/mismatch in B
//fix for adding first character of the alignment.
if (inext == 0)
inext = -1;
else if (jnext == 0)
jnext = -1;
else
icurrent = inext;
jcurrent = jnext;
inext = Ii[icurrent][jcurrent];
jnext = Ij[icurrent][jcurrent];
tick++;
}
// Output of the consensus motif to the console
std::cout << std::endl << "***********************************************" << std::endl;
std::cout << "The alignment of the sequences" << std::endl << std::endl;
for (int i = 0; i < nSynth; i++) {
std::cout << seqSynth[i];
};
std::cout << " and" << std::endl;
for (int i = 0; i < nReal; i++) {
std::cout << seqReal[i];
};
std::cout << std::endl << std::endl;
std::cout << "is for the parameters mu = " << mu << " and delta = " << delta << " given by" << std::endl << std::endl;
for (int i = tick - 1; i >= 0; i--)
std::cout << consensusSynth[i];
std::cout << std::endl;
for (int j = tick - 1; j >= 0; j--)
std::cout << consensusReal[j];
std::cout << std::endl;
int numMismatches = 0;
for (int i = tick - 1; i >= 0; i--) {
if (consensusSynth[i] != consensusReal[i]) {
numMismatches++;
}
}
viableAlignment = numMismatches <= maxMismatch;
return imax - jmax;
}
Thanks!