I wrote paralell program based on a Strassen multiplication algorithm using pointers.
this program return the result of multiplication of two matrices that are the same size.
when the size is 256 , program fill about 1 GB of ram, and when it is 512 ram total\y become full and my windows doesn't work then I must restart.
I replace whole pointers with vectors then incredibly Ram usage decreased!.for 1024 size , just 80 MB of ram used.
I know a little about vector that is bound statically at first then if we need more space during runtime its bound dynamically.
Why pointers needed more space than vectors ?
this is my first code :
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>
using namespace std;
int ** matrix_1;
int ** matrix_2;
#define number_thread:4;
void show(string name, int n, int **show)
{
cout << " matrix " << name << " :" << endl;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << show[i][j] << " ";
cout << endl;
}
}
int ** strassen(int n, int **matrix_a, int ** matrix_b)
{
int ** A11;
int ** A12;
int ** A21;
int ** A22;
int ** B11;
int ** B12;
int ** B21;
int ** B22;
int ** result;
int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
A11 = new int*[n / 2];
A12 = new int*[n / 2];
A21 = new int*[n / 2];
A22 = new int*[n / 2];
B11 = new int*[n / 2];
B12 = new int*[n / 2];
B21 = new int*[n / 2];
B22 = new int*[n / 2];
result = new int *[n];
m1 = new int*[n / 2];
m2 = new int*[n / 2];
m3 = new int*[n / 2];
m4 = new int*[n / 2];
m5 = new int*[n / 2];
m6 = new int*[n / 2];
m7 = new int*[n / 2];
m8 = new int*[n / 2];
cilk_for(int i = 0; i < n / 2; i++)
{
//cout << " value i : " << i << endl;
A11[i] = new int[n / 2];
A12[i] = new int[n / 2];
A21[i] = new int[n / 2];
A22[i] = new int[n / 2];
B11[i] = new int[n / 2];
B12[i] = new int[n / 2];
B21[i] = new int[n / 2];
B22[i] = new int[n / 2];
m1[i] = new int[n / 2];
m2[i] = new int[n / 2];
m3[i] = new int[n / 2];
m4[i] = new int[n / 2];
m5[i] = new int[n / 2];
m6[i] = new int[n / 2];
m7[i] = new int[n / 2];
m8[i] = new int[n / 2];
}
cilk_for(int i = 0; i < n; i++) // matrix result
result[i] = new int[n];
if (n == 2)
{
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
}
// for (int i = 0; i < n;i++)
cilk_for(int i = 0; i < (n / 2); i++)
{
for (int j = 0; j < (n / 2); j++)
{
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];
}
}
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22
cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/
cilk_for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
{
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
}
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/
cilk_for(int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
if (i < n / 2 && j < n / 2)
{
result[i][j] = m1[i][j];
}
else if (i < n / 2 && j >= n / 2)
{
result[i][j] = m3[i][j - n / 2];
}
else if (i >= n / 2 && j < n / 2)
{
result[i][j] = m5[i - n / 2][j];
}
else if (i >= n / 2 && j >= n / 2)
{
result[i][j] = m7[i - n / 2][j - n / 2];
}
}
}
/*
cilk_for(int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];
delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/* }
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
}*/
delete[] A11;
delete[] A12;
delete[] A21;
delete[] A22;
delete[] B11;
delete[] B12;
delete[] B21;
delete[] B22;
delete[] m1;
delete[] m2;
delete[] m3;
delete[] m4;
delete[] m5;
delete[] m6;
delete[] m7;
delete[] m8;
return result;
}
int main()
{
int size;
freopen("in.txt", "r", stdin);
freopen("out.txt", "w", stdout);
__cilkrts_set_param("nworkers", "4");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
matrix_1 = new int*[size];
matrix_2 = new int*[size];
if (size % 2 == 0)
{
//instialize matrix1
//cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
{
matrix_1[i] = new int[size];
for (int j = 0; j < size; j++)
{
matrix_1[i][j] = rand() % 3;
//cin >> matrix_1[i][j];
//cout << matrix_1[i][j] << " ";
}
//cout << endl;
}
//instialize matrix2
//cout << "matrix2_is :\n";
for (int i = 0; i < size; i++)
{
matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
{
matrix_2[i][j] = rand() % 3;
//cout << matrix_2[i][j]<<" ";
//cin >> matrix_2[i][j];
}
// cout << endl;
}
clock_t begin = clock();
matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
/* for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout<< matrix_2[i][j]<<" ";
}
cout << endl;
}*/
}
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);
cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;
//_getch();
return 0;
}
I replace whole pointers array with vectors :
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>
using namespace std;
vector<vector<int> > matrix_1, matrix_2;
//int matrix_1;
//int ** matrix_2;
#define number_thread:4;
void show(string name ,int n, int **show)
{
cout << " matrix " << name<<" :" << endl;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << show[i][j] << " ";
cout << endl;
}
}
vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)
{
vector<vector<int>> A11;
vector<vector<int>> A12;
vector<vector<int>> A21;
vector<vector<int>> A22;
vector<vector<int>> B11;
vector<vector<int>> B12;
vector<vector<int>> B21;
vector<vector<int>> B22;
vector<vector<int>> result;
vector<int> help;
vector<vector<int>> m1, m2, m3, m4, m5, m6, m7, m8;
help.clear();
for (int j = 0; j < n / 2; j++)
{
help.push_back(2);
}
for(int i = 0; i < n / 2; i++)
{
A11.push_back(help);
A12.push_back(help);
A21.push_back(help);
A22.push_back(help);
B11.push_back(help);
B12.push_back(help);
B21.push_back(help);
B22.push_back(help);
m1.push_back(help);
m2.push_back(help);
m3.push_back(help);
m4.push_back(help);
m5.push_back(help);
m6.push_back(help);
m7.push_back(help);
m8.push_back(help);
}
for (int j = 0; j < n / 2; j++)
help.push_back(2);
for(int i = 0; i < n; i++)
{
result.push_back(help);
}
if (n == 2)
{
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
}
// for (int i = 0; i < n;i++)
for(int i = 0; i < (n / 2); i++)
{
for(int j = 0; j <( n / 2); j++)
{
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];
}
}
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22
cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show
("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/
for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
{
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
}
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/
for(int i = 0; i < n ; i++)
{
for(int j = 0; j < n ; j++)
{
if (i < n / 2 && j < n / 2)
{
result[i][j] = m1[i][j];
}
else if (i < n / 2 && j >= n / 2)
{
result[i][j] = m3[i][j - n / 2];
}
else if (i >= n / 2 && j < n / 2)
{
result[i][j] = m5[i - n / 2][j];
}
else if (i >= n / 2 && j >= n / 2)
{
result[i][j] = m7[i - n / 2][j - n / 2];
}
}
}
/*
cilk_for(int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];
delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/* }
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
}*/
/* for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cout << result[i][j] << " ";
}
cout << endl;
}*/
return result;
}
int main()
{
int size;
freopen("in.txt","r",stdin);
freopen("out.txt", "w", stdout);
__cilkrts_set_param("nworkers", "1");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
vector<int> inner;
if (size % 2 == 0)
{
//instialize matrix1
cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
{
inner.clear();
for (int j = 0; j < size; j++)
{
inner.push_back(rand()%3);
//cin >> matrix_1[i][j];
cout << inner[j]<<" ";
}
cout << endl;
matrix_1.push_back(inner);
}
//instialize matrix2
cout << "matrix2_is :\n";
inner.clear();
for (int i = 0; i < size; i++)
{
inner.clear();
//matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
{
inner.push_back(rand()%3);
cout << inner[j]<<" ";
//cin >> matrix_2[i][j];
}
cout << endl;
matrix_2.push_back(inner);
}
clock_t begin = clock();
matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
cout << "answerrr :" << endl;
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout<< matrix_2[i][j]<<" ";
}
cout << endl;
}
}
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);
cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;
//_getch();
return 0;
}
Two likely reasons come to mind:
If you allocate memory manually and don't free it correctly you create memory leaks. With raw pointers this is much more likely to happen than with vectors.
If you allocate 1000 integers in 1000 separate allocations it will take much more space than allocating a single block of 1000 integers (what vectors do). Each allocation requires some additional memory for bookkeeping.
I am going to guess this is an allocation issue. Allocation from the OS seems to be quite time consuming from what I have seen.
Just a guess but maybe the std::vector default allocator is grabbing a much larger contiguous block of memory from the OS and is drawing from that to satisfy smaller vector allocations?
This answer may provide some insight:
https://stackoverflow.com/a/29659791/3807729
I managed to reduce the time taken to run a test program simply by allocating, then deallocating a large std::vector before running the timing operations.
I am speculating that the C++ runtime system (in some implementations) may hold on to memory it has received from the OS even after it has been deallocated because grabbing small chunks from the OS each time is much more expensive.
Related
I have to create a program in c++ (standard libraries only) that detects edges with Sobel operator on a bmp photo and writes the result to the output bmp file (no need to create it, just to write into a previously created file). To do so I have to read an input bmp file and put it into a dynamic 2x2 array. Reading must be done in two different modes (picked up by an user) - first one is to read whole image into a memory in a dynamic array, second one is to read an image piece by piece (pieces with set size). I have no idea how to do a second mode, i guess it has to somehow read an exact number of lines from an input file, use a sobel on them and put them together in an output file, but i have no idea how to implement it. can you tell me how to start it?
this is some code that i've made, it's incomplete since i have to do a second mode but the first one is done (the Sobel operator code was taken from Dwayne Phillips' book about processing images in C, so it's more C-like)
#include <iostream>
#include <fstream>
using namespace std;
typedef unsigned char byte_t; //krotszy zapis
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////// BMP STRUCTS ////////////////////////////////////////////////////////////////////////////
struct BITMAPFILEHEADER {
byte_t bfType[2];
byte_t bfSize[4];
byte_t bfReserved1[2];
byte_t bfReserved2[2];
byte_t bfOffBits[4];
};
struct BITMAPINFOHEADER {
byte_t biSize[4];
byte_t biWidth[4];
byte_t biHeight[4];
byte_t biPlanes[2];
byte_t biBitCount[2];
byte_t biCompression[4];
byte_t biSizeImage[4];
byte_t biXpelsPerMeter[4];
byte_t biYpelsPerMeter[4];
byte_t biCrlUses[4];
byte_t biCrlImportant[4];
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////// SOBEL MASKS ////////////////////////////////////////////////////////////////////////////////////////
short mask_0[3][3] = { //S1
{ 1, 2, 1},
{ 0, 0, 0},
{-1, -2, -1} };
short mask_1[3][3] = { //S2
{ 2, 1, 0},
{ 1, 0, -1},
{ 0, -1, -2} };
short mask_2[3][3] = { //S3
{ 1, 0, -1},
{ 2, 0, -2},
{ 1, 0, -1} };
short mask_3[3][3] = { //S4
{ 0, -1, -2},
{ 1, 0, -1},
{ 2, 1, 0} };
short mask_4[3][3] = { //S5 (-S1)
{-1, -2, -1},
{ 0, 0, 0},
{ 1, 2, 1} };
short mask_5[3][3] = { //S6 (-S2)
{-2, -1, 0},
{-1, 0, 1},
{ 0, 1, 2} };
short mask_6[3][3] = { //S7 (-S3)
{-1, 0, 1},
{-2, 0, 2},
{-1, 0, 1} };
short mask_7[3][3] = { //S8 (-S4)
{ 0, 1, 2},
{-1, 0, 1},
{-2, -1, 0} };
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////// BASIC FUNCTIONS ///////////////////////////////////////////////////////////////////////////////////
int wordValue(byte_t* t) //value of 2 byte long variable
{
return t[0] + t[1] * 256;
}
int dwordValue(byte_t* t) //value of 4 byte long variable
{
return t[0] + t[1] * 256 + t[2] * 256 * 256 + t[3] * 256 * 256 * 256;
}
int menudisplay()
{
cout << "<------------------------------------- OPERATOR SOBELA - PROJEKT -------------------------------- >" << endl;
cout << "< Welcome at Sobel! >" << endl;
cout << "< Sobel detects edges in BMP files. >" << endl;
cout << "< It has two modes: >" << endl;
cout << "< 1. Reading whole image into a RAM >" << endl;
cout << "< 2. Reading an image piece by piece (for big files) >" << endl;
cout << "< To pick a mode, just enter 1 or 2. >" << endl;
cout << "< Then enter a name of an input file with .bmp. >" << endl;
cout << "< Later enter a name of an output value with .bmp. >" << endl;
cout << "< Have fun! >" << endl;
cout << "<----------------------- ----------------------->" << endl << endl;
cout << "Your choice: ";
}
void menuservice()
{
menudisplay();
powrot:
int a;
cin >> a;
switch (a)
{
case 1:
save_to_bmp();
break;
case 2:
/*pieces of image*/
break;
default:
cout << "Bad choice! Pick a mode once again: ";
goto powrot;
break;
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////// BMP READING //////////////////////////////////////////////////////////////////////////////////
void readBFH(ifstream& ifs, BITMAPFILEHEADER& bfh) //reading bfh
{
ifs.read((char*)&(bfh.bfType), sizeof(bfh.bfType));
ifs.read((char*)&(bfh.bfSize), sizeof(bfh.bfSize));
ifs.read((char*)&(bfh.bfReserved1), sizeof(bfh.bfReserved1));
ifs.read((char*)&(bfh.bfReserved2), sizeof(bfh.bfReserved2));
ifs.read((char*)&(bfh.bfOffBits), sizeof(bfh.bfOffBits));
}
void readBIH(ifstream& ifs, BITMAPINFOHEADER& bih) //reading bih
{
ifs.read((char*)&(bih.biSize), sizeof(bih.biSize));
ifs.read((char*)&(bih.biWidth), sizeof(bih.biWidth));
ifs.read((char*)&(bih.biHeight), sizeof(bih.biHeight));
ifs.read((char*)&(bih.biPlanes), sizeof(bih.biPlanes));
ifs.read((char*)&(bih.biBitCount), sizeof(bih.biBitCount));
ifs.read((char*)&(bih.biCompression), sizeof(bih.biCompression));
ifs.read((char*)&(bih.biSizeImage), sizeof(bih.biSizeImage));
ifs.read((char*)&(bih.biXpelsPerMeter), sizeof(bih.biXpelsPerMeter));
ifs.read((char*)&(bih.biYpelsPerMeter), sizeof(bih.biYpelsPerMeter));
ifs.read((char*)&(bih.biCrlUses), sizeof(bih.biCrlUses));
ifs.read((char*)&(bih.biCrlImportant), sizeof(bih.biCrlImportant));
}
void writeBFH(ofstream& ofs, BITMAPFILEHEADER& bfh) //zapisywanie naglowka pliku
{
ofs.write((char*)&(bfh.bfType), sizeof(bfh.bfType));
ofs.write((char*)&(bfh.bfSize), sizeof(bfh.bfSize));
ofs.write((char*)&(bfh.bfReserved1), sizeof(bfh.bfReserved1));
ofs.write((char*)&(bfh.bfReserved2), sizeof(bfh.bfReserved2));
ofs.write((char*)&(bfh.bfOffBits), sizeof(bfh.bfOffBits));
}
void writeBIH(ofstream& ofs, BITMAPINFOHEADER& bih) //writing bih
{
ofs.write((char*)&(bih.biSize), sizeof(bih.biSize));
ofs.write((char*)&(bih.biWidth), sizeof(bih.biWidth));
ofs.write((char*)&(bih.biHeight), sizeof(bih.biHeight));
ofs.write((char*)&(bih.biPlanes), sizeof(bih.biPlanes));
ofs.write((char*)&(bih.biBitCount), sizeof(bih.biBitCount));
ofs.write((char*)&(bih.biCompression), sizeof(bih.biCompression));
ofs.write((char*)&(bih.biSizeImage), sizeof(bih.biSizeImage));
ofs.write((char*)&(bih.biXpelsPerMeter), sizeof(bih.biXpelsPerMeter));
ofs.write((char*)&(bih.biYpelsPerMeter), sizeof(bih.biYpelsPerMeter));
ofs.write((char*)&(bih.biCrlUses), sizeof(bih.biCrlUses));
ofs.write((char*)&(bih.biCrlImportant), sizeof(bih.biCrlImportant));
}
void displaydata(BITMAPFILEHEADER bfh, BITMAPINFOHEADER bih) //displaying data
{
cout << "Input file data:" << endl;
cout << "<---------------------------------------->";
cout << "File length: " << dwordValue(bfh.bfSize) << endl;
cout << "Header size: " << dwordValue(bih.biSize) << endl;
cout << "Image width: " << dwordValue(bih.biWidth) << endl;
cout << "Image height: " << dwordValue(bih.biHeight) << endl;
cout << "Image size: " << dwordValue(bih.biSizeImage) << endl;
cout << "Real amount of picture's bytes: " << dwordValue(bih.biSizeImage) - dwordValue(bfh.bfOffBits) << endl;
cout << "Zeros per row: " << zerosPerRow(bfh, bih) << endl;
}
int zerosPerRow(BITMAPFILEHEADER bfh, BITMAPINFOHEADER bih)
{
int realsize = dwordValue(bih.biSizeImage) - dwordValue(bfh.bfOffBits);
int height = dwordValue(bih.biHeight);
int width = dwordValue(bih.biWidth);
int zeros = 0;
if (width % 4 != 0)
{
int bytesPerRow = width * 3;
zeros = (bytesPerRow / 4 + 1) * 4 - bytesPerRow;
}
return zeros;
}
void save_to_bmp()
{
char infile[100];
char outfile[100];
cout << "Enter input file: ";
cin >> infile;
cout << "Enter output file: ";
cin >> outfile;
ifstream ifs(infile, ios::binary); //input stream
ofstream ofs(outfile, ios::binary); //output stream
BITMAPFILEHEADER bfh;
BITMAPINFOHEADER bih;
if (!ifs.is_open() || !ofs.is_open())
{
cerr << "Opening error!";
save_to_bmp();
}
else {
readBFH(ifs, bfh);
readBIH(ifs, bih);
writeBFH(ofs, bfh);
writeBIH(ofs, bih);
displaydata(bfh, bih);
/*reading values of pixels*/
unsigned char p; //value of a single pixel
short** the_image; //input image array
the_image = new short* [dwordValue(bih.biWidth)];
for (int i = 0; i < dwordValue(bih.biWidth); i++)
{
the_image[i] = new short [dwordValue(bih.biHeight)];
}
short** out_image; //output image array
out_image = new short* [dwordValue(bih.biWidth)];
for (int i = 0; i < dwordValue(bih.biWidth); i++)
{
out_image[i] = new short[dwordValue(bih.biHeight)];
}
for (int i = 0; i < dwordValue(bih.biHeight); i++)
{
for (int j = 0; j < dwordValue(bih.biWidth); j++)
{
for (int n = 0; n < 2; n++)
{
ifs.read((char*)&p, sizeof(char));
ofs.write((char*)&p, sizeof(char));
}
the_image[i][j] = (short)p;
out_image[i][j] = (short)p;
}
}
/*zera*/
for (int i = 0; i < zerosPerRow(bfh, bih); i++)
{
ifs.read((char*)&p, sizeof(char));
ofs.write((char*)&p, sizeof(char));
}
sobel(the_image, out_image, dwordValue(bih.biWidth), dwordValue(bih.biHeight), dwordValue(bih.biBitCount));
for (int i = 0; i < dwordValue(bih.biWidth); i++)
{
delete[] the_image[i];
the_image[i] = NULL;
}
delete[] the_image;
the_image = NULL;
ifs.close();
ofs.close();
char a;
cout << "File has been read. Enter anything to exit program";
cin >> a;
system("cls");
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////// SOBEL CODE //////////////////////////////////////////////////////////////////////////////////////
short** image, ** out_image;
long rows, cols, bits_per_pixel;
int perform_convolution(short** image, short** out_image, long rows, long cols, long bits_per_pixel)
{
char response[80];
int a, b, i, is_present, j, sum;
short max, min, new_hi, new_low;
new_hi = 250;
new_low = 16;
if (bits_per_pixel == 4) {
new_hi = 10;
new_low = 3;
}
min = 0;
max = 255;
if (bits_per_pixel == 4)
max = 16;
/*cleaning an output array*/
for (i = 0; i < rows; i++)
for (j = 0; j < cols; j++)
out_image[i][j] = 0;
printf("\n ");
for (i = 1; i < rows - 1; i++) {
if ((i % 10) == 0) { printf("%4d", i); }
for (j = 1; j < cols - 1; j++) {
/*convolution*/
/*kierunek 0*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] *
mask_0[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 1*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_1[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 2*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_2[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 3*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_3[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 4*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_4[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 5*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_5[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 6*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_6[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
/*kierunek 7*/
sum = 0;
for (a = -1; a < 2; a++) {
for (b = -1; b < 2; b++) {
sum = sum + image[i + a][j + b] * mask_7[a + 1][b + 1];
}
}
if (sum > max) sum = max;
if (sum < 0) sum = 0;
if (sum > out_image[i][j])
out_image[i][j] = sum;
}
}
return(1);
}
int fix_edges(short** im, int w, long rows, long cols)
{
int i, j;
for (i = w; i > 0; i--) {
im[i - 1][i - 1] = im[i][i];
im[i - 1][cols - (i - 1)] = im[i][cols - 1 - (i - 1)];
im[rows - (i - 1)][i - 1] = im[rows - 1 - (i - 1)][i];
im[rows - (i - 1)][cols - (i - 1)] = im[rows - 1 - (i - 1)][cols - 1 - (i - 1)];
}
for (i = 0; i < rows; i++) {
for (j = w; j > 0; j--) {
im[i][j - 1] = im[i][j];
im[i][cols - j] = im[i][cols - j - 1];
}
}
for (j = 0; j < cols; j++) {
for (i = w; i > 0; i--) {
im[i - 1][j] = im[i][j];
im[rows - i][j] = im[rows - i - 1][j];
}
}
return(1);
}
int sobel(short** the_image, short** out_image, long rows, long cols, long bits_per_pixel) //wykrywanie krawedzi operatorem sobela
{
perform_convolution(the_image, out_image, rows, cols, bits_per_pixel);
fix_edges(out_image, 1, rows, cols);
return(1);
}
int main()
{
menuservice();
return 0;
}
Hello I have tried to entering n division number by a number or a constant.
Here is the code:
}
int main()
{
//The main problem in below
int n_temp;
std::cout << "Please enter the division number";
std::cin >> n_temp;
const unsigned int n = n_temp;
int const iter_n = 10;
double const dx = L / (n - 1);
double T[n];
double P[n], Q[n];
double kP, kE, kW, ke, kw, Sp, Sc;
double a[n], b[n], c[n], d[n];
T[0] = 300; T[n - 1] = 1000;
for (int i = 1; i < n - 1; i++) { T[i] = 500; }
std::cout << "T= ["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
for (int iter = 0; iter < iter_n; iter++) {
a[0] = 1; b[0] = 0; c[0] = 0; d[0] = T[0];
a[n - 1] = 1; b[n - 1] = 0; c[n - 1] = 0; d[n - 1] = T[n - 1];
for (int i = 1; i < n - 1; i++) {
float x = i * dx;
Sp = Spv(T[i], x);
Sc = Scv(T[i], x);
kP = kv(T[i], x);
kE = kv(T[i + 1], x + dx);
kW = kv(T[i - 1], x - dx);
ke = 2 * kP * kE / (kP + kE);
kw = 2 * kP * kW / (kP + kW);
b[i] = ke / dx;
c[i] = kw / dx;
a[i] = b[i] + c[i] + Sp * dx;
d[i] = Sc * dx;
}
P[0] = 0;
Q[0] = d[0];
for (int i = 1; i < n; i++) {
P[i] = b[i] / (a[i] - c[i] * P[i - 1]);
Q[i] = (c[i] * Q[i - 1] + d[i]) / (a[i] - c[i] * P[i - 1]);
}
for (int i = n - 2; i > 0; i--) {
T[i] = P[i] * T[i + 1] + Q[i];
}
std::cout << "T =["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
}
return 0;
}
Here is the revised version which is after reviewing comments. There are some errors that appear in the T. For example E2291,C3536, C2893, C2784, C2672,C2100 Thanks.
int main()
{
int n_temp;
std::cout << "Please enter the division number";
std::cin >> n_temp;
const unsigned int n = n_temp;
int const iter_n = 10;
double const dx = L / (n - 1);
double*T= new double[n];
double*P = new double[n];
double*Q= new double[n];
double kP, kE, kW, ke, kw, Sp, Sc;
double*a= new double[n];
double*b = new double[n];
double* c = new double[n];
double* d = new double[n];
T[0] = 300; T[n - 1] = 1000;
for (int i = 1; i < n - 1; i++) { T[i] = 500; }
std::cout << "T= ["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
for (int iter = 0; iter < iter_n; iter++) {
a[0] = 1; b[0] = 0; c[0] = 0; d[0] = T[0];
a[n - 1] = 1; b[n - 1] = 0; c[n - 1] = 0; d[n - 1] = T[n - 1];
for (int i = 1; i < n - 1; i++) {
float x = i * dx;
Sp = Spv(T[i], x);
Sc = Scv(T[i], x);
kP = kv(T[i], x);
kE = kv(T[i + 1], x + dx);
kW = kv(T[i - 1], x - dx);
ke = 2 * kP * kE / (kP + kE);
kw = 2 * kP * kW / (kP + kW);
b[i] = ke / dx;
c[i] = kw / dx;
a[i] = b[i] + c[i] + Sp * dx;
d[i] = Sc * dx;
}
P[0] = 0;
Q[0] = d[0];
for (int i = 1; i < n; i++) {
P[i] = b[i] / (a[i] - c[i] * P[i - 1]);
Q[i] = (c[i] * Q[i - 1] + d[i]) / (a[i] - c[i] * P[i - 1]);
}
for (int i = n - 2; i > 0; i--) {
T[i] = P[i] * T[i + 1] + Q[i];
}
std::cout << "T =["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
delete[]T;
}
return 0;
}
test_euclid_ask.h (only need to read 2 functions: euclid_slow, euclid_fast)
#pragma once
#include "included.h"
double
euclid_slow(int n, double* data1, double* data2, int* mask1, int* mask2, const double weight[])
{
double result = 0.0;
double totalWeight = 0;
for (int i = 0; i < n; i++) {
if (mask1[i] && mask2[i]) {
double term = data1[i] - data2[i];
result += weight[i] * term * term;
totalWeight += weight[i];
}
}
if (totalWeight==0) return 0;
return result / totalWeight;
}
double
euclid_fast(int n, double* data1, double* data2, int* mask1, int* mask2, const double weight[])
{
double result = 0.0;
double totalWeight = 0;
double subResult[4] = { 0. };
double subTweight[4] = { 0. };
double subDiff[4] = { 0. };
double subWeight[4] = { 0. };
double subMask[4] = { 0. };
int nstep4 = n - n % 4;
for (int i = 0; i < nstep4; i += 4) {
subMask[0] = mask1[i] && mask2[i];
subMask[1] = mask1[i + 1] && mask2[i + 1];
subMask[2] = mask1[i + 2] && mask2[i + 2];
subMask[3] = mask1[i + 3] && mask2[i + 3];
if (!(subMask[0] || subMask[1] || subMask[2] || subMask[3])) continue;
subDiff[0] = data1[i] - data2[i];
subDiff[1] = data1[i + 1] - data2[i + 1];
subDiff[2] = data1[i + 2] - data2[i + 2];
subDiff[3] = data1[i + 3] - data2[i + 3];
subDiff[0] *= subDiff[0];
subDiff[1] *= subDiff[1];
subDiff[2] *= subDiff[2];
subDiff[3] *= subDiff[3];
subWeight[0] = weight[i] * subMask[0];
subWeight[1] = weight[i + 1] * subMask[1];
subWeight[2] = weight[i + 2] * subMask[2];
subWeight[3] = weight[i + 3] * subMask[3];
subTweight[0] += subWeight[0];
subTweight[1] += subWeight[1];
subTweight[2] += subWeight[2];
subTweight[3] += subWeight[3];
subResult[0] += subWeight[0] * subDiff[0];
subResult[1] += subWeight[1] * subDiff[1];
subResult[2] += subWeight[2] * subDiff[2];
subResult[3] += subWeight[3] * subDiff[3];
}
for (int i = nstep4; i < n; i++) {
if (mask1[i] && mask2[i]) {
double term = data1[i] - data2[i];
result += weight[i] * term * term;
totalWeight += weight[i];
}
}
result += subResult[0] + subResult[1] + subResult[2] + subResult[3];
totalWeight += subTweight[0] + subTweight[1] + subTweight[2] + subTweight[3];
//cout << "end fast\n";
if (!totalWeight) return 0;
return result / totalWeight;
}
void test_euclid_ask()
{
const int MAXN = 10000000, MINN = 1000000;
double* data1, * data2;
int* mask1, * mask2;
double* dataPro1, * dataPro2;
int* maskPro1, * maskPro2;
double *weight, * weightPro;
//***********
data1 = new double[MAXN + MINN + 1];
data2 = new double[MAXN + MINN + 1];
mask1 = new int[MAXN + MINN + 1];
mask2 = new int[MAXN + MINN + 1];
dataPro1 = new double[MAXN + MINN + 1];
dataPro2 = new double[MAXN + MINN + 1];
maskPro1 = new int[MAXN + MINN + 1];
maskPro2 = new int[MAXN + MINN + 1];
// ******
weight = new double[MAXN + MINN + 1];
weightPro = new double[MAXN + MINN + 1];
MyTimer timer;
int n;
double guess1, guess2, tmp, total1 = 0, total2 = 0, prev1 = 0, prev2 = 0;
for (int t = 5000; t < 6000; t++) {
if (t <= 5000) n = t;
else n = MINN + rand() % (MAXN - MINN);
cout << n << "\n";
int index = 0;
for (int i = 0; i < n; i++) {
weight[i] = int64(randomed()) % 100;
data1[i] = int64(randomed()) % 100;
data2[i] = int64(randomed()) % 100;
mask1[i] = rand() % 10;
mask2[i] = rand() % 10;
}
memcpy(weightPro, weight, n * sizeof(double));
memcpy(dataPro1, data1, n * sizeof(double));
memcpy(dataPro2, data2, n * sizeof(double));
memcpy(maskPro1, mask1, n * sizeof(int));
memcpy(maskPro2, mask2, n * sizeof(int));
//****
int tmp = flush_cache(); // do something to ensure the cache does not contain test data
cout << "ignore this " << tmp << "\n";
timer.startCounter();
guess1 = euclid_slow(n, data1, data2, mask1, mask2, weight);
tmp = timer.getCounterMicro();
total1 += tmp;
cout << "time slow = " << tmp << " us\n";
timer.startCounter();
guess2 = euclid_fast(n, dataPro1, dataPro2, maskPro1, maskPro2, weightPro);
tmp = timer.getCounterMicro();
total2 += tmp;
cout << "time fast = " << tmp << " us\n";
bool ok = fabs(guess1 - guess2) <= 0.1;
if (!ok) {
cout << "error at N = " << n << "\n";
exit(-1);
}
cout << "\n";
}
cout << "slow speed = " << (total1 / 1000) << " ms\n";
cout << "fast speed = " << (total2 / 1000) << " ms\n";
}
Basically, the function computes a kind-of Euclidean distance between 2 arrays:
result = sum(weight[i] * (data1[i] - data2[i])^2)
but only in positions where both values are available (mask1[i]==0 means it's ignored, same with mask2). The normal code is in function euclid_slow.
So I tried to improve the code by processing 4 elements at once, hoping that SSE/AVX can speed this up. However, the result stays the same or slower(using g++ -O3 -march=native) or becomes 40% slower (using Visual Studio 2019 compiler, release mode (x64), -O2, AVX2 enabled). I tried both -O2 and -O3, same result.
The compiler made better optimizations than what I wrote. But how can I make it actually faster?
Edit: code to test the programs here
I use a rolling weighted moving average function whose code is provided below. It is coded in C++ via Rcpp.
This function works for most times series there is no loop issues or anything like this. I provided below a times series of length 2 that sometimes triggers the fatal error.
I could not find the reason of the error.
Thanks for your help! =)
Here is the R code :
# Install packages
sourceCpp("partialMA.cpp")
spencer_weights=c( -3, -6, -5, 3, 21, 46, 67, 0, 67, 46, 21, 3, -5, -6, -3)
spencer_ma <- function(x) roll_mean(x,spencer_weights)
x=c(11.026420323685528,0.25933761651337001)
spencer_ma(x) # works
for(i in 1:1000) spencer_ma(x) # triggers the fatal error
I include the C++ code of my roll_mean function below :
#include <Rcpp.h>
using namespace Rcpp;
// [[Rcpp::export]]
NumericVector roll_mean(const NumericVector& x,
const NumericVector& w) {
int n = x.size();
int w_size = w.size();
int size = (w_size - 1) / 2;
NumericVector res(n);
int i, ind_x, ind_w;
double w_sum = Rcpp::sum(w), tmp_wsum, tmp_xwsum, tmp_w;
// beginning
for (i = 0; i < size; i++) {
tmp_xwsum = tmp_wsum = 0;
for (ind_x = i + size, ind_w = w_size - 1; ind_x >= 0; ind_x--, ind_w--) {
tmp_w = w[ind_w];
tmp_wsum += tmp_w;
tmp_xwsum += x[ind_x] * tmp_w;
}
res[i] = tmp_xwsum / tmp_wsum;
}
// middle
int lim2 = n - size;
for (; i < lim2; i++) {
tmp_xwsum = 0;
for (ind_x = i - size, ind_w = 0; ind_w < w_size; ind_x++, ind_w++) {
tmp_xwsum += x[ind_x] * w[ind_w];
}
res[i] = tmp_xwsum / w_sum;
}
// end
for (; i < n; i++) {
tmp_xwsum = tmp_wsum = 0;
for (ind_x = i - size, ind_w = 0; ind_x < n; ind_x++, ind_w++) {
tmp_w = w[ind_w];
tmp_wsum += tmp_w;
tmp_xwsum += x[ind_x] * tmp_w;
}
res[i] = tmp_xwsum / tmp_wsum;
}
return res;
}
A Wild Index Out of Bounds Error Appeared!
You can pinpoint the issue by switching element accessors from [] to (). The latter has a built in bounds check, e.g. is index between 0 and n-1.
Running the code with the built-in check gives:
Error in roll_mean(x, spencer_weights) :
Index out of bounds: [index=7; extent=2].
So, the indices being used are greatly exceeding the length of the vector. Adding a trace statement indicates its the first loop that is wrong.
#include <Rcpp.h>
// [[Rcpp::export]]
NumericVector roll_mean(const NumericVector& x,
const NumericVector& w) {
int n = x.size();
int w_size = w.size();
int size = (w_size - 1) / 2;
Rcpp::Rcout << n << ", w_size: " << w_size << ", size: " << size << std::endl;
NumericVector res(n);
int i, ind_x, ind_w;
double w_sum = Rcpp::sum(w), tmp_wsum, tmp_xwsum, tmp_w;
// beginning
for (i = 0; i < size; i++) {
tmp_xwsum = tmp_wsum = 0;
// Fix this line
for (ind_x = i + size, ind_w = w_size - 1; ind_x >= 0; ind_x--, ind_w--) {
tmp_w = w(ind_w);
Rcpp::Rcout << "Loop at: " << ind_w << std::endl;
tmp_wsum += tmp_w;
tmp_xwsum += x(ind_x) * tmp_w;
}
res(i) = tmp_xwsum / tmp_wsum;
}
Rcpp::Rcout << "success" << std::endl;
return res;
}
And that's all folks!
I'm implementing an algorithm, I excuse myself for the extreme for looping, haven't found a better way yet.
The problem is that at the second iteration at line 81 it gives a First-chance exception at 0x000000007707320E (ntdll.dll) in Test.exe: 0xC0000005: Access violation reading location 0xFFFFFFFFFFFFFFFF.
void co_hog(Mat image, int offset, int blockSize, int nrBins, int cat) {
Mat img_x;
Mat img_y;
IplImage img = image;
Mat kern_x = (Mat_<char>(1, 3) << -1, 0, 1);
Mat kern_y = (Mat_<char>(3, 1) << -1, 0, 1);
filter2D(image, img_x, image.depth(), kern_x);
filter2D(image, img_y, image.depth(), kern_y);
Size imageSize = image.size();
int nrBlocksY = imageSize.height / blockSize;
int nrBlocksX = imageSize.width / blockSize;
int degreePerBin = 180 / nrBins;
Mat gradients = Mat(image.size(), CV_32FC1);
Mat magnitudes = Mat(image.size(), CV_32FC1);
for(int y = 0; y < image.rows; y++) {
for(int x = 0; x < image.cols; x++) {
float grad_x = (float)img_x.at<uchar>(y, x);
float grad_y = (float)img_y.at<uchar>(y, x);
gradients.at<float>(y, x) = abs(atan2(grad_y, grad_x) * 180 / PI);
magnitudes.at<float>(y, x) = sqrt(pow(grad_x, 2) + pow(grad_y, 2));
}
}
int bin_1, bin_2, bin_3, bin_4;
double theta_1, theta_2, theta_3, theta_4;
Mat H;
stringstream line(stringstream::in | stringstream::out);
line << cat << " ";
int index = 1;
for(int i = 0; i < nrBlocksY; i++) {
for(int j = 0; j < nrBlocksX; j++) {
Mat coOccMat = Mat::zeros(nrBins, nrBins, CV_32FC1);
for(int q = i * blockSize; q < (i * blockSize) + blockSize; q++) {
for(int p = j * blockSize; p < (j * blockSize) + blockSize; p++) {
for(int offy = -offset; offy < offset; offy++) {
for(int offx = -offset; offx < offset; offx++) {
if((q + offy) >= imageSize.height || (p + offx) >= imageSize.width || (q + offy) < 0 || (p + offx) < 0) {
continue;
}
float m_1 = magnitudes.at<float>(q, p);
float m_2 = magnitudes.at<float>(q + offy, p + offx);
float alpha = gradients.at<float>(q, p);
float beta = gradients.at<float>(q + offy, p + offx);
if(fmod(alpha / degreePerBin, 1) > 0.5) {
bin_1 = floor(alpha / degreePerBin);
bin_2 = bin_1 + 1;
} else {
bin_2 = floor(alpha / degreePerBin);
bin_1 = bin_2 - 1;
}
if(fmod(beta / degreePerBin, 1) > 0.5) {
bin_3 = floor(beta / degreePerBin);
bin_4 = bin_3 + 1;
} else {
bin_4 = floor(beta / degreePerBin);
bin_3 = bin_4 - 1;
}
theta_1 = (bin_1 * degreePerBin) + (degreePerBin / 2);
theta_2 = (bin_2 * degreePerBin) + (degreePerBin / 2);
theta_3 = (bin_3 * degreePerBin) + (degreePerBin / 2);
theta_4 = (bin_4 * degreePerBin) + (degreePerBin / 2);
coOccMat.at<float>(bin_1, bin_3) += (m_1 * (1 - (alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * (1 - (beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_1, bin_4) += (m_1 * (1 - (alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * ((beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_2, bin_3) += (m_1 * ((alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * (1 - (beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_2, bin_4) += (m_1 * ((alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * ((beta - theta_3) / (theta_4 - theta_1)));
}
}
}
}
cout << coOccMat << endl;
-> Next statement to be called *passes the first time* H = coOccMat.reshape(0, 1);
normalize(H, H);
cout << H.size() << endl;
for(int i = 0; i < H.cols; ++i) {
for(int j = 0; j < H.rows; ++j) {
if(H.at<float>(j, i) > 0) {
line << index << ":" << H.at<float>(j, i) << " ";
}
index++;
}
}
cout << "Done" << index << endl;
}
}
}
Problem has been fixed, sometimes the value for a bin was set on -1 so it couldn't access it, debugging tools of visual studio couldn't point out where it went wrong.