I'm implementing an algorithm, I excuse myself for the extreme for looping, haven't found a better way yet.
The problem is that at the second iteration at line 81 it gives a First-chance exception at 0x000000007707320E (ntdll.dll) in Test.exe: 0xC0000005: Access violation reading location 0xFFFFFFFFFFFFFFFF.
void co_hog(Mat image, int offset, int blockSize, int nrBins, int cat) {
Mat img_x;
Mat img_y;
IplImage img = image;
Mat kern_x = (Mat_<char>(1, 3) << -1, 0, 1);
Mat kern_y = (Mat_<char>(3, 1) << -1, 0, 1);
filter2D(image, img_x, image.depth(), kern_x);
filter2D(image, img_y, image.depth(), kern_y);
Size imageSize = image.size();
int nrBlocksY = imageSize.height / blockSize;
int nrBlocksX = imageSize.width / blockSize;
int degreePerBin = 180 / nrBins;
Mat gradients = Mat(image.size(), CV_32FC1);
Mat magnitudes = Mat(image.size(), CV_32FC1);
for(int y = 0; y < image.rows; y++) {
for(int x = 0; x < image.cols; x++) {
float grad_x = (float)img_x.at<uchar>(y, x);
float grad_y = (float)img_y.at<uchar>(y, x);
gradients.at<float>(y, x) = abs(atan2(grad_y, grad_x) * 180 / PI);
magnitudes.at<float>(y, x) = sqrt(pow(grad_x, 2) + pow(grad_y, 2));
}
}
int bin_1, bin_2, bin_3, bin_4;
double theta_1, theta_2, theta_3, theta_4;
Mat H;
stringstream line(stringstream::in | stringstream::out);
line << cat << " ";
int index = 1;
for(int i = 0; i < nrBlocksY; i++) {
for(int j = 0; j < nrBlocksX; j++) {
Mat coOccMat = Mat::zeros(nrBins, nrBins, CV_32FC1);
for(int q = i * blockSize; q < (i * blockSize) + blockSize; q++) {
for(int p = j * blockSize; p < (j * blockSize) + blockSize; p++) {
for(int offy = -offset; offy < offset; offy++) {
for(int offx = -offset; offx < offset; offx++) {
if((q + offy) >= imageSize.height || (p + offx) >= imageSize.width || (q + offy) < 0 || (p + offx) < 0) {
continue;
}
float m_1 = magnitudes.at<float>(q, p);
float m_2 = magnitudes.at<float>(q + offy, p + offx);
float alpha = gradients.at<float>(q, p);
float beta = gradients.at<float>(q + offy, p + offx);
if(fmod(alpha / degreePerBin, 1) > 0.5) {
bin_1 = floor(alpha / degreePerBin);
bin_2 = bin_1 + 1;
} else {
bin_2 = floor(alpha / degreePerBin);
bin_1 = bin_2 - 1;
}
if(fmod(beta / degreePerBin, 1) > 0.5) {
bin_3 = floor(beta / degreePerBin);
bin_4 = bin_3 + 1;
} else {
bin_4 = floor(beta / degreePerBin);
bin_3 = bin_4 - 1;
}
theta_1 = (bin_1 * degreePerBin) + (degreePerBin / 2);
theta_2 = (bin_2 * degreePerBin) + (degreePerBin / 2);
theta_3 = (bin_3 * degreePerBin) + (degreePerBin / 2);
theta_4 = (bin_4 * degreePerBin) + (degreePerBin / 2);
coOccMat.at<float>(bin_1, bin_3) += (m_1 * (1 - (alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * (1 - (beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_1, bin_4) += (m_1 * (1 - (alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * ((beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_2, bin_3) += (m_1 * ((alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * (1 - (beta - theta_3) / (theta_4 - theta_1)));
coOccMat.at<float>(bin_2, bin_4) += (m_1 * ((alpha - theta_1) / (theta_2 - theta_1))) + (m_2 * ((beta - theta_3) / (theta_4 - theta_1)));
}
}
}
}
cout << coOccMat << endl;
-> Next statement to be called *passes the first time* H = coOccMat.reshape(0, 1);
normalize(H, H);
cout << H.size() << endl;
for(int i = 0; i < H.cols; ++i) {
for(int j = 0; j < H.rows; ++j) {
if(H.at<float>(j, i) > 0) {
line << index << ":" << H.at<float>(j, i) << " ";
}
index++;
}
}
cout << "Done" << index << endl;
}
}
}
Problem has been fixed, sometimes the value for a bin was set on -1 so it couldn't access it, debugging tools of visual studio couldn't point out where it went wrong.
Related
Hello I have tried to entering n division number by a number or a constant.
Here is the code:
}
int main()
{
//The main problem in below
int n_temp;
std::cout << "Please enter the division number";
std::cin >> n_temp;
const unsigned int n = n_temp;
int const iter_n = 10;
double const dx = L / (n - 1);
double T[n];
double P[n], Q[n];
double kP, kE, kW, ke, kw, Sp, Sc;
double a[n], b[n], c[n], d[n];
T[0] = 300; T[n - 1] = 1000;
for (int i = 1; i < n - 1; i++) { T[i] = 500; }
std::cout << "T= ["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
for (int iter = 0; iter < iter_n; iter++) {
a[0] = 1; b[0] = 0; c[0] = 0; d[0] = T[0];
a[n - 1] = 1; b[n - 1] = 0; c[n - 1] = 0; d[n - 1] = T[n - 1];
for (int i = 1; i < n - 1; i++) {
float x = i * dx;
Sp = Spv(T[i], x);
Sc = Scv(T[i], x);
kP = kv(T[i], x);
kE = kv(T[i + 1], x + dx);
kW = kv(T[i - 1], x - dx);
ke = 2 * kP * kE / (kP + kE);
kw = 2 * kP * kW / (kP + kW);
b[i] = ke / dx;
c[i] = kw / dx;
a[i] = b[i] + c[i] + Sp * dx;
d[i] = Sc * dx;
}
P[0] = 0;
Q[0] = d[0];
for (int i = 1; i < n; i++) {
P[i] = b[i] / (a[i] - c[i] * P[i - 1]);
Q[i] = (c[i] * Q[i - 1] + d[i]) / (a[i] - c[i] * P[i - 1]);
}
for (int i = n - 2; i > 0; i--) {
T[i] = P[i] * T[i + 1] + Q[i];
}
std::cout << "T =["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
}
return 0;
}
Here is the revised version which is after reviewing comments. There are some errors that appear in the T. For example E2291,C3536, C2893, C2784, C2672,C2100 Thanks.
int main()
{
int n_temp;
std::cout << "Please enter the division number";
std::cin >> n_temp;
const unsigned int n = n_temp;
int const iter_n = 10;
double const dx = L / (n - 1);
double*T= new double[n];
double*P = new double[n];
double*Q= new double[n];
double kP, kE, kW, ke, kw, Sp, Sc;
double*a= new double[n];
double*b = new double[n];
double* c = new double[n];
double* d = new double[n];
T[0] = 300; T[n - 1] = 1000;
for (int i = 1; i < n - 1; i++) { T[i] = 500; }
std::cout << "T= ["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
for (int iter = 0; iter < iter_n; iter++) {
a[0] = 1; b[0] = 0; c[0] = 0; d[0] = T[0];
a[n - 1] = 1; b[n - 1] = 0; c[n - 1] = 0; d[n - 1] = T[n - 1];
for (int i = 1; i < n - 1; i++) {
float x = i * dx;
Sp = Spv(T[i], x);
Sc = Scv(T[i], x);
kP = kv(T[i], x);
kE = kv(T[i + 1], x + dx);
kW = kv(T[i - 1], x - dx);
ke = 2 * kP * kE / (kP + kE);
kw = 2 * kP * kW / (kP + kW);
b[i] = ke / dx;
c[i] = kw / dx;
a[i] = b[i] + c[i] + Sp * dx;
d[i] = Sc * dx;
}
P[0] = 0;
Q[0] = d[0];
for (int i = 1; i < n; i++) {
P[i] = b[i] / (a[i] - c[i] * P[i - 1]);
Q[i] = (c[i] * Q[i - 1] + d[i]) / (a[i] - c[i] * P[i - 1]);
}
for (int i = n - 2; i > 0; i--) {
T[i] = P[i] * T[i + 1] + Q[i];
}
std::cout << "T =["; for (double T_i : T) { std::cout << T_i << ","; } std::cout << "]\n";
delete[]T;
}
return 0;
}
test_euclid_ask.h (only need to read 2 functions: euclid_slow, euclid_fast)
#pragma once
#include "included.h"
double
euclid_slow(int n, double* data1, double* data2, int* mask1, int* mask2, const double weight[])
{
double result = 0.0;
double totalWeight = 0;
for (int i = 0; i < n; i++) {
if (mask1[i] && mask2[i]) {
double term = data1[i] - data2[i];
result += weight[i] * term * term;
totalWeight += weight[i];
}
}
if (totalWeight==0) return 0;
return result / totalWeight;
}
double
euclid_fast(int n, double* data1, double* data2, int* mask1, int* mask2, const double weight[])
{
double result = 0.0;
double totalWeight = 0;
double subResult[4] = { 0. };
double subTweight[4] = { 0. };
double subDiff[4] = { 0. };
double subWeight[4] = { 0. };
double subMask[4] = { 0. };
int nstep4 = n - n % 4;
for (int i = 0; i < nstep4; i += 4) {
subMask[0] = mask1[i] && mask2[i];
subMask[1] = mask1[i + 1] && mask2[i + 1];
subMask[2] = mask1[i + 2] && mask2[i + 2];
subMask[3] = mask1[i + 3] && mask2[i + 3];
if (!(subMask[0] || subMask[1] || subMask[2] || subMask[3])) continue;
subDiff[0] = data1[i] - data2[i];
subDiff[1] = data1[i + 1] - data2[i + 1];
subDiff[2] = data1[i + 2] - data2[i + 2];
subDiff[3] = data1[i + 3] - data2[i + 3];
subDiff[0] *= subDiff[0];
subDiff[1] *= subDiff[1];
subDiff[2] *= subDiff[2];
subDiff[3] *= subDiff[3];
subWeight[0] = weight[i] * subMask[0];
subWeight[1] = weight[i + 1] * subMask[1];
subWeight[2] = weight[i + 2] * subMask[2];
subWeight[3] = weight[i + 3] * subMask[3];
subTweight[0] += subWeight[0];
subTweight[1] += subWeight[1];
subTweight[2] += subWeight[2];
subTweight[3] += subWeight[3];
subResult[0] += subWeight[0] * subDiff[0];
subResult[1] += subWeight[1] * subDiff[1];
subResult[2] += subWeight[2] * subDiff[2];
subResult[3] += subWeight[3] * subDiff[3];
}
for (int i = nstep4; i < n; i++) {
if (mask1[i] && mask2[i]) {
double term = data1[i] - data2[i];
result += weight[i] * term * term;
totalWeight += weight[i];
}
}
result += subResult[0] + subResult[1] + subResult[2] + subResult[3];
totalWeight += subTweight[0] + subTweight[1] + subTweight[2] + subTweight[3];
//cout << "end fast\n";
if (!totalWeight) return 0;
return result / totalWeight;
}
void test_euclid_ask()
{
const int MAXN = 10000000, MINN = 1000000;
double* data1, * data2;
int* mask1, * mask2;
double* dataPro1, * dataPro2;
int* maskPro1, * maskPro2;
double *weight, * weightPro;
//***********
data1 = new double[MAXN + MINN + 1];
data2 = new double[MAXN + MINN + 1];
mask1 = new int[MAXN + MINN + 1];
mask2 = new int[MAXN + MINN + 1];
dataPro1 = new double[MAXN + MINN + 1];
dataPro2 = new double[MAXN + MINN + 1];
maskPro1 = new int[MAXN + MINN + 1];
maskPro2 = new int[MAXN + MINN + 1];
// ******
weight = new double[MAXN + MINN + 1];
weightPro = new double[MAXN + MINN + 1];
MyTimer timer;
int n;
double guess1, guess2, tmp, total1 = 0, total2 = 0, prev1 = 0, prev2 = 0;
for (int t = 5000; t < 6000; t++) {
if (t <= 5000) n = t;
else n = MINN + rand() % (MAXN - MINN);
cout << n << "\n";
int index = 0;
for (int i = 0; i < n; i++) {
weight[i] = int64(randomed()) % 100;
data1[i] = int64(randomed()) % 100;
data2[i] = int64(randomed()) % 100;
mask1[i] = rand() % 10;
mask2[i] = rand() % 10;
}
memcpy(weightPro, weight, n * sizeof(double));
memcpy(dataPro1, data1, n * sizeof(double));
memcpy(dataPro2, data2, n * sizeof(double));
memcpy(maskPro1, mask1, n * sizeof(int));
memcpy(maskPro2, mask2, n * sizeof(int));
//****
int tmp = flush_cache(); // do something to ensure the cache does not contain test data
cout << "ignore this " << tmp << "\n";
timer.startCounter();
guess1 = euclid_slow(n, data1, data2, mask1, mask2, weight);
tmp = timer.getCounterMicro();
total1 += tmp;
cout << "time slow = " << tmp << " us\n";
timer.startCounter();
guess2 = euclid_fast(n, dataPro1, dataPro2, maskPro1, maskPro2, weightPro);
tmp = timer.getCounterMicro();
total2 += tmp;
cout << "time fast = " << tmp << " us\n";
bool ok = fabs(guess1 - guess2) <= 0.1;
if (!ok) {
cout << "error at N = " << n << "\n";
exit(-1);
}
cout << "\n";
}
cout << "slow speed = " << (total1 / 1000) << " ms\n";
cout << "fast speed = " << (total2 / 1000) << " ms\n";
}
Basically, the function computes a kind-of Euclidean distance between 2 arrays:
result = sum(weight[i] * (data1[i] - data2[i])^2)
but only in positions where both values are available (mask1[i]==0 means it's ignored, same with mask2). The normal code is in function euclid_slow.
So I tried to improve the code by processing 4 elements at once, hoping that SSE/AVX can speed this up. However, the result stays the same or slower(using g++ -O3 -march=native) or becomes 40% slower (using Visual Studio 2019 compiler, release mode (x64), -O2, AVX2 enabled). I tried both -O2 and -O3, same result.
The compiler made better optimizations than what I wrote. But how can I make it actually faster?
Edit: code to test the programs here
I'm making an image editing program in c++ using sfml and tried to add image filters using:
int clamp(int value, int min, int max)
{
if (value < min)
return min;
if (value > max)
return max;
return value;
}
void MyImage::applyKernel(std::vector<std::vector<int>> kernel)
{
int index(0), tempx(0), tempy(0);
int wr(0), wg(0), wb(0), wa(0), sum(0);
auto newPixels = new sf::Uint8[this->size_y * this->size_x * 4];
// Calculate the sum of the kernel
for (int i = 0; i < kernel.size(); i++) {
for (int j = 0; j < kernel[i].size(); j++) {
sum += kernel[i][j];
}
}
for (int y = 0; y < this->size_y; y++) {
for (int x = 0; x < this->size_x; x++) {
/*
Calculate weighted sum from kernel
*/
wr = wg = wb = wa = 0;
for (int i = 0; i < kernel.size(); i++) {
for (int j = 0; j < kernel[i].size(); j++) {
/*
Calculates the coordinates of the kernel relative to the pixel we are changing
*/
tempx = x + (j - floor(kernel[i].size() / 2));
tempy = y + (i - floor(kernel.size() / 2));
//std::cout << "kernel=(" << j << ", " << i << "), pixel=(" << x << ", " << y << ") tempPos=(" << tempx << ", " << tempy << ")\n";
/*
This code below should have the effect of mirroring the image in the case the kernel coordinate is out of bounds (along the edge of the image)
*/
tempx = (tempx < 0) ? -1 * tempx : tempx;
tempy = (tempy < 0) ? -1 * tempy : tempy;
tempx = (tempx > this->size_x) ? x - (j - floor(kernel[i].size() / 2)) : tempx;
tempy = (tempy > this->size_y) ? y - (i - floor(kernel.size() / 2)) : tempy;
if (tempx >= 0 && tempx < this->size_x && tempy >= 0 && tempy < this->size_y) {
index = (((tempy * this->size_x) - tempy) + (tempx)) * 4;
wr += kernel[i][j] * this->pixels[index];
wg += kernel[i][j] * this->pixels[index + 1];
wb += kernel[i][j] * this->pixels[index + 2];
wa += kernel[i][j] * this->pixels[index + 3];
}
}
}
if (sum) {
wr /= sum;
wg /= sum;
wb /= sum;
wa /= sum;
}
index = (((y * this->size_x) - y) + (x)) * 4;
newPixels[index] = clamp(wr, 0, 255); // Red
newPixels[index + 1] = clamp(wg, 0, 255); // Green
newPixels[index + 2] = clamp(wb, 0, 255); // Blue
newPixels[index + 3] = clamp(wa, 0, 255); // Alpha
}
}
this->pixels = newPixels;
// Copies the data from our sf::Uint8 array to the image object to be displayed => Removes the overhead of calling setPixel(x,y,color) for every pixel {As a side note setPixel() should always be avoided}|
this->im->create(this->size_x, this->size_y, this->pixels);
}
I was trying to use [-1,-1,-1], [-1,8,-1]. [-1,-1,-1] for edge detection but just ended up with a white image except for some pixels near the bottom. I've tried different images and kernels out but any that add to 0 don't work. For example if I take the edge detection kernel above and change the 8 to a 9, it gives an expected result. Is there something wrong with my idea of how convolution kernels work or is it just a bug in my code?
Thank you.
I load the vertical and horizontal gradients into the function posted here and it calculates the sums which than make up the corner response. Why do only boarder pixels get to be found, my threshold is 0 otherwise there is 0 corners on the image. For gradients I used sobel operator.
Look at the output image below.
for (int i = 0; i < width; i++)
{
for (int j = 0; j < height; j++)
{
if ((i - search_size / 2 < 0 || i + search_size / 2 > image1.rows - 1) || (j - search_size / 2 < 0 || j + search_size / 2 > image1.cols - 1)) {
continue;
}
double Ix2 = 0, Iy2 = 0, Ixy = 0;
double detM=0;
double traceM=0;
double R = 0;
for (int m = i-search_size /2; m < i + search_size /2 ; m++){
for (int n = j-search_size /2; n < j + search_size/2 ; n++){
gauss = exp(-(((i - m) * (i - m)) + ((j - n) * (j - n))) / gaus_del);
//Compute Ix^2 , Iy^2 and Ixy
Ix2 += gauss*(image1.at<float>(m, n)*image1.at<float>(m, n));
Iy2 += gauss*(image2.at<float>(m, n)*image2.at<float>(m, n));
Ixy += gauss*(image1.at<float>(m, n)*image2.at<float>(m, n));
}
}
detM = (Ix2*Iy2 - Ixy*Ixy);
traceM = Ix2*Ix2 + Iy2*Iy2;
R = detM / traceM;
//cout <<i+j<< endl;
// std::cout << "R :" << Iy2 << endl;
if (R > threshold)
{
circle(image, cv::Point2f(i, j), 3.5, cv::Scalar(255, 255, 0), 1, 5);
cout << "corner found" << endl;
}
}
}
EDIT : i am using uchars now and the result looks alot better
2
For a project I'm writing some code to compute the HoG of some images, but I'm stuck with the fact that my orientations are only between 0 ~ 90 degrees, while using the atan2 function.
I'm guessing that this problem occurs due to the filter2D function of OpenCV but I'm not sure if this is the reason or that I'm doing something else wrong:
Vector<Vector<Mat_<float>>> HoG(Mat image) {
Mat img_x;
Mat img_y;
IplImage img = image;
Mat kern_x = (Mat_<char>(1, 3) << -1, 0, 1);
Mat kern_y = (Mat_<char>(3, 1) << -1, 0, 1);
filter2D(image, img_x, image.depth(), kern_x);
filter2D(image, img_y, image.depth(), kern_y);
Vector<Vector<Mat_<float>>> histograms;
for(int y = 0; y < image.rows - size; y += size) {
Vector<Mat_<float>> temp_hist;
for(int x = 0; x < image.cols - size; x += size) {
float total_mag = 0;
Mat hist = Mat::zeros(1, 8, CV_32FC1);
for(int i = y; i < y + size; ++i) {
for(int j = x; j < x + size; ++j) {
float grad_x = (float)img_x.at<uchar>(i, j);
float grad_y = (float)img_y.at<uchar>(i, j);
double ori = myatan2(grad_x, grad_y);
float mag = sqrt(pow(grad_x, 2) + pow(grad_y, 2));
int bin = round(ori/45);
hist.at<float>(0, (bin - 1 < 0 ? 7 : bin - 1)) += - (float)(ori - ((round(ori/45) - 1) * 45.0 + 22.5)) / 45.0f;
hist.at<float>(0, bin) += -(float)(ori - ((round(ori/45) - 1) * 45.0 + 22.5)) / 45.0f;
total_mag += mag;
}
}
// Normalize the histogram
for(int i = 0; i < 8; ++i) {
hist.at<float>(0, i) = hist.at<float>(0, i) / total_mag;
}
temp_hist.push_back(hist);
}
histograms.push_back(temp_hist);
}
return histograms;
}
If you have any other tips to increase a speed-up in my code or something else that is also welcome of course.
I notice this:
float grad_x = (float)img_x.at<uchar>(i, j);
float grad_y = (float)img_y.at<uchar>(i, j);
You seem to be using uchar. Should this not be char?