By the OpenCV library, I want to threshold an image like this:
threshold(image, thresh, 220, 255, THRESH_BINARY_INV)
But I want to automatically find the threshold value (220).
I use Otsu to estimate the threshold. But it doesn't work in my case.
therefore, I should use Histogram Peak Technique. I want to find the two peaks in the histogram corresponding to the background and object of the image. It sets the threshold value automatically halfway between the two peaks.
I use this book (pages: 117 and 496-505): "Image Processing in C" by Dwayne Phillips (http://homepages.inf.ed.ac.uk/rbf/BOOKS/PHILLIPS/). And I use source code for find the two peaks in the histogram corresponding to the background and object of the image. this is my image:
this is my c++ code:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <stdio.h>
#include <fstream>
using namespace std;
using namespace cv;
int main()
{
Mat image0 = imread("C:/Users/Alireza/Desktop/contrast950318/2.bmp");
imshow("image0", image0);
Mat image, thresh, Tafrigh;
cvtColor(image0, image, CV_RGB2GRAY);
int N = image.rows*image.cols;
int histogram[256];
for (int i = 0; i < 256; i++) {
histogram[i] = 0;
}
//create histo
for (int i = 0; i < image.rows; i++){
for (int j = 0; j < image.cols; j++){
histogram[((int)image.at<uchar>(i, j))]++;
}
}
int peak1, peak2;
#define PEAKS 30
int distance[PEAKS], peaks[PEAKS][2];
int i, j = 0, max = 0, max_place = 0;
for (int i = 0; i<PEAKS; i++){
distance[i] = 0;
peaks[i][0] = -1;
peaks[i][1] = -1;
}
for (i = 0; i <= 255; i++){
max = histogram[i];
max_place = i;
//insert_into_peaks(peaks, max, max_place);
//int max, max_place, peaks[PEAKS][2];
//int i, j;
/* first case */
if (max > peaks[0][0]){
for (i = PEAKS - 1; i > 0; i--){
peaks[i][0] = peaks[i - 1][0];
peaks[i][1] = peaks[i - 1][1];
}
peaks[0][0] = max;
peaks[0][1] = max_place;
} /* ends if */
/* middle cases */
for (j = 0; j < PEAKS - 3; j++){
if (max < peaks[j][0] && max > peaks[j + 1][0]){
for (i = PEAKS - 1; i > j + 1; i--){
peaks[i][0] = peaks[i - 1][0];
peaks[i][1] = peaks[i - 1][1];
}
peaks[j + 1][0] = max;
peaks[j + 1][1] = max_place;
} /* ends if */
} /* ends loop over j */
/* last case */
if (max < peaks[PEAKS - 2][0] &&
max > peaks[PEAKS - 1][0]){
peaks[PEAKS - 1][0] = max;
peaks[PEAKS - 1][1] = max_place;
} /* ends if */
}/* ends loop over i */
for (int i = 1; i<PEAKS; i++){
distance[i] = peaks[0][1] - peaks[i][1];
if (distance[i] < 0)
distance[i] = distance[i] * (-1);
}
peak1 = peaks[0][1];
cout << " peak1= " << peak1;
for (int i = PEAKS - 1; i > 0; i--){
if (distance[i] > 1)
peak2 = peaks[i][1];
}
cout << " peak2= " << peak2;
int mid_point;
//int peak1, peak2;
short hi, low;
unsigned long sum1 = 0, sum2 = 0;
if (peak1 > peak2)
mid_point = ((peak1 - peak2) / 2) + peak2;
if (peak1 < peak2)
mid_point = ((peak2 - peak1) / 2) + peak1;
for (int i = 0; i<mid_point; i++)
sum1 = sum1 + histogram[i];
for (int i = mid_point; i <= 255; i++)
sum2 = sum2 + histogram[i];
if (sum1 >= sum2){
low = mid_point;
hi = 255;
}
else{
low = 0;
hi = mid_point;
}
cout << " low= " << low << " hi= " << hi;
double threshnum = 0.5* (low + hi);
threshold(image, thresh, threshnum, hi, THRESH_BINARY_INV);
waitKey(0);
return 0;
}
But I don't know this code correct is or not. If it correct, why is threshold value 202?
What ideas on how to solve this task would you suggest? Or on what resource on the internet can I find help?
You can use also the Max Entropy. In some cases using only the high frequency of the entropy could be better
int maxentropie(const cv::Mat1b& src)
{
// Histogram
cv::Mat1d hist(1, 256, 0.0);
for (int r=0; r<src.rows; ++r)
for (int c=0; c<src.cols; ++c)
hist(src(r,c))++;
// Normalize
hist /= double(src.rows * src.cols);
// Cumulative histogram
cv::Mat1d cumhist(1, 256, 0.0);
float sum = 0;
for (int i = 0; i < 256; ++i)
{
sum += hist(i);
cumhist(i) = sum;
}
cv::Mat1d hl(1, 256, 0.0);
cv::Mat1d hh(1, 256, 0.0);
for (int t = 0; t < 256; ++t)
{
// low range entropy
double cl = cumhist(t);
if (cl > 0)
{
for (int i = 0; i <= t; ++i)
{
if (hist(i) > 0)
{
hl(t) = hl(t) - (hist(i) / cl) * log(hist(i) / cl);
}
}
}
// high range entropy
double ch = 1.0 - cl; // constraint cl + ch = 1
if (ch > 0)
{
for (int i = t+1; i < 256; ++i)
{
if (hist(i) > 0)
{
hh(t) = hh(t) - (hist(i) / ch) * log(hist(i) / ch);
}
}
}
}
// choose best threshold
cv::Mat1d entropie(1, 256, 0.0);
double h_max = hl(0) + hh(0);
int threshold = 0;
entropie(0) = h_max;
for (int t = 1; t < 256; ++t)
{
entropie(t) = hl(t) + hh(t);
if (entropie(t) > h_max)
{
h_max = entropie(t);
threshold = uchar(t);
}
}
if(threshold==0) threshold=255;
return threshold;
}
Related
I've been implementing NN recently based on http://neuralnetworksanddeeplearning.com/. I've made whole algorithm for backprop and SGD almost the same way as author of this book. The problem is that while he gets accuracy around 90 % after one epoch i get 30% after 5 epochs even though i have the same hiperparameters. Do you have any idea what might be the cause ?
Here s my respository.
https://github.com/PiPower/Deep-Neural-Network
Here is part with algorithm for backprop and SGD implemented in Network.cpp:
void Network::Train(MatrixD_Array& TrainingData, MatrixD_Array& TrainingLabels, int BatchSize,int epochs, double LearningRate)
{
assert(TrainingData.size() == TrainingLabels.size() && CostFunc != nullptr && CostFuncDer != nullptr && LearningRate > 0);
std::vector<long unsigned int > indexes;
for (int i = 0; i < TrainingData.size(); i++) indexes.push_back(i);
std::random_device rd;
std::mt19937 g(rd());
std::vector<Matrix<double>> NablaWeights;
std::vector<Matrix<double>> NablaBiases;
NablaWeights.resize(Layers.size());
NablaBiases.resize(Layers.size());
for (int i = 0; i < Layers.size(); i++)
{
NablaWeights[i] = Matrix<double>(Layers[i].GetInDim(), Layers[i].GetOutDim());
NablaBiases[i] = Matrix<double>(1, Layers[i].GetOutDim());
}
//---- Epoch iterating
for (int i = 0; i < epochs; i++)
{
cout << "Epoch number: " << i << endl;
shuffle(indexes.begin(), indexes.end(), g);
// Batch iterating
for (int batch = 0; batch < TrainingData.size(); batch = batch + BatchSize)
{
for (int i = 0; i < Layers.size(); i++)
{
NablaWeights[i].Clear();
NablaBiases[i].Clear();
}
int i = 0;
while( i < BatchSize && (i+batch)< TrainingData.size())
{
std::vector<Matrix<double>> ActivationOutput;
std::vector<Matrix<double>> Z_Output;
ActivationOutput.resize(Layers.size() + 1);
Z_Output.resize(Layers.size());
ActivationOutput[0] = TrainingData[indexes[i + batch]];
int index = 0;
// Pushing values through
for (auto layer : Layers)
{
Z_Output[index] = layer.Mul(ActivationOutput[index]);
ActivationOutput[index + 1] = layer.ApplyActivation(Z_Output[index]);
index++;
}
// ---- Calculating Nabla that will be later devided by batch size element wise
auto DeltaNabla = BackPropagation(ActivationOutput, Z_Output, TrainingLabels[indexes[i + batch]]);
for (int i = 0; i < Layers.size(); i++)
{
NablaWeights[i] = NablaWeights[i] + DeltaNabla.first[i];
NablaBiases[i] = NablaBiases[i] + DeltaNabla.second[i];
}
i++;
}
for (int g = 0; g < Layers.size(); g++)
{
Layers[g].Weights = Layers[g].Weights - NablaWeights[g] * LearningRate;
Layers[g].Biases = Layers[g].Biases - NablaBiases[g] * LearningRate;
}
// std::transform(NablaWeights.begin(), NablaWeights.end(), NablaWeights.begin(),[BatchSize, LearningRate](Matrix<double>& Weight) {return Weight * (LearningRate / BatchSize);});
//std::transform(NablaBiases.begin(), NablaBiases.end(), NablaBiases.begin(), [BatchSize, LearningRate](Matrix<double>& Bias) {return Bias * (LearningRate / BatchSize); });
}
}
}
std::pair<MatrixD_Array, MatrixD_Array> Network::BackPropagation( MatrixD_Array& ActivationOutput, MatrixD_Array& Z_Output,Matrix<double>& label)
{
MatrixD_Array NablaWeight;
MatrixD_Array NablaBias;
NablaWeight.resize(Layers.size());
NablaBias.resize(Layers.size());
auto zs = Layers[Layers.size() - 1].ActivationPrime(Z_Output[Z_Output.size() - 1]);
Matrix<double> Delta_L = Hadamard(CostFuncDer(ActivationOutput[ActivationOutput.size() - 1],label), zs);
NablaWeight[Layers.size() - 1] = Delta_L * ActivationOutput[ActivationOutput.size() - 2].Transpose();
NablaBias[Layers.size() - 1] = Delta_L;
for (int j = 2; j <= Layers.size() ; j++)
{
auto sp = Layers[Layers.size() - j].ActivationPrime(Z_Output[Layers.size() -j]);
Delta_L = Hadamard(Layers[Layers.size() - j+1 ].Weights.Transpose() * Delta_L, sp);
NablaWeight[Layers.size() - j] = Delta_L * ActivationOutput[ActivationOutput.size() -j-1].Transpose();
NablaBias[Layers.size() - j] = Delta_L;
}
return make_pair(NablaWeight, NablaBias);
}
It turned out that mnist loader didnt work correctly.
I am trying to implement a sobel operator in both horizontal and vertical direction. But somehow I am getting the reverse output. The code I have attached below. For the horizontal mask
char mask [3][3]= {{-1,-2,-1},{0,0,0},{1,2,1}};
void masking(Mat image){
Mat temImage= image.clone();
for (int i = 1; i < image.rows-1; i++)
{
for (int j = 1; j < image.cols-1; j++)
{
for(int k=0;k<3;k++)
{
int pixel1 = image.at<Vec3b>(i-1,j-1)[k] * -1;
int pixel2 = image.at<Vec3b>(i,j-1)[k] * -2;
int pixel3 = image.at<Vec3b>(i+1,j-1)[k] * -1;
int pixel4 = image.at<Vec3b>(i-1,j)[k] * 0;
int pixel5 = image.at<Vec3b>(i,j)[k] * 0;
int pixel6 = image.at<Vec3b>(i+1,j)[k] * 0;
int pixel7 = image.at<Vec3b>(i-1,j+1)[k] * 1;
int pixel8 = image.at<Vec3b>(i,j+1)[k] * 2;
int pixel9 = image.at<Vec3b>(i+1,j+1)[k] * 1;
int sum = pixel1 + pixel2 + pixel3 + pixel4 + pixel5 + pixel6 + pixel7 + pixel8 + pixel9;
if(sum < 0)
{
sum = 0;
}
if(sum > 255)
sum = 255;
temImage.at<Vec3b>(i,j)[k] = sum;
}
}
}
//printf("conter = %d",counter);
imshow( "Display", temImage );
imwrite("output1.png",temImage);
}
I am getting the output as
where as for the vertical mask
char mask [3][3]= {{-1,0,1},{-2,0,2},{-1,0,1}};
void masking(Mat image){
Mat temImage= image.clone();
for (int i = 1; i < image.rows-1; i++)
{
for (int j = 1; j < image.cols-1; j++)
{
for(int k=0;k<3;k++)
{
int pixel1 = image.at<Vec3b>(i-1,j-1)[k] * -1;
int pixel2 = image.at<Vec3b>(i,j-1)[k] * 0;
int pixel3 = image.at<Vec3b>(i+1,j-1)[k] * 1;
int pixel4 = image.at<Vec3b>(i-1,j)[k] * -2;
int pixel5 = image.at<Vec3b>(i,j)[k] * 0;
int pixel6 = image.at<Vec3b>(i+1,j)[k] * 2;
int pixel7 = image.at<Vec3b>(i-1,j+1)[k] * -1;
int pixel8 = image.at<Vec3b>(i,j+1)[k] * 0;
int pixel9 = image.at<Vec3b>(i+1,j+1)[k] * 1;
int sum = pixel1 + pixel2 + pixel3 + pixel4 + pixel5 + pixel6 + pixel7 + pixel8 + pixel9;
if(sum < 0)
{
sum = 0;
}
if(sum > 255)
sum = 255;
temImage.at<Vec3b>(i,j)[k] = sum;
}
}
}
//printf("conter = %d",counter);
imshow( "Display", temImage );
imwrite("output1.png",temImage);
}
I am getting output as
The main function is attached below
int main( int argc, char** argv ){
Mat input_image = imread("sobel1.jpg",1);
masking(input_image);
waitKey(0);
return 0;
}
According the the guide https://www.tutorialspoint.com/dip/sobel_operator.htm I should get reverse output. Can anyone help me in this
The original image is
No, the tutorial is not wrong, it talks about masks and not gradients. The weak point of that tutorial is that it doesn't mention we are calculating horizontal gradients using what they call the vertical mask.
Here is my code for creating the hough accumulator for lines in image :
void hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc) {
for (size_t r = 0; r < img_a_edges.rows; r++) {
for (size_t c = 0; c < img_a_edges.cols; c++) {
int theta = static_cast<int> (std::atan2(r, c) * 180 / M_PI);
int rho = static_cast<int> ((c * cos(theta)) + (r * sin(theta)));
if (theta < -90) theta = -90;
if (theta > 89) theta = 89;
++hough_acc[abs(rho)][theta];
}
}
cv::Mat img_mat(hough_acc.size(), hough_acc[0].size(), CV_8U);
std::cout << hough_acc.size() << " " << hough_acc[0].size() << std::endl;
for (size_t i = 0; i < hough_acc.size(); i++) {
for (size_t j = 0; j < hough_acc[0].size(); j++) {
img_mat.at<int> (i,j) = hough_acc[i][j];
}
}
imwrite("../output/ps1-2-b-1.png", img_mat);
}
theta varies from -90 to 89. I am getting negative rho values. Right now I am just replacing the negative who with a positive one but am not getting a correct answer. What do I do to the negative rho? Please explain the answer.
theta = arctan (y / x)
rho = x * cos(theta) + y * sin(theta)
Edited code :
bool hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc,\
std::vector<double> thetas, std::vector<double> rhos, int rho_resolution, int theta_resolution) {
int img_w = img_a_edges.cols;
int img_h = img_a_edges.rows;
int max_votes = 0;
int min_votes = INT_MAX;
for (size_t r = 0; r < img_h; r++) {
for (size_t c = 0; c < img_w; c++) {
if(img_a_edges.at<int>(r, c) == 255) {
for (size_t i = 0; i < thetas.size(); i++) {
thetas[i] = (thetas[i] * M_PI / 180);
double rho = ( (c * cos(thetas[i])) + (r * sin(thetas[i])) );
int buff = ++hough_acc[static_cast<int>(abs(rho))][static_cast<int>(i)];
if (buff > max_votes) {
max_votes = buff;
}
if (buff < min_votes) {
min_votes = buff;
}
}
}
}
}
double div = static_cast<double>(max_votes) / 255;
int threshold = 10;
int possible_edge = round(static_cast<double>(max_votes) / div) - threshold;
props({
{"max votes", max_votes},
{"min votes", min_votes},
{"scale", div}
});
// needed for scaling intensity for contrast
// not sure if I am doing it correctly
for (size_t r = 0; r < hough_acc.size(); r++) {
for (size_t c = 0; c < hough_acc[0].size(); c++) {
double val = hough_acc[r][c] / div;
if (val < 0) {
val = 0;
}
hough_acc[r][c] = static_cast<int>(val);
}
}
cv::Mat img_mat = cv::Mat(hough_acc.size(), hough_acc[0].size(), CV_8UC1, cv::Scalar(0));
for (size_t i = 0; i < hough_acc.size(); i++) {
for (size_t j = 0; j < hough_acc[0].size(); j++) {
img_mat.at<uint8_t> (i,j) = static_cast<uint8_t>(hough_acc[i][j]);
}
}
imwrite("../output/ps1-2-b-1.png", img_mat);
return true;
}
Still not correct output. What is the error here?
atan2 of two positive numbers... should not be giving you negative angles, it should only be giving you a range of 0-90
also for the hough transform, I think you want everything relative to one point (ie 0,0 in this case). I think for that you would actually want to make theta=90-atan2(r,c)
Admittedly though, I am a bit confused as I thought you had to encode line direction, rather than just "edge pt". ie I thought at each edge point you had to provide a discrete array of guessed line trajectories and calculate rho and theta for each one and throw all of those into your accumulator. As is... I am not sure what you are calculating.
I am trying to make an alphatrimmed filter in openCV library. My code is not working properly and the resultant image is not looking as image after filtering.
The filter should work in the following way.
Chossing some (array) of pixels in my example it is 9 pixels '3x3' window.
Ordering them in increasing way.
Cutting our 'array' both sides for alpha-2.
calculating arithmetic mean of remaining pixels and inserting them in proper place.
int alphatrimmed(Mat img, int alpha)
{
Mat img9 = img.clone();
const int start = alpha/2 ;
const int end = 9 - (alpha/2);
//going through whole image
for (int i = 1; i < img.rows - 1; i++)
{
for (int j = 1; j < img.cols - 1; j++)
{
uchar element[9];
Vec3b element3[9];
int k = 0;
int a = 0;
//selecting elements for window 3x3
for (int m = i -1; m < i + 2; m++)
{
for (int n = j - 1; n < j + 2; n++)
{
element3[a] = img.at<Vec3b>(m*img.cols + n);
a++;
for (int c = 0; c < img.channels(); c++)
{
element[k] += img.at<Vec3b>(m*img.cols + n)[c];
}
k++;
}
}
//comparing and sorting elements in window (uchar element [9])
for (int b = 0; b < end; b++)
{
int min = b;
for (int d = b + 1; d < 9; d++)
{
if (element[d] < element[min])
{
min = d;
const uchar temp = element[b];
element[b] = element[min];
element[min] = temp;
const Vec3b temporary = element3[b];
element3[b] = element3[min];
element3[min] = temporary;
}
}
}
// index in resultant image( after alpha-trimmed filter)
int result = (i - 1) * (img.cols - 2) + j - 1;
for (int l = start ; l < end; l++)
img9.at<Vec3b>(result) += element3[l];
img9.at<Vec3b>(result) /= (9 - alpha);
}
}
namedWindow("AlphaTrimmed Filter", WINDOW_AUTOSIZE);
imshow("AlphaTrimmed Filter", img9);
return 0;
}
Without actual data, it's somewhat of a guess, but an uchar can't hold the sum of 3 channels. It works modulo 256 (at least on any platform OpenCV supports).
The proper solution is std::sort with a proper comparator for your Vec3b :
void L1(Vec3b a, Vec3b b) { return a[0]+a[1]+a[2] < b[0]+b[1]+b[2]; }
Here is my following code.
double calculateImageEntropy(Mat image, int sizeReduction)
{
// Manually calculating the histogram.
long double imageHistogram[256];
fill_n(imageHistogram, 256, 0);
int tempPixelValue = 0;
long double grayTotal = ((image.cols - sizeReduction) * (image.rows - sizeReduction));
for (int a = 0 + sizeReduction; a < image.cols - sizeReduction; a++)
{
for (int b = 0 + sizeReduction; b < image.rows - sizeReduction; b++)
{
tempPixelValue = image.at<uchar>(b, a);
imageHistogram[tempPixelValue] = (imageHistogram[tempPixelValue] + (1 / grayTotal));
}
}
/*long double normHist[256];
fill_n(normHist, 255, 0);
normalize(imageHistogram, normHist, 0, image.rows, NORM_MINMAX, -1, Mat());
*/
long double total = 0;
for (int i = 0; i < 255; i++)
{
total = total + imageHistogram[i];
}
cout << total << "\n\n";
long double tempValue = 0;
long double tempEntropy = 0;
long double cumulativeEntropy = 0;
long double finalEntropy = 0;
for (int y = 0; y < 255; y++)
{
tempValue = imageHistogram[y];
if (tempValue != 0)
{
tempEntropy = tempValue * log2(tempValue);
cumulativeEntropy = cumulativeEntropy + tempEntropy;
}
else;
}
finalEntropy = cumulativeEntropy * (-1);
return finalEntropy;
}
I'm using OpenCV to load images and trying to calculate the entropy of an image. I want to normalize the images so that way bigger images won't have a bigger entropy because of size.
But I can't find why my normalised histogram doesn't equal to 1.
Does anyone have any ideas?
Thanks