I'm a beginner in neural networks. I'm trying to create a NN for the XOR function, but it's not learning; it's stuck at 50%.
Can anyone give me some advice? Thanks.
Here's the code:
/// Matrix.cpp
#include "pch.h"
#include "Matrix.h"
....
Matrix Matrix::sigmoidDerivate(const Matrix &m) {
    assert(m.rows >= 1 && m.cols >= 1);
    Matrix tmp(m.rows, m.cols);
    for (ushort i = 0; i < tmp.rows; i++) {
        for (ushort j = 0; j < tmp.cols; j++) {
            tmp.mat[i][j] = m.mat[i][j] * (1 - m.mat[i][j]);
        }
    }
    return tmp;
}

Matrix Matrix::sigmoid(const Matrix &m) {
    assert(m.rows >= 1 && m.cols >= 1);
    Matrix tmp(m.rows, m.cols);
    for (ushort i = 0; i < tmp.rows; i++) {
        for (ushort j = 0; j < tmp.cols; j++) {
            tmp.mat[i][j] = 1 / (1 + exp(-m.mat[i][j]));
        }
    }
    return tmp;
}

Matrix Matrix::randomMatrix(ushort rows, ushort cols) {
    assert(rows >= 1 && cols >= 1);
    Matrix tmp(rows, cols);
    const int range_from = -3;
    const int range_to = 3;
    std::random_device rand_dev;
    std::mt19937 generator(rand_dev());
    std::uniform_real_distribution<double> distr(range_from, range_to);
    for (ushort i = 0; i < rows; i++) {
        for (ushort j = 0; j < cols; j++) {
            tmp.mat[i][j] = distr(generator);
        }
    }
    return tmp;
}
And this is main():
vector<vector<double>> in = {
    {0,0},
    {1,0},
    {0,1},
    {1,1}
};
vector<double> out = { 0,1,1,0 };

const ushort inputNeurons = 2;
const ushort hiddenNeurons = 3;
const ushort outputNeurons = 1;
const double learningRate = 0.03;

Matrix w_0_1 = Matrix::randomMatrix(inputNeurons, hiddenNeurons);
Matrix w_1_2 = Matrix::randomMatrix(hiddenNeurons, outputNeurons);

unsigned int epochs = 100000;
for (int i = 0; i < epochs; i++) {
    for (int j = 0; j < in.size(); j++) {
        Matrix Layer_0 = Matrix::createRowMatrix(in[j]);
        Matrix desired_output = Matrix::createRowMatrix({ out[j] });

        Matrix Layer_1 = Matrix::sigmoid(Matrix::multiply(Layer_0, w_0_1));
        Matrix Layer_2 = Matrix::sigmoid(Matrix::multiply(Layer_1, w_1_2));
        Matrix error = Matrix::POW2(Matrix::substract(Layer_2, desired_output));

        //backprop
        Matrix Layer_2_delta = Matrix::elementWiseMultiply(
            Matrix::substract(Layer_2, desired_output),
            Matrix::sigmoidDerivate(Layer_2)
        );
        Matrix Layer_1_delta = Matrix::elementWiseMultiply(
            Matrix::multiply(Layer_2_delta, Matrix::transpose(w_1_2)),
            Matrix::sigmoidDerivate(Layer_1)
        );
        Matrix w_1_2_delta = Matrix::multiply(Matrix::transpose(Layer_1), Layer_2_delta);
        Matrix w_0_1_delta = Matrix::multiply(Matrix::transpose(Layer_0), Layer_1_delta);

        //updating weights
        w_0_1 = Matrix::multiply(w_0_1_delta, learningRate);
        w_1_2 = Matrix::multiply(w_1_2_delta, learningRate);
    }
}
The NN architecture is 2 -> 3 -> 1.
If the hidden layer is small, like 2-4 neurons, the output is 50%; with 8 neurons in the hidden layer, the output becomes around 49%.
Some help, please.
I'm not that into C++, so I'm not sure, but in the line:
Matrix::substract(Layer_2, desired_output),
you are subtracting the desired "good" output from the current layer output. In my opinion that should be the other way round, so you have to multiply it by -1.
For me it works like that. If you like, I can send you my source code (it's Java).
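Aside from the sign question, note that the posted update step overwrites the weights with the scaled delta instead of adjusting them. Below is a minimal sketch of the conventional gradient-descent update, reusing the question's Matrix helpers (assuming the substract and multiply(Matrix, double) overloads behave as they are used above):

// descend the gradient: subtract the scaled delta from the current
// weights instead of replacing the weights with it
w_0_1 = Matrix::substract(w_0_1, Matrix::multiply(w_0_1_delta, learningRate));
w_1_2 = Matrix::substract(w_1_2, Matrix::multiply(w_1_2_delta, learningRate));

With a replacement update, every sample throws away all previous learning, which would explain accuracy staying pinned near 50%.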
I've been implementing a NN recently based on http://neuralnetworksanddeeplearning.com/. I've implemented the whole backprop and SGD algorithm almost exactly the way the author of that book does. The problem is that while he gets around 90% accuracy after one epoch, I get 30% after 5 epochs, even though I have the same hyperparameters. Do you have any idea what might be the cause?
Here's my repository:
https://github.com/PiPower/Deep-Neural-Network
Here is the part of Network.cpp with the backprop and SGD algorithm:
void Network::Train(MatrixD_Array& TrainingData, MatrixD_Array& TrainingLabels, int BatchSize, int epochs, double LearningRate)
{
    assert(TrainingData.size() == TrainingLabels.size() && CostFunc != nullptr && CostFuncDer != nullptr && LearningRate > 0);

    std::vector<long unsigned int> indexes;
    for (int i = 0; i < TrainingData.size(); i++) indexes.push_back(i);
    std::random_device rd;
    std::mt19937 g(rd());

    std::vector<Matrix<double>> NablaWeights;
    std::vector<Matrix<double>> NablaBiases;
    NablaWeights.resize(Layers.size());
    NablaBiases.resize(Layers.size());
    for (int i = 0; i < Layers.size(); i++)
    {
        NablaWeights[i] = Matrix<double>(Layers[i].GetInDim(), Layers[i].GetOutDim());
        NablaBiases[i] = Matrix<double>(1, Layers[i].GetOutDim());
    }

    //---- Epoch iterating
    for (int i = 0; i < epochs; i++)
    {
        cout << "Epoch number: " << i << endl;
        shuffle(indexes.begin(), indexes.end(), g);

        // Batch iterating
        for (int batch = 0; batch < TrainingData.size(); batch = batch + BatchSize)
        {
            for (int i = 0; i < Layers.size(); i++)
            {
                NablaWeights[i].Clear();
                NablaBiases[i].Clear();
            }

            int i = 0;
            while (i < BatchSize && (i + batch) < TrainingData.size())
            {
                std::vector<Matrix<double>> ActivationOutput;
                std::vector<Matrix<double>> Z_Output;
                ActivationOutput.resize(Layers.size() + 1);
                Z_Output.resize(Layers.size());

                ActivationOutput[0] = TrainingData[indexes[i + batch]];
                int index = 0;
                // Pushing values through
                for (auto layer : Layers)
                {
                    Z_Output[index] = layer.Mul(ActivationOutput[index]);
                    ActivationOutput[index + 1] = layer.ApplyActivation(Z_Output[index]);
                    index++;
                }

                // ---- Calculating Nabla that will later be divided by batch size element-wise
                auto DeltaNabla = BackPropagation(ActivationOutput, Z_Output, TrainingLabels[indexes[i + batch]]);
                for (int i = 0; i < Layers.size(); i++)
                {
                    NablaWeights[i] = NablaWeights[i] + DeltaNabla.first[i];
                    NablaBiases[i] = NablaBiases[i] + DeltaNabla.second[i];
                }
                i++;
            }

            for (int g = 0; g < Layers.size(); g++)
            {
                Layers[g].Weights = Layers[g].Weights - NablaWeights[g] * LearningRate;
                Layers[g].Biases = Layers[g].Biases - NablaBiases[g] * LearningRate;
            }
            // std::transform(NablaWeights.begin(), NablaWeights.end(), NablaWeights.begin(), [BatchSize, LearningRate](Matrix<double>& Weight) { return Weight * (LearningRate / BatchSize); });
            // std::transform(NablaBiases.begin(), NablaBiases.end(), NablaBiases.begin(), [BatchSize, LearningRate](Matrix<double>& Bias) { return Bias * (LearningRate / BatchSize); });
        }
    }
}
std::pair<MatrixD_Array, MatrixD_Array> Network::BackPropagation(MatrixD_Array& ActivationOutput, MatrixD_Array& Z_Output, Matrix<double>& label)
{
    MatrixD_Array NablaWeight;
    MatrixD_Array NablaBias;
    NablaWeight.resize(Layers.size());
    NablaBias.resize(Layers.size());

    auto zs = Layers[Layers.size() - 1].ActivationPrime(Z_Output[Z_Output.size() - 1]);
    Matrix<double> Delta_L = Hadamard(CostFuncDer(ActivationOutput[ActivationOutput.size() - 1], label), zs);
    NablaWeight[Layers.size() - 1] = Delta_L * ActivationOutput[ActivationOutput.size() - 2].Transpose();
    NablaBias[Layers.size() - 1] = Delta_L;

    for (int j = 2; j <= Layers.size(); j++)
    {
        auto sp = Layers[Layers.size() - j].ActivationPrime(Z_Output[Layers.size() - j]);
        Delta_L = Hadamard(Layers[Layers.size() - j + 1].Weights.Transpose() * Delta_L, sp);
        NablaWeight[Layers.size() - j] = Delta_L * ActivationOutput[ActivationOutput.size() - j - 1].Transpose();
        NablaBias[Layers.size() - j] = Delta_L;
    }
    return make_pair(NablaWeight, NablaBias);
}
It turned out that the MNIST loader didn't work correctly.
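For future readers, a quick sanity check on the loader's output catches this kind of bug early. A hypothetical sketch, assuming each image is unrolled to 784 doubles in [0, 1] and each label is a 10-element one-hot vector (the function name and container choice are mine, not from the repository):

#include <algorithm>
#include <cassert>
#include <vector>

// hypothetical loader check: sizes match, pixels normalized, labels one-hot
void check_mnist(const std::vector<std::vector<double>> &images,
                 const std::vector<std::vector<double>> &labels)
{
    assert(images.size() == labels.size());
    for (size_t n = 0; n < images.size(); n++) {
        assert(images[n].size() == 784 && labels[n].size() == 10);
        for (double p : images[n])
            assert(p >= 0.0 && p <= 1.0);  // pixels scaled to [0, 1]
        assert(std::count(labels[n].begin(), labels[n].end(), 1.0) == 1);  // exactly one hot entry
    }
}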
I work on traffic sign detection. First, I applied a segmentation to the RGB image to obtain the red-channel image, as illustrated in image 1.
Secondly, I try to find homogeneous regions, to eliminate regions that are not of interest (not a traffic sign), by calculating the variance of a sliding window over the image.
I use this code, but I always get an exception:
int main(int argc, char** argv)
{
    IplImage *image1;
    if ((image1 = cvLoadImage("segmenter1/00051.jpg", 0)) == 0)
        return NULL;
    int rows = image1->width;
    int cols = image1->height;
    Mat image = Mat::zeros(cols, rows, CV_32FC1);
    double x = 0;
    double temp = 0;
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            temp = cvGet2D(image1, j, i).val[0];
            x = temp / 255;
            image.at<float>(j, i) = x;
            x = image.at<float>(j, i);
        }
    }
    int k = 16;
    double seuil = 0.0013;
    CvScalar blanc; // white pixel
    blanc.val[0] = 255;
    cv::Scalar mean, stddev; // 0: 1st channel, 1: 2nd channel, 2: 3rd channel
    for (int j = 0; j < rows - k; j++)
    {
        for (int i = 0; i < cols - k; i++)
        {
            double som = 0;
            double var = 0;
            double t = 0;
            for (int jj = j; jj < k + j; jj++)
            {
                for (int ii = i; ii < k + i; ii++)
                {
                    t = image.at<float>(jj, ii);
                    som = som + t;
                    t = t * t;
                    var = var + t;
                }
            }
            som = som / (k * k);
            if (som > 0.18) {
                var = (var / (k * k)) - (som * som);
                if (var < seuil)
                    cvSet2D(image1, j, i, blanc);
            }
        }
    }
    char stsave[80];
    cvSaveImage("variance/00051.jpg", image1);
    cv::waitKey(0);
    return 0;
}
Without the specific exception, I can only guess that it is out_of_range. According to the OpenCV docs, the cvGet2D and cvSet2D parameters are (image, y, x), which effectively translates to (image, row, col). You have flipped the definitions of rows and cols and have conflicting usage between the two loops. Maybe fix these and try again.
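For illustration, a hedged sketch of the same variance test with consistent (row, col) indexing via the modern C++ API; cv::meanStdDev replaces the hand-rolled sums, and the file names are taken from the question:

#include <opencv2/opencv.hpp>

int main()
{
    // load as grayscale and normalize to [0, 1]
    cv::Mat gray = cv::imread("segmenter1/00051.jpg", cv::IMREAD_GRAYSCALE);
    if (gray.empty()) return -1;
    cv::Mat image;
    gray.convertTo(image, CV_32F, 1.0 / 255.0);

    const int k = 16;            // sliding-window size
    const double seuil = 0.0013; // variance threshold
    for (int r = 0; r + k <= image.rows; r++) {       // r is always the row (y)
        for (int c = 0; c + k <= image.cols; c++) {   // c is always the column (x)
            cv::Scalar mean, stddev;
            cv::meanStdDev(image(cv::Rect(c, r, k, k)), mean, stddev);
            // mark homogeneous, sufficiently bright windows as white
            if (mean[0] > 0.18 && stddev[0] * stddev[0] < seuil)
                gray.at<uchar>(r, c) = 255;
        }
    }
    cv::imwrite("variance/00051.jpg", gray);
    return 0;
}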
Here is my code for creating the Hough accumulator for lines in an image:
void hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc) {
    for (size_t r = 0; r < img_a_edges.rows; r++) {
        for (size_t c = 0; c < img_a_edges.cols; c++) {
            int theta = static_cast<int>(std::atan2(r, c) * 180 / M_PI);
            int rho = static_cast<int>((c * cos(theta)) + (r * sin(theta)));
            if (theta < -90) theta = -90;
            if (theta > 89) theta = 89;
            ++hough_acc[abs(rho)][theta];
        }
    }
    cv::Mat img_mat(hough_acc.size(), hough_acc[0].size(), CV_8U);
    std::cout << hough_acc.size() << " " << hough_acc[0].size() << std::endl;
    for (size_t i = 0; i < hough_acc.size(); i++) {
        for (size_t j = 0; j < hough_acc[0].size(); j++) {
            img_mat.at<int>(i, j) = hough_acc[i][j];
        }
    }
    imwrite("../output/ps1-2-b-1.png", img_mat);
}
theta varies from -90 to 89. I am getting negative rho values; right now I am just replacing a negative rho with a positive one, but I am not getting a correct answer. What should I do with the negative rho? Please explain.
theta = arctan (y / x)
rho = x * cos(theta) + y * sin(theta)
Edited code:
bool hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc,
                     std::vector<double> thetas, std::vector<double> rhos, int rho_resolution, int theta_resolution) {
    int img_w = img_a_edges.cols;
    int img_h = img_a_edges.rows;
    int max_votes = 0;
    int min_votes = INT_MAX;
    for (size_t r = 0; r < img_h; r++) {
        for (size_t c = 0; c < img_w; c++) {
            if (img_a_edges.at<int>(r, c) == 255) {
                for (size_t i = 0; i < thetas.size(); i++) {
                    thetas[i] = (thetas[i] * M_PI / 180);
                    double rho = ((c * cos(thetas[i])) + (r * sin(thetas[i])));
                    int buff = ++hough_acc[static_cast<int>(abs(rho))][static_cast<int>(i)];
                    if (buff > max_votes) {
                        max_votes = buff;
                    }
                    if (buff < min_votes) {
                        min_votes = buff;
                    }
                }
            }
        }
    }
    double div = static_cast<double>(max_votes) / 255;
    int threshold = 10;
    int possible_edge = round(static_cast<double>(max_votes) / div) - threshold;
    props({
        {"max votes", max_votes},
        {"min votes", min_votes},
        {"scale", div}
    });
    // needed for scaling intensity for contrast
    // not sure if I am doing it correctly
    for (size_t r = 0; r < hough_acc.size(); r++) {
        for (size_t c = 0; c < hough_acc[0].size(); c++) {
            double val = hough_acc[r][c] / div;
            if (val < 0) {
                val = 0;
            }
            hough_acc[r][c] = static_cast<int>(val);
        }
    }
    cv::Mat img_mat = cv::Mat(hough_acc.size(), hough_acc[0].size(), CV_8UC1, cv::Scalar(0));
    for (size_t i = 0; i < hough_acc.size(); i++) {
        for (size_t j = 0; j < hough_acc[0].size(); j++) {
            img_mat.at<uint8_t>(i, j) = static_cast<uint8_t>(hough_acc[i][j]);
        }
    }
    imwrite("../output/ps1-2-b-1.png", img_mat);
    return true;
}
The output is still not correct. What is the error here?
atan2 of two positive numbers should not be giving you negative angles; it should only give you a range of 0-90.
Also, for the Hough transform, I think you want everything relative to one point (i.e. (0,0) in this case). I think for that you would actually want theta = 90 - atan2(r, c).
Admittedly, though, I am a bit confused, as I thought you had to encode line direction rather than just an "edge point": i.e., I thought at each edge point you had to provide a discrete array of guessed line trajectories, calculate rho and theta for each one, and throw all of those into your accumulator. As is, I am not sure what you are calculating.
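For reference, a minimal sketch of a conventional (rho, theta) accumulator that votes over discrete thetas at each edge pixel; negative rho is kept by offsetting the rho index by rho_max instead of taking abs(). This is a generic illustration under assumptions (8-bit edge map, 1-degree theta steps), not a drop-in replacement for the question's signature:

#include <opencv2/opencv.hpp>
#include <cmath>
#include <vector>

void hough_acc_sketch(const cv::Mat &edges, std::vector<std::vector<int> > &acc) {
    // rho ranges over [-rho_max, rho_max]; shift by rho_max so indices start at 0
    const int rho_max = static_cast<int>(std::ceil(std::hypot(edges.rows, edges.cols)));
    acc.assign(2 * rho_max + 1, std::vector<int>(180, 0));
    for (int r = 0; r < edges.rows; r++) {
        for (int c = 0; c < edges.cols; c++) {
            if (edges.at<uchar>(r, c) != 255) continue;   // vote only at edge pixels
            for (int t = -90; t < 90; t++) {              // one vote per discrete theta
                double theta = t * CV_PI / 180.0;
                int rho = static_cast<int>(std::round(c * std::cos(theta) + r * std::sin(theta)));
                ++acc[rho + rho_max][t + 90];             // both indices shifted to be non-negative
            }
        }
    }
}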
I am trying to implement an alpha-trimmed filter with the OpenCV library. My code is not working properly, and the resulting image does not look like a properly filtered image.
The filter should work in the following way:
Choose some (array of) pixels; in my example it is 9 pixels, a 3x3 window.
Order them in increasing order.
Cut the 'array' on both sides by alpha/2.
Calculate the arithmetic mean of the remaining pixels and insert it in the proper place.
int alphatrimmed(Mat img, int alpha)
{
    Mat img9 = img.clone();
    const int start = alpha / 2;
    const int end = 9 - (alpha / 2);
    // going through the whole image
    for (int i = 1; i < img.rows - 1; i++)
    {
        for (int j = 1; j < img.cols - 1; j++)
        {
            uchar element[9];
            Vec3b element3[9];
            int k = 0;
            int a = 0;
            // selecting elements for the 3x3 window
            for (int m = i - 1; m < i + 2; m++)
            {
                for (int n = j - 1; n < j + 2; n++)
                {
                    element3[a] = img.at<Vec3b>(m*img.cols + n);
                    a++;
                    for (int c = 0; c < img.channels(); c++)
                    {
                        element[k] += img.at<Vec3b>(m*img.cols + n)[c];
                    }
                    k++;
                }
            }
            // comparing and sorting elements in window (uchar element[9])
            for (int b = 0; b < end; b++)
            {
                int min = b;
                for (int d = b + 1; d < 9; d++)
                {
                    if (element[d] < element[min])
                    {
                        min = d;
                        const uchar temp = element[b];
                        element[b] = element[min];
                        element[min] = temp;
                        const Vec3b temporary = element3[b];
                        element3[b] = element3[min];
                        element3[min] = temporary;
                    }
                }
            }
            // index in resultant image (after alpha-trimmed filter)
            int result = (i - 1) * (img.cols - 2) + j - 1;
            for (int l = start; l < end; l++)
                img9.at<Vec3b>(result) += element3[l];
            img9.at<Vec3b>(result) /= (9 - alpha);
        }
    }
    namedWindow("AlphaTrimmed Filter", WINDOW_AUTOSIZE);
    imshow("AlphaTrimmed Filter", img9);
    return 0;
}
Without actual data it's somewhat of a guess, but a uchar can't hold the sum of 3 channels; the addition wraps modulo 256 (at least on any platform OpenCV supports).
The proper solution is std::sort with a proper comparator for your Vec3b (note the comparator must return bool):

bool L1(Vec3b a, Vec3b b) { return a[0] + a[1] + a[2] < b[0] + b[1] + b[2]; }
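A quick usage sketch (element3 is the 3x3 window array from the question): sorting the Vec3b values directly makes the separate uchar key array unnecessary.

#include <algorithm>

// sort the 9-pixel window by channel sum; the int arithmetic in the
// comparator avoids the uchar wrap-around
std::sort(element3, element3 + 9, L1);
// then average element3[start..end) as before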
Integer Range = 1;
for (Integer k = -Range; k <= Range; ++k)
{
    for (Integer j = -Range; j <= Range; ++j)
    {
        for (Integer i = -Range; i <= Range; ++i)
        {
            Integer MCID = GetCellID(&CONSTANT_BOUNDINGBOX, CIDX + i, CIDY + j, CIDZ + k);
            if (MCID < 0 || MCID >= c_CellNum)
            {
                continue;
            }
            unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
            for (unsigned int l = 0; l < TriangleNum; ++l)
            {
                TriangleID = c_daCell[MCID].m_TriangleID[l];
                // No need to calculate again for the same triangle
                if (TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)
                {
                    CDistance Distance;
                    Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &TargetPosition, &Distance.Direction);
                    if (Distance.Magnitude < NearestDistance.Magnitude)
                    {
                        NearestDistance = Distance;
                        NearestID = TriangleID;
                    }
                }
            }
        }
    }
}
c_daSTLDistance[ID] = NearestDistance;
c_daSTLID[ID] = NearestID;
GetCellID is the function that returns the cell id (MCID above), given CIDX, CIDY, CIDZ, the cell's position on the three axes.
The code above calculates the distance (the STL distance, between a point and the triangles of an STL mesh). The code runs fine; the problem is that it is too slow, since it contains a large number of nested loops. My concern is optimizing those loops. Is there any technique for optimizing the loops within the code?
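Not a full answer, just an illustration of one generic technique, sketched under assumptions (the Cell type and field names mirror the question's code): hoist the repeated c_daCell[MCID] indexing out of the triangle loop, and skip empty cells before entering it.

const Cell &cell = c_daCell[MCID];      // one array lookup per cell, hoisted out of the triangle loop
const unsigned int TriangleNum = cell.m_TriangleNum;
if (TriangleNum == 0) continue;         // skip cells with no triangles early
for (unsigned int l = 0; l < TriangleNum; ++l)
{
    const Integer TriangleID = cell.m_TriangleID[l];
    // ... distance test as in the question ...
}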