I'm trying to make a neural network that solves the XOR problem, but I can't get it to work: it always gives wrong results. Maybe I'm making a mistake in the math. The network does not learn, and the results are always similar.
I am not using BIAS.
Note: execute function = (feed-forward + backpropagation)
ALPHA = 0.5
Here is the code:
//main.cpp
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "neural_network.h"
/// Trains the XOR network on randomly drawn samples, then prints the learned
/// weights and the network's response to each of the four input pairs.
///
/// @return 0 on completion.
int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;
    std::srand(static_cast<unsigned int>(std::time(NULL)));

    // Each row is {input A, input B, expected XOR output}.
    double array[][3] = {{0.0, 0.0, 0.0},
                         {0.0, 1.0, 1.0},
                         {1.0, 0.0, 1.0},
                         {1.0, 1.0, 0.0}};
    const int sample_count = 4;

    // Automatic storage: the network was previously allocated with `new`
    // and never deleted, which leaked it.
    neural_network nn(3, 2, 2, 1, 1.0);
    nn.create_network();

    for(int i = 0; i < 15000; i++)
    {
        const int index = std::rand() % sample_count;
#if DEBUG
        std::cout<<"Inputs :"<<array[index][0]<<" , "<<array[index][1]<<std::endl;
        std::cout<<"Outputs :"<<array[index][2]<<std::endl;
#endif
        // The third column doubles as the one-element target vector.
        nn.execute(array[index], &array[index][2]);
    }
    nn.print_weight();

    // Evaluate every sample in table order. execute() is described as
    // feed-forward + backpropagation, so each evaluation also performs one
    // more training step, exactly as the four explicit calls did.
    for(int row = 0; row < sample_count; row++)
    {
        nn.execute(array[row], &array[row][2]);
        nn.print_output();
    }
    return 0;
}
//feed-forward function
void neural_network::feed_forward(double* inputs)
{
int index = 0;
for(int i = 0; i < neural_network::input_layer_size; i++)
neural_network::input_neuron[i] = inputs[i];
for(int i = 0; i < neural_network::hidden_layer_size; i++)
{
for(int j = 0; j < neural_network::input_layer_size; j++)
{
neural_network::hidden_neuron[i] += neural_network::input_neuron[j] * weight_I_H[index++];
}
neural_network::hidden_neuron[i] = neural_network::activation_func(neural_network::hidden_neuron[i]);
}
index = 0;
for(int i = 0; i < neural_network::output_layer_size; i++)
{
for(int j = 0; j < neural_network::hidden_layer_size; j++)
{
neural_network::output_neuron[i] += neural_network::hidden_neuron[j] * weight_H_O[index++];
}
neural_network::output_neuron[i] = neural_network::activation_func(neural_network::output_neuron[i]);
}
}
//backpropagation function
void neural_network::back_propagation(double* outputs)
{
int index;
for(int i = 0; i < neural_network::output_layer_size; i++)
neural_network::err_output[i] = (outputs[i] - neural_network::output_neuron[i]);
for(int i = 0; i < neural_network::hidden_layer_size; i++)
{
index = i;
for(int j = 0; j < neural_network::output_layer_size; j++)
{
neural_network::err_hidden[i] += neural_network::weight_H_O[index] * neural_network::err_output[j] * neural_network::derivative_act_func(neural_network::output_neuron[j]);
neural_network::weight_H_O[index] += ALPHA * neural_network::err_output[j] * neural_network::derivative_act_func(neural_network::output_neuron[j]) * neural_network::hidden_neuron[i];
index += neural_network::hidden_layer_size;
}
}
for(int i = 0; i < neural_network::input_layer_size; i++)
{
index = i;
for(int j = 0; j < neural_network::hidden_layer_size; j++)
{
neural_network::weight_I_H[index] += ALPHA * neural_network::err_hidden[j] * neural_network::derivative_act_func(neural_network::hidden_neuron[j]) * neural_network::input_neuron[i];
index += neural_network::input_layer_size;
}
}
}
//output
Input To Hidden :
H-1 :
Weight :-13.269
Weight :-13.2705
H-2 :
Weight :-12.5172
Weight :-12.5195
Hidden To Output :
O-1 :
Weight :-5.37707
Weight :-2.93218
Outputs for (0,0):
O-1 :0.0294265
Outputs for (0,1):
O-1 :0.507348
Outputs for (1,0):
O-1 :0.62418
Outputs for (1,1):
O-1 :0.651169
It is really impossible: neither Keras nor my own network based on the Fourier transform (which is more powerful than Keras) can actually solve this XOR task. I tested both of these ANNs very carefully. At most they recognize 3 examples out of 4 (acc = 0.75, i.e. 75%). None of them answered 1 XOR 1 = 0. It seems nobody has really tested this case seriously. (The ANNs were multilayered.)
Related
I've recently been implementing a neural network based on http://neuralnetworksanddeeplearning.com/. I've written the whole backpropagation and SGD algorithm almost the same way as the author of this book. The problem is that while he gets around 90% accuracy after one epoch, I get 30% after 5 epochs, even though I use the same hyperparameters. Do you have any idea what the cause might be?
Here's my repository.
https://github.com/PiPower/Deep-Neural-Network
Here is part with algorithm for backprop and SGD implemented in Network.cpp:
/// Trains the network with mini-batch stochastic gradient descent.
///
/// For every epoch the sample order is shuffled, the data is walked in
/// chunks of `BatchSize`, the per-sample gradients returned by
/// BackPropagation() are summed, and every layer is updated with the
/// average gradient of the chunk scaled by `LearningRate`.
///
/// @param TrainingData   one input matrix per sample.
/// @param TrainingLabels one label matrix per sample; same length as TrainingData.
/// @param BatchSize      number of samples per mini-batch; must be positive.
/// @param epochs         number of full passes over the data.
/// @param LearningRate   step size; must be positive.
void Network::Train(MatrixD_Array& TrainingData, MatrixD_Array& TrainingLabels, int BatchSize,int epochs, double LearningRate)
{
    assert(TrainingData.size() == TrainingLabels.size() && CostFunc != nullptr && CostFuncDer != nullptr && LearningRate > 0);
    // A non-positive batch size would never advance the batch loop.
    assert(BatchSize > 0);

    const std::size_t sampleCount = TrainingData.size();
    const std::size_t layerCount = Layers.size();
    const std::size_t batchSize = static_cast<std::size_t>(BatchSize);

    std::vector<long unsigned int > indexes;
    indexes.reserve(sampleCount);
    for (std::size_t s = 0; s < sampleCount; s++) indexes.push_back(s);

    std::random_device rd;
    std::mt19937 g(rd());

    // Gradient accumulators, one per layer, shaped like the layer parameters.
    std::vector<Matrix<double>> NablaWeights;
    std::vector<Matrix<double>> NablaBiases;
    NablaWeights.resize(layerCount);
    NablaBiases.resize(layerCount);
    for (std::size_t l = 0; l < layerCount; l++)
    {
        NablaWeights[l] = Matrix<double>(Layers[l].GetInDim(), Layers[l].GetOutDim());
        NablaBiases[l] = Matrix<double>(1, Layers[l].GetOutDim());
    }

    // Forward-pass buffers; every slot is overwritten for each sample.
    std::vector<Matrix<double>> ActivationOutput;
    std::vector<Matrix<double>> Z_Output;
    ActivationOutput.resize(layerCount + 1);
    Z_Output.resize(layerCount);

    //---- Epoch iterating
    for (int epoch = 0; epoch < epochs; epoch++)
    {
        cout << "Epoch number: " << epoch << endl;
        shuffle(indexes.begin(), indexes.end(), g);

        // Batch iterating
        for (std::size_t batch = 0; batch < sampleCount; batch += batchSize)
        {
            for (std::size_t l = 0; l < layerCount; l++)
            {
                NablaWeights[l].Clear();
                NablaBiases[l].Clear();
            }

            // The last batch of an epoch may hold fewer than BatchSize samples.
            const std::size_t batchEnd = (batch + batchSize < sampleCount) ? batch + batchSize : sampleCount;
            for (std::size_t sample = batch; sample < batchEnd; sample++)
            {
                ActivationOutput[0] = TrainingData[indexes[sample]];

                // Pushing values through; bind by reference so the layer
                // (and its weight matrix) is not copied for every sample.
                std::size_t index = 0;
                for (auto& layer : Layers)
                {
                    Z_Output[index] = layer.Mul(ActivationOutput[index]);
                    ActivationOutput[index + 1] = layer.ApplyActivation(Z_Output[index]);
                    index++;
                }

                // ---- Accumulate this sample's gradient
                auto DeltaNabla = BackPropagation(ActivationOutput, Z_Output, TrainingLabels[indexes[sample]]);
                for (std::size_t l = 0; l < layerCount; l++)
                {
                    NablaWeights[l] = NablaWeights[l] + DeltaNabla.first[l];
                    NablaBiases[l] = NablaBiases[l] + DeltaNabla.second[l];
                }
            }

            // Average over the samples actually seen. The summed gradient used
            // to be applied undivided, which multiplied the effective learning
            // rate by the batch size.
            const double step = LearningRate / static_cast<double>(batchEnd - batch);
            for (std::size_t l = 0; l < layerCount; l++)
            {
                Layers[l].Weights = Layers[l].Weights - NablaWeights[l] * step;
                Layers[l].Biases = Layers[l].Biases - NablaBiases[l] * step;
            }
        }
    }
}
/// Computes the per-layer gradients for a single training sample.
///
/// Starts from the output-layer error (cost derivative combined element-wise
/// with the activation derivative) and walks the layers from last to first,
/// producing one weight gradient and one bias gradient per layer.
///
/// @param ActivationOutput activations recorded during the forward pass.
/// @param Z_Output         pre-activation values recorded during the forward pass.
/// @param label            expected output for the sample.
/// @return pair {weight gradients, bias gradients}, each indexed by layer.
std::pair<MatrixD_Array, MatrixD_Array> Network::BackPropagation( MatrixD_Array& ActivationOutput, MatrixD_Array& Z_Output,Matrix<double>& label)
{
    const std::size_t layerCount = Layers.size();
    const std::size_t activationCount = ActivationOutput.size();
    const std::size_t lastLayer = layerCount - 1;

    MatrixD_Array weightGrads;
    MatrixD_Array biasGrads;
    weightGrads.resize(layerCount);
    biasGrads.resize(layerCount);

    // Output layer: seed the error term from the cost derivative.
    auto outputPrime = Layers[lastLayer].ActivationPrime(Z_Output[Z_Output.size() - 1]);
    Matrix<double> delta = Hadamard(CostFuncDer(ActivationOutput[activationCount - 1],label), outputPrime);
    weightGrads[lastLayer] = delta * ActivationOutput[activationCount - 2].Transpose();
    biasGrads[lastLayer] = delta;

    // Remaining layers, counted from the back (depth 2 is the second-to-last).
    for (std::size_t depth = 2; depth <= layerCount; ++depth)
    {
        const std::size_t layer = layerCount - depth;
        auto prime = Layers[layer].ActivationPrime(Z_Output[layer]);
        delta = Hadamard(Layers[layer + 1].Weights.Transpose() * delta, prime);
        weightGrads[layer] = delta * ActivationOutput[activationCount - depth - 1].Transpose();
        biasGrads[layer] = delta;
    }

    return std::pair<MatrixD_Array, MatrixD_Array>(weightGrads, biasGrads);
}
It turned out that the MNIST loader didn't work correctly.
I'm a beginner with neural networks. I'm trying to create an NN for the XOR function, but it's not learning; it's stuck at 50%.
Can anyone give me some advice? Thanks.
Here's the code:
/// Matrix.cpp
#include "pch.h"
#include "Matrix.h"
....
/// Returns a matrix whose entries are v * (1 - v) for every entry v of `m`.
///
/// @param m source matrix; must have at least one row and one column.
/// @return a new matrix with the same dimensions as `m`.
Matrix Matrix::sigmoidDerivate(const Matrix &m) {
    assert(m.rows >= 1 && m.cols >= 1);
    Matrix result(m.rows, m.cols);
    for (ushort r = 0; r < result.rows; ++r) {
        for (ushort c = 0; c < result.cols; ++c) {
            const auto value = m.mat[r][c];
            result.mat[r][c] = value * (1 - value);
        }
    }
    return result;
}
/// Applies the logistic function 1 / (1 + exp(-v)) to every entry v of `m`.
///
/// @param m source matrix; must have at least one row and one column.
/// @return a new matrix with the same dimensions as `m`.
Matrix Matrix::sigmoid(const Matrix &m) {
    assert(m.rows >= 1 && m.cols >= 1);
    Matrix result(m.rows, m.cols);
    for (ushort r = 0; r < result.rows; ++r) {
        for (ushort c = 0; c < result.cols; ++c) {
            const auto value = m.mat[r][c];
            result.mat[r][c] = 1 / (1 + exp(-value));
        }
    }
    return result;
}
/// Builds a rows x cols matrix filled with values drawn from a uniform real
/// distribution constructed with the bounds -3 and 3.
///
/// A fresh std::random_device-seeded Mersenne Twister is created on each call.
///
/// @param rows number of rows; must be at least 1.
/// @param cols number of columns; must be at least 1.
/// @return the randomly filled matrix.
Matrix Matrix::randomMatrix(ushort rows, ushort cols) {
    assert(rows>=1 && cols>=1);

    const int lowest = -3;
    const int highest = 3;
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_real_distribution<double> uniform(lowest, highest);

    Matrix result(rows,cols);
    // Fill in row-major order.
    for (ushort r = 0; r < rows; ++r) {
        for (ushort c = 0; c < cols; ++c) {
            result.mat[r][c] = uniform(engine);
        }
    }
    return result;
}
And this is main () :
vector<vector<double>> in = {
{0,0},
{1,0},
{0,1},
{1,1}
};
vector<double> out = { 0,1,1,0 };
const ushort inputNeurons = 2;
const ushort hiddenNeurons = 3;
const ushort outputNeurons = 1;
const double learningRate = 0.03;
Matrix w_0_1 = Matrix::randomMatrix(inputNeurons, hiddenNeurons);
Matrix w_1_2 = Matrix::randomMatrix(hiddenNeurons, outputNeurons);
unsigned int epochs = 100000;
for (int i = 0; i < epochs; i++) {
for (int j = 0; j < in.size(); j++) {
Matrix Layer_0 = Matrix::createRowMatrix(in[j]);
Matrix desired_output = Matrix::createRowMatrix({ out[j] });
Matrix Layer_1 = Matrix::sigmoid(Matrix::multiply(Layer_0, w_0_1));
Matrix Layer_2 = Matrix::sigmoid(Matrix::multiply(Layer_1, w_1_2));
Matrix error = Matrix::POW2(Matrix::substract(Layer_2, desired_output));
//backprop
Matrix Layer_2_delta = Matrix::elementWiseMultiply(
Matrix::substract(Layer_2, desired_output),
Matrix::sigmoidDerivate(Layer_2)
);
Matrix Layer_1_delta = Matrix::elementWiseMultiply(
Matrix::multiply(Layer_2_delta, Matrix::transpose(w_1_2)),
Matrix::sigmoidDerivate(Layer_1)
);
Matrix w_1_2_delta = Matrix::multiply(Matrix::transpose(Layer_1), Layer_2_delta);
Matrix w_0_1_delta = Matrix::multiply(Matrix::transpose(Layer_0), Layer_1_delta);
//updating weights
w_0_1 = Matrix::multiply(w_0_1_delta, learningRate);
w_1_2 = Matrix::multiply(w_1_2_delta, learningRate);
}
}
NN architecture is : 2 ->3 ->1
If the number of neurons in the hidden layer is small, like 2-4, the output is 50%, and with 8 neurons in the hidden layer the output becomes around 49%.
Some help, please.
I'm not that familiar with C++, so I'm not sure. But in the line:
Matrix::substract(Layer_2, desired_output),
you are subtracting the desired ("good") output from the layer's actual output. In my opinion it should be the other way round, so you have to multiply it by -1.
For me it works that way. If you like, I can send you my source code (it's Java).
I have used OpenMP to parallelize my C++ code as below:
int shell_num = 50, grparallel[shell_num],grbot[shell_num];
double p_x,p_y,grp[shell_num];
for (int f = 0; f < shell_num; f++)
{
grp[f] = 0;
grparallel[f] = 0;
grbot[f] = 0;
}
//some code...
#pragma omp parallel for reduction(+ : grp,grparallel,grbot)
for(int i = 0; i < N; i++){ //some code
for(int j = 0; j < N; j++){
if (j==i) continue;
double delta_x = x[i]-x[j],
delta_y = y[i]-y[j],
e_dot_e = e_x[i] * e_x[j] + e_y[i] * e_y[j],
e_cross_e = e_x[i] * e_y[j] - e_y[i] * e_x[j];
if (j > i)
{
double fasele = sqrt(dist(x[i],y[i],x[j],y[j],L));
for (int h = 0; h < shell_num; h++) //determine periodic distance between i and j is in which shel
{
if( L * h / 100 < fasele && fasele < L * (h + 1) / 100 )
{grp[h]+= e_dot_e;
double pdotr = abs(periodic(delta_x,L) * p_x + periodic(delta_y,L) * p_y)/fasele;
if (pdotr > 0.9659)
{
grparallel[h]+= 1;}else if(pdotr < 0.2588)
{
grbot[h]+= 1;
}
break;
}
}
}
}
}
When I run the code in terminal, there is an error:
‘grp’ has invalid type for ‘reduction’
The same error occurs for grparallel and grbot.
How can I remove the error?
By the OpenCV library, I want to threshold an image like this:
threshold(image, thresh, 220, 255, THRESH_BINARY_INV)
But I want to find the threshold value (220) automatically.
I used Otsu's method to estimate the threshold, but it doesn't work in my case.
Therefore, I want to use the histogram peak technique: find the two peaks in the histogram that correspond to the background and the object of the image, and set the threshold value automatically halfway between the two peaks.
I use this book (pages 117 and 496-505): "Image Processing in C" by Dwayne Phillips (http://homepages.inf.ed.ac.uk/rbf/BOOKS/PHILLIPS/), and I use its source code to find the two peaks in the histogram corresponding to the background and the object of the image. This is my image:
This is my C++ code:
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <stdio.h>
#include <fstream>
using namespace std;
using namespace cv;
int main()
{
Mat image0 = imread("C:/Users/Alireza/Desktop/contrast950318/2.bmp");
imshow("image0", image0);
Mat image, thresh, Tafrigh;
cvtColor(image0, image, CV_RGB2GRAY);
int N = image.rows*image.cols;
int histogram[256];
for (int i = 0; i < 256; i++) {
histogram[i] = 0;
}
//create histo
for (int i = 0; i < image.rows; i++){
for (int j = 0; j < image.cols; j++){
histogram[((int)image.at<uchar>(i, j))]++;
}
}
int peak1, peak2;
#define PEAKS 30
int distance[PEAKS], peaks[PEAKS][2];
int i, j = 0, max = 0, max_place = 0;
for (int i = 0; i<PEAKS; i++){
distance[i] = 0;
peaks[i][0] = -1;
peaks[i][1] = -1;
}
for (i = 0; i <= 255; i++){
max = histogram[i];
max_place = i;
//insert_into_peaks(peaks, max, max_place);
//int max, max_place, peaks[PEAKS][2];
//int i, j;
/* first case */
if (max > peaks[0][0]){
for (i = PEAKS - 1; i > 0; i--){
peaks[i][0] = peaks[i - 1][0];
peaks[i][1] = peaks[i - 1][1];
}
peaks[0][0] = max;
peaks[0][1] = max_place;
} /* ends if */
/* middle cases */
for (j = 0; j < PEAKS - 3; j++){
if (max < peaks[j][0] && max > peaks[j + 1][0]){
for (i = PEAKS - 1; i > j + 1; i--){
peaks[i][0] = peaks[i - 1][0];
peaks[i][1] = peaks[i - 1][1];
}
peaks[j + 1][0] = max;
peaks[j + 1][1] = max_place;
} /* ends if */
} /* ends loop over j */
/* last case */
if (max < peaks[PEAKS - 2][0] &&
max > peaks[PEAKS - 1][0]){
peaks[PEAKS - 1][0] = max;
peaks[PEAKS - 1][1] = max_place;
} /* ends if */
}/* ends loop over i */
for (int i = 1; i<PEAKS; i++){
distance[i] = peaks[0][1] - peaks[i][1];
if (distance[i] < 0)
distance[i] = distance[i] * (-1);
}
peak1 = peaks[0][1];
cout << " peak1= " << peak1;
for (int i = PEAKS - 1; i > 0; i--){
if (distance[i] > 1)
peak2 = peaks[i][1];
}
cout << " peak2= " << peak2;
int mid_point;
//int peak1, peak2;
short hi, low;
unsigned long sum1 = 0, sum2 = 0;
if (peak1 > peak2)
mid_point = ((peak1 - peak2) / 2) + peak2;
if (peak1 < peak2)
mid_point = ((peak2 - peak1) / 2) + peak1;
for (int i = 0; i<mid_point; i++)
sum1 = sum1 + histogram[i];
for (int i = mid_point; i <= 255; i++)
sum2 = sum2 + histogram[i];
if (sum1 >= sum2){
low = mid_point;
hi = 255;
}
else{
low = 0;
hi = mid_point;
}
cout << " low= " << low << " hi= " << hi;
double threshnum = 0.5* (low + hi);
threshold(image, thresh, threshnum, hi, THRESH_BINARY_INV);
waitKey(0);
return 0;
}
But I don't know whether this code is correct or not. If it is correct, why is the threshold value 202?
What ideas would you suggest for solving this task? Or where on the internet can I find help?
You can also use maximum entropy. In some cases, using only the high-range entropy could work better.
/// Selects a threshold for an 8-bit image by maximising the sum of the
/// entropies of the low range [0, t] and the high range (t, 255].
///
/// @param src single-channel 8-bit image.
/// @return the threshold with the largest combined entropy; 255 when the
///         best candidate is 0.
int maxentropie(const cv::Mat1b& src)
{
    // Normalised histogram.
    cv::Mat1d hist(1, 256, 0.0);
    for (int row = 0; row < src.rows; ++row)
    {
        for (int col = 0; col < src.cols; ++col)
        {
            hist(src(row, col))++;
        }
    }
    hist /= double(src.rows * src.cols);

    // Cumulative histogram (the running total is kept in single precision).
    cv::Mat1d cumhist(1, 256, 0.0);
    float running = 0;
    for (int bin = 0; bin < 256; ++bin)
    {
        running += hist(bin);
        cumhist(bin) = running;
    }

    double best_entropy = 0.0;
    int best_threshold = 0;
    for (int t = 0; t < 256; ++t)
    {
        // Entropy of the bins up to and including t.
        const double low_mass = cumhist(t);
        double low_entropy = 0.0;
        if (low_mass > 0)
        {
            for (int bin = 0; bin <= t; ++bin)
            {
                if (hist(bin) > 0)
                {
                    const double prob = hist(bin) / low_mass;
                    low_entropy = low_entropy - prob * log(prob);
                }
            }
        }

        // Entropy of the bins above t; the two masses sum to 1.
        const double high_mass = 1.0 - low_mass;
        double high_entropy = 0.0;
        if (high_mass > 0)
        {
            for (int bin = t + 1; bin < 256; ++bin)
            {
                if (hist(bin) > 0)
                {
                    const double prob = hist(bin) / high_mass;
                    high_entropy = high_entropy - prob * log(prob);
                }
            }
        }

        // The first candidate seeds the maximum; later ones must beat it strictly.
        const double total = low_entropy + high_entropy;
        if (t == 0)
        {
            best_entropy = total;
        }
        else if (total > best_entropy)
        {
            best_entropy = total;
            best_threshold = t;
        }
    }

    if (best_threshold == 0)
    {
        best_threshold = 255;
    }
    return best_threshold;
}
I'm trying to implement a gradient descent algorithm in C++. Here's the code I have so far :
#include <iostream>
// Training samples: X holds the inputs and Y the matching targets of the fit.
double X[] {163,169,158,158,161,172,156,161,154,145};
double Y[] {52, 68, 49, 73, 71, 99, 50, 82, 56, 46 };
// Parameters of the fitted line y = m * x + p (slope m, intercept p).
double m, p;
// Number of samples, derived from the size of X.
int n = sizeof(X)/sizeof(X[0]);
// Declared up front: both functions are defined after main, and calling
// gradientStep without a prior declaration does not compile.
double Loss_function(void);
void gradientStep(double alpha);

/// Seeds the line through the first two samples and runs eight
/// gradient-descent steps on the global parameters m and p.
///
/// @return 0 on completion.
int main(void) {
    double alpha = 0.00004; // 0.00007;

    // Initial guess: the line through (X[0], Y[0]) and (X[1], Y[1]).
    m = (Y[1] - Y[0]) / (X[1] - X[0]);
    p = Y[0] - m * X[0];

    // NOTE(review): X is neither centred nor scaled, so the intercept p is
    // expected to move far more slowly than m -- consider normalising X.
    for (int i = 1; i <= 8; i++) {
        gradientStep(alpha);
    }
    return 0;
}
/// Returns half the mean squared residual of the line y = m * x + p over the
/// n samples stored in X and Y.
double Loss_function(void) {
    double squared_sum = 0;
    for (int k = 0; k < n; ++k) {
        const double residual = Y[k] - m * X[k] - p;
        squared_sum += residual * residual;
    }
    return squared_sum / 2.0 / static_cast<double>(n);
}
/// Performs one gradient-descent update of the global parameters m and p.
///
/// @param alpha step size applied to the averaged gradient.
void gradientStep(double alpha) {
    double intercept_sum = 0;
    double slope_sum = 0;
    for (int k = 0; k < n; ++k) {
        // Residual of sample k under the current parameters.
        const double residual = Y[k] - m * X[k] - p;
        intercept_sum += residual;
        slope_sum += X[k] * residual;
    }
    // Both sums were computed with the old parameters, so the order of the
    // two updates does not matter.
    p += alpha * intercept_sum / n;
    m += alpha * slope_sum / n;
}
This code converges towards m = 2.79822, p = -382.666, and an error of 102.88. But if I use my calculator to find out the correct linear regression model, I find that the correct values of m and p should respectively be 1.601 and -191.1.
I also noticed that the algorithm won't converge for alpha > 0.00007, which seems quite low, and the value of p barely changes during the 8 iterations (or even after 2000 iterations).
What's wrong with my code?
Here's a good overview of the algorithm I'm trying to implement. The values of theta0 and theta1 are called p and m in my program.
Other implementation in python
More about the algorithm
This link gives a comprehensive view of the algorithm; it turns out I was following a completely wrong approach.
The following code does not work properly (and I have no plans to work on it further), but it should put anyone faced with the same problem on the right track:
#include <vector>
#include <iostream>
// Shorthand for a vector of doubles (not referenced by the code shown here).
typedef std::vector<double> vect;
// y: target values; omega: current model parameters; omega2: working copy
// that gradientStep updates and then copies back into omega.
std::vector<double> y, omega(2, 0), omega2(2, 0);;
// Sample matrix: X[i] is the feature vector of sample i.
std::vector<std::vector<double>> X;
// Number of samples that gradientStep and display iterate over.
int n = 10;
// Declared up front: these functions are defined after main, and calling
// them without a prior declaration does not compile.
double f_function(const std::vector<double> &x);
void gradientStep(double alpha);
void display(void);

/// Prints the initial state, then runs eight gradient steps, printing the
/// parameters and the error after each one.
///
/// @return 0 on completion.
int main(void) {
    /* Initialize X so that each member contains (1, x_i) */
    /* Initialize y so that each member contains y_i */
    // TODO: the two initialisations above are placeholders only; X and y
    // must hold at least n entries before the calls below.
    double alpha = 0.00001;
    display();
    for (int i = 1; i <= 8; i++) {
        gradientStep(alpha);
        display();
    }
    return 0;
}
/// Evaluates the linear model: the dot product of omega with `x`.
///
/// @param x feature vector; must hold at least omega.size() entries.
/// @return sum over i of omega[i] * x[i].
double f_function(const std::vector<double> &x) {
    // Must start at zero: the accumulator used to be left uninitialized, so
    // the result was indeterminate.
    double c = 0.0;
    for (unsigned int i = 0; i < omega.size(); i++) {
        c += omega[i] * x[i];
    }
    return c;
}
/// Performs one batch gradient-descent step on the global parameters omega.
///
/// Every residual is computed with the current omega; the updates are
/// accumulated in omega2 and copied into omega once all samples are done.
///
/// @param alpha step size applied to the averaged gradient.
void gradientStep(double alpha) {
    // Nothing to step on when fewer than n samples are loaded; X[0] used to
    // be read unconditionally, which is undefined for an empty X.
    if (n <= 0 || X.size() < static_cast<std::size_t>(n) || y.size() < static_cast<std::size_t>(n)) {
        return;
    }

    const double scale = alpha / (double)n;
    for (int i = 0; i < n; i++) {
        // The residual depends only on omega, which is unchanged inside this
        // loop, so it is computed once per sample instead of per coordinate.
        const double residual = f_function(X[i]) - y[i];
        // Bound by both sizes so a mismatched row cannot index past omega2.
        for (std::size_t j = 0; j < X[i].size() && j < omega2.size(); j++) {
            omega2[j] -= scale * residual * X[i][j];
        }
    }
    omega = omega2;
}
/// Prints the current parameters followed by half the mean squared residual
/// over the n samples.
void display(void) {
    double squared_sum = 0;
    for (int k = 0; k < n; ++k) {
        const double residual = y[k] - f_function(X[k]);
        squared_sum += residual * residual; // loss function
    }

    std::cout << "omega = ";
    for (unsigned int k = 0; k < omega.size(); ++k) {
        std::cout << "[" << omega[k] << "] ";
    }
    std::cout << "\tError : " << squared_sum * .5 / static_cast<double>(n) << std::endl;
}