Gaussian Curve Calculation (C++)

Given a histogram, perform a bi-means gaussian fitting.
This project fits two Gaussian curves to one histogram, starting from an initial threshold value, and uses the curves to determine the ideal threshold. At each iteration, the histogram is divided into two parts at the candidate threshold, and a Gaussian is fitted to each part.
My calculations must be wrong, because the resulting curve is nonsensical. Can anyone spot what's going on? I've included my functions for the bi-means search (biMeanGauss), the curve fit (fitGauss), the mean, the variance, and the Gaussian value.
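For reference, the per-bin model implied by computeGaussian below (my reading of the code; the post does not state it) is a Gaussian scaled to the histogram peak, evaluated at bin index x:

G(x) = maxCount * exp(-(x - mean)^2 / (2 * var))

and fitGauss scores a candidate threshold by the total absolute error sum_x |G(x) - histAry[x]| over each side of the split.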
int biMeanGauss (int thrVal){
    double sum1, sum2, total = 0.0;
    int bestThr = thrVal;
    double minDiff = 99999999.0;
    for (int i = thrVal; i < (maxVal-offSet); i++) {
        set1DZero(GaussAry);
        sum1 = fitGauss(0, i);
        cout << "Sum1: " << sum1 << endl;
        sum2 = fitGauss(i, maxVal);
        cout << "Sum2: " << sum2 << endl;
        total = sum1 + sum2;
        cout << "Total: " << total << endl;
        if (total < minDiff) {
            minDiff = total;
            bestThr = i;
            cout << "Total < MinDiff, so BestThr: " << bestThr << endl;
        }
    }
    return bestThr;
}

double fitGauss(int leftIndex, int rightIndex){
    double mean, var, sum = 0.0;
    double Gval;
    mean = computeMean(leftIndex, rightIndex, maxCount);
    var = computeVar(leftIndex, rightIndex, mean);
    for (int index = leftIndex; index <= rightIndex; index++){
        Gval = computeGaussian(histAry[index], mean, var);
        sum += abs(Gval - histAry[index]);
        GaussAry[index] = (int)Gval;
        GaussImg[index][(int)Gval] = 1;
    }
    return sum;
}

double computeMean(int leftIndex, int rightIndex, int maxCount) {
    int mean, n, sum = 0;
    for (int i = leftIndex; i <= rightIndex; i++){
        sum += (i*histAry[i]);
        n++;
        if (histAry[i] > maxCount){
            maxCount = histAry[i];
        }
    }
    mean = (sum / (double) n);
    return mean;
}

double computeVar(int leftIndex, int rightIndex, int mean) {
    double var = 0;
    double sum = 0;
    double n = 0;
    for (int i = leftIndex; i <= rightIndex; i++) {
        sum += histAry[i]*(i - mean) * (i - mean);
        n++;
    }
    var = (sum / n);
    return var;
}

double computeGaussian(int index, double mean, double var) {
    double Gval = 0;
    Gval = maxCount * exp(-(((index - mean)*(index - mean)) / (2 * var)));
    return Gval;
}
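Two details in the code above stand out on inspection (observations from the code, not a confirmed fix): computeMean never initializes n and declares mean as an int, truncating the result; and fitGauss passes histAry[index] (a bin count) into computeGaussian, whose mean and var are in bin-index units, so the intended argument looks like index instead. Note also the denominator: since the numerator weights each index by its count, the matching divisor is the total count, not the number of bins. A corrected computeMean under that reading:

double computeMean(int leftIndex, int rightIndex, int maxCount) {
    double sum = 0.0;
    int n = 0;                          // was uninitialized in the original
    for (int i = leftIndex; i <= rightIndex; i++) {
        sum += (double)i * histAry[i];  // index weighted by its bin count
        n += histAry[i];                // total count, since sum is count-weighted
        if (histAry[i] > maxCount)
            maxCount = histAry[i];      // note: maxCount is passed by value,
                                        // so this never reaches the caller
    }
    return (n > 0) ? sum / n : 0.0;     // keep the division in floating point
}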

Related

Problem with getting calculations of an array inside of an array done right

Using the formula in the pic, I need to write a program that allows the user to calculate sin(x), cos(x), tan(x). The user should enter the angle in degrees, and then the program should transform it into radians before performing the three requested calculations. For each requested calculation (i.e., sin(x), cos(x), tan(x)), I only need to calculate the first 15 terms of the series.
The problem seems to be in the arrays in the last block of the code; it keeps returning wrong results for the tan(x) series. How can I fix it?
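For reference, the standard series being implemented here (textbook identities, not from the assignment text) are:

cos(x) = sum_{n=0}^{inf} (-1)^n x^(2n) / (2n)!
sin(x) = sum_{n=0}^{inf} (-1)^n x^(2n+1) / (2n+1)!
tan(x) = sum_{n=1}^{inf} (-1)^(n-1) 2^(2n) (2^(2n) - 1) B_(2n) x^(2n-1) / (2n)!

where the B_(2n) are the Bernoulli numbers (B_2 = 1/6, B_4 = -1/30, ...), which is what the bernoulli array below tabulates.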
#include <iostream>
using namespace std;

//create a function to convert angles from degrees to radian
double convertToRadian(double deg)
{
    //formula : radian = (degree * pi)/180
    const double pi = 3.14159265359; //declaring pi's value as a constant
    return (deg * (pi / 180)); //returning the radian value
}

//create a function to calculate the exponent/power
double power(double base, unsigned int exp)
{
    double result = 1;
    for(int i = 0; i < exp; i++){
        result = result * base;
    }
    return result;
}

//create a function to get the factorial of a value
double factorial(int fac)
{
    if(fac > 1)
        return fac * factorial(fac - 1);
    else
        return 1;
}

//create a function to print out arrays as we will use it to print the terms in the series
void printTerms(double terms[15])
{
    for (int i = 0; i < 15; i++)
    {
        cout << terms[i] << endl;
    }
}

int main()
{
    double degree; //declare the variables used in the program
    double valueOfCos, valueOfSin, valueOfTan; //declare variables for terms of each function
    cout << "Enter angle (x) in degrees: " << endl; //prompt for user to enter angle in deg
    cin >> degree;
    double radian = convertToRadian(degree); //first, converting from degrees to radian
    //make an array for the first 15 terms of cos(x):
    double cos[15];
    //make a loop to insert values in the array
    for (int n = 0; n < 15; n++)
    {
        //type the maclaurin series formula for cos(x):
        valueOfCos = (( power(-1 , n)) / (factorial(2*n))) * (power(radian, (2*n)));
        cos[n] = valueOfCos;
    }
    //print out the first 15 terms of cos(x) in the maclaurin series:
    cout << "cos(x)= ";
    printTerms (cos);
    //make an array for the first 15 terms of sin(x):
    double sin[15];
    for (int n = 0; n < 15; n++)
    {
        valueOfSin = ((power(-1 , n)) / (factorial((2*n + 1)))) * (power(radian, (2*n + 1)));
        sin[n] = valueOfSin;
    }
    cout << "sin(x)= ";
    printTerms (sin);
    double tan[15];
    for (int n = 0; n < 15; n++)
    {
        double bernoulli[15] = {(1/6), (-1/30), (1/42), (-1/30), (5/66), (-691/2730),
            (7/6), (-3617/510), (43867/798), (-174611/330), (854513/138), (-236364091/2730),
            (8553103/6), (-23749461029/870), (8615841276005/14322)};
        for (int i = 0; i < 15; i++)
        {
            double firstNum = 0, secondNum = 0, thirdNum = 0, denominator = 0;
            firstNum = power(-1 , n);
            secondNum = power(2 , 2*n + 2);
            thirdNum = ((secondNum) - 1);
            denominator = factorial(2*n + 2);
            valueOfTan = ((firstNum * secondNum * thirdNum * (bernoulli[i])) / denominator) *
                (power(radian, 2*n + 1));
            tan[n] = valueOfTan;
        }
    }
    cout << "tan(x)= ";
    printTerms (tan);
    return 0;
}
This loop, for (int n = 0; n < 15; n++), is not running over the entire expression: the extra inner loop over i overwrites tan[n] on every pass, so each term ends up computed with bernoulli[14]. You'll need to correct it to something like this:
double bernoulli[15] = {(1/6), (-1/30), (1/42), (-1/30), (5/66), (-691/2730),
    (7/6), (-3617/510), (43867/798), (-174611/330), (854513/138), (-236364091/2730),
    (8553103/6), (-23749461029/870), (8615841276005/14322)};
for (int n = 0; n < 15; n++){
    double firstNum = 0, secondNum = 0, thirdNum = 0, denominator = 0;
    firstNum = power(-1 , n);
    secondNum = power(2 , 2*n + 2);
    thirdNum = ((secondNum) - 1);
    denominator = factorial(2*n + 2);
    valueOfTan = ((firstNum * secondNum * thirdNum * (bernoulli[n])) / denominator) * (power(radian, 2*n + 1));
    tan[n] = valueOfTan;
}
You are incorrectly calculating the tan value.
In valueOfTan = ((firstNum * secondNum * thirdNum * (bernoulli[i])) / denominator) * (power(radian, 2 * n + 1));
Instead of bernoulli[i], you need to have bernoulli[2*i+2] as per the formulae.
And one more suggestion: pull the double bernoulli[15] = { ... } array initialization out of the for loop. Since it's constant, there's no need to reinitialize it on every iteration; doing so only adds to your code's runtime.
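One further issue neither answer mentions, which follows from C++ semantics alone: initializers like (1/6) and (-691/2730) are integer divisions, so they truncate (1/6 == 0, 7/6 == 1, and so on) and the array never holds the actual Bernoulli ratios. Floating-point literals fix that; a corrected initialization under that observation:

// Use floating-point division so the Bernoulli-number ratios survive:
const double bernoulli[15] = {
    1.0/6, -1.0/30, 1.0/42, -1.0/30, 5.0/66, -691.0/2730,
    7.0/6, -3617.0/510, 43867.0/798, -174611.0/330, 854513.0/138,
    -236364091.0/2730, 8553103.0/6, -23749461029.0/870, 8615841276005.0/14322
};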

How do I find the average of a column in a list?

I have a list of vectors currently, stored as a std::list<Vec3f> unsortedRays;
-4.5 4.5 -8.66025
-3.5 4.5 -8.66025
-2.5 4.5 -8.66025
-1.5 4.5 -8.66025
-0.5 4.5 -8.66025
I'm generating these earlier in the program and can calculate the size beforehand (or after too).
How do I find the average of each column (0, 1, 2)?
In Python, I can do:
x_avg = np.mean(column(list, 0))
y_avg = np.mean(column(list, 1))
z_avg = np.mean(column(list, 2))
I have tried the answer here but haven't been able to get it to work so far.
Edit:
Here's what I have so far in my simple.cpp file:
std::list<Vec3f> unsortedRays;

void generateRays() {
    for (size_t y = 0; y < height; ++y) {
        for (size_t x = 0; x < width; ++x) {
            Vec3f direction;
            float dir_x = (x + 0.5) - width/2.;
            float dir_y = -(y + 0.5) + height/2.;
            float dir_z = -height/(2.*tan(fov/2.));
            direction = Vec3f(dir_x, dir_y, dir_z);
            unsortedRays.push_back(direction);
        }
    }
}

static void sortRays() {
    const int rows = width * height, columns = 3;
    std::vector<double> matrix;
    double sum = 0;
    for (auto const& i : unsortedRays) { // for each row
        //std::vector<double> row;
        //row.push_back[i];
        for (unsigned int j = 0; j < columns; ++j) { // for each column, or "value"
            double value = i[j];
            //std::cout << value;
            matrix.push_back(value);
            sum += value;
            //std::cout << "sum: " << sum << "\n";
        }
    }
    const double quantity = unsortedRays.size();
    const double average = sum / quantity;
    std::cout << "average: " << average << "\n";
}
You could do something like this:
std::vector<std::vector<double>> matrix;
double c1, c2, c3;
double sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
while (my_file >> c1 >> c2 >> c3)
{
    std::vector<double> row;
    row.push_back(c1);
    row.push_back(c2);
    row.push_back(c3);
    matrix.push_back(row);
    sum1 += c1;
    sum2 += c2;
    sum3 += c3;
}
const double quantity = matrix.size();
const double average_c1 = sum1 / quantity;
const double average_c2 = sum2 / quantity;
const double average_c3 = sum3 / quantity;
The above code calculates the sum of each column as the data is read, and stores the values into a matrix.
The average of each column is calculated using the sum variables for the appropriate column and the amount of data read in.
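Since the question's data is already in memory as std::list<Vec3f> rather than in a file, the same per-column-sums idea adapts directly; a sketch, assuming the Vec3f element access i[j] used in the question's sortRays():

double sums[3] = {0.0, 0.0, 0.0};
for (auto const& ray : unsortedRays) {
    for (int j = 0; j < 3; ++j)
        sums[j] += ray[j];               // accumulate each column separately
}
const double quantity = unsortedRays.size();
const double x_avg = sums[0] / quantity; // column 0
const double y_avg = sums[1] / quantity; // column 1
const double z_avg = sums[2] / quantity; // column 2

This mirrors the Python np.mean(column(list, k)) calls from the question, one running sum per column.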

Fixing Neural Net vanishing gradients problem?

This is going to be a long one. I am still very new to coding (I started 3 months ago), so I know my code is not perfect; any criticism beyond the question is more than welcome. I have specifically avoided using pointers because I do not fully understand them: I can use them, but I don't trust myself to use them correctly in a program like this.
First things first, I have a version of this where there is only one hidden layer, and that net works perfectly. The problems started when I tried to expand the number of hidden layers.
Some info on the net:
-I am using softmax output activation as I have 3 output neurons.
-I am using tanh as my activation function on the rest of the net.
-The file being read for the input has a format of
"input: 0.56 0.76 0.23 0.67"
"output: 0.0 0.0 1.0" (this is the target)
-The weights for connecting a layer 1 neuron to a layer 2 neuron are stored in the layer 1 neuron.
-The biases for each neuron are stored in that neuron.
-The target is 1.0 0.0 0.0 if the sum of the input numbers is below 1, 0.0 1.0 0.0 if the sum is between 1 and 2, and 0.0 0.0 1.0 if the sum is above 2.
-Using L1 regularization.
Those problems are, specifically:
The softmax output values never move away from a relatively equalised range, i.e.:
positions 1 and 2 in the target vector each have a roughly 50/50 occurrence rate, while position 3 occurs less than 3% of the time; so by relatively equalised I mean the softmax output generally looks something like
"0.56.... 0.48.... 0.02..." even after 500 epochs.
The weights at the hidden layers closer to the input layer don't change much at all, which is what I think vanishing gradients look like (I might be wrong on this). But the weights at the hidden layer closest to the output end up between -50 and 50 (which I think is okay?).
Things I have tried:
I have tried using Relu, parametric Relu, and exponential Relu, but with all of these the softmax output value for neuron 3 keeps rising while the other 2 neurons' values keep falling. These values continue on that trajectory until either 500 epochs have been reached or they just turn into NaNs. (I think this is to do with the structure of my code rather than the Relu function itself.)
If I set the number of hidden layers above 3 while using Relu, it immediately spits out NaNs, within the first epoch.
The backprop function is pretty long, but that is deliberate: I have deconstructed it many times over to try to figure out where I might be mismatching values. I do have a condensed version, but I feel I have a higher chance of being completely off the mark there than with the deconstructed one.
I have included the Relu function code that I used. This is the first time I have used it, so I might be wrong there as well, but I don't think so; I have double-checked multiple times. The Relu in the code is specifically "Elu", or exponential relu.
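(For reference, the exponential variant in the code below is, with alpha = 0.01 as used there: f(x) = x for x > 0 and f(x) = alpha * (e^x - 1) otherwise. Its derivative for x < 0 is alpha * e^x = f(x) + alpha, which is exactly what Reluderiv returns.)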
here is the code for the net:
#include <iostream>
#include <fstream>
#include <cmath>
#include <vector>
#include <sstream>
#include <random>
#include <string>
#include <iomanip>

double randomt(double x, double y)
{
    std::random_device rd;
    std::mt19937 mt(rd());
    std::uniform_real_distribution<double> dist(x, y);
    return dist(mt);
}

class InputN
{
public:
    double val{};
    std::vector <double> weights{};
};

class HiddenN
{
public:
    double preactval{};
    double actval{};
    double actvalPD{};
    double preactvalpd{};
    std::vector <double> weights{};
    double bias{};
};

class OutputN
{
public:
    double preactval{};
    double actval{};
    double preactvalpd{};
    double bias{};
};

class Net
{
public:
    std::vector <InputN> inneurons{};
    std::vector <std::vector <HiddenN>> hiddenneurons{};
    std::vector <OutputN> outputneurons{};
    double lambda{ 0.015 };
    double alpha{ 0.02 };
};

double tanhderiv(double val)
{
    return 1 - tanh(val) * tanh(val);
}

double Relu(double val)
{
    if (val < 0) return 0.01 * (exp(val) - 1);
    else return val;
}

double Reluderiv(double val)
{
    if (val < 0) return Relu(val) + 0.01;
    else return 1;
}

double regularizer(double weight)
{
    double absval{};
    if (weight < 0) absval = weight - weight - weight;
    else if (weight > 0 || weight == 0) absval = weight;
    else;
    if (absval > 0) return 1;
    else if (absval < 0) return -1;
    else if (absval == 0) return 0;
    else return 2;
}

void feedforward(Net& net)
{
    double sum{};
    int prevlayer{};
    for (size_t Hsize = 0; Hsize < net.hiddenneurons.size(); Hsize++)
    {
        //std::cout << "in first loop" << '\n';
        prevlayer = Hsize - 1;
        for (size_t Hel = 0; Hel < net.hiddenneurons[Hsize].size(); Hel++)
        {
            //std::cout << "in second loop" << '\n';
            if (Hsize == 0)
            {
                //std::cout << "in first if" << '\n';
                for (size_t Isize = 0; Isize < net.inneurons.size(); Isize++)
                {
                    //std::cout << "in fourth loop" << '\n';
                    sum += (net.inneurons[Isize].val * net.inneurons[Isize].weights[Hel]);
                }
                net.hiddenneurons[Hsize][Hel].preactval = net.hiddenneurons[Hsize][Hel].bias + sum;
                net.hiddenneurons[Hsize][Hel].actval = tanh(sum);
                sum = 0;
                //std::cout << "first if done" << '\n';
            }
            else
            {
                //std::cout << "in else" << '\n';
                for (size_t prs = 0; prs < net.hiddenneurons[prevlayer].size(); prs++)
                {
                    //std::cout << "in fourth loop" << '\n';
                    sum += net.hiddenneurons[prevlayer][prs].actval * net.hiddenneurons[prevlayer][prs].weights[Hel];
                }
                //std::cout << "fourth loop done" << '\n';
                net.hiddenneurons[Hsize][Hel].preactval = net.hiddenneurons[Hsize][Hel].bias + sum;
                net.hiddenneurons[Hsize][Hel].actval = tanh(sum);
                //std::cout << "else done" << '\n';
                sum = 0;
            }
        }
    }
    //std::cout << "first loop done " << '\n';
    int lasthid = net.hiddenneurons.size() - 1;
    for (size_t Osize = 0; Osize < net.outputneurons.size(); Osize++)
    {
        for (size_t Hsize = 0; Hsize < net.hiddenneurons[lasthid].size(); Hsize++)
        {
            sum += (net.hiddenneurons[lasthid][Hsize].actval * net.hiddenneurons[lasthid][Hsize].weights[Osize]);
        }
        net.outputneurons[Osize].preactval = net.outputneurons[Osize].bias + sum;
    }
}

void softmax(Net& net)
{
    double sum{};
    for (size_t Osize = 0; Osize < net.outputneurons.size(); Osize++)
    {
        sum += exp(net.outputneurons[Osize].preactval);
    }
    for (size_t Osize = 0; Osize < net.outputneurons.size(); Osize++)
    {
        net.outputneurons[Osize].actval = exp(net.outputneurons[Osize].preactval) / sum;
    }
}

void lossfunc(Net& net, std::vector <double> target)
{
    int pos{ -1 };
    double val{};
    for (size_t t = 0; t < target.size(); t++)
    {
        pos += 1;
        if (target[t] > 0)
        {
            break;
        }
    }
    for (size_t s = 0; net.outputneurons.size(); s++)
    {
        val = -log(net.outputneurons[pos].actval);
    }
}

void backprop(Net& net, std::vector<double>& target)
{
    for (size_t outI = 0; outI < net.outputneurons.size(); outI++)
    {
        double PD = target[outI] - net.outputneurons[outI].actval;
        net.outputneurons[outI].preactvalpd = PD * -1;
    }
    size_t lasthid = net.hiddenneurons.size() - 1;
    for (size_t LH = 0; LH < net.hiddenneurons[lasthid].size(); LH++)
    {
        for (size_t LHW = 0; LHW < net.hiddenneurons[lasthid][LH].weights.size(); LHW++)
        {
            double weight = net.hiddenneurons[lasthid][LH].weights[LHW];
            double PD = net.outputneurons[LHW].preactvalpd * net.hiddenneurons[lasthid][LH].actval;
            PD = PD * -1;
            double delta = PD - (net.lambda * regularizer(weight));
            weight = weight + (net.alpha * delta);
            net.hiddenneurons[lasthid][LH].weights[LHW] = weight;
        }
    }
    for (size_t OB = 0; OB < net.outputneurons.size(); OB++)
    {
        double bias = net.outputneurons[OB].bias;
        double BPD = net.outputneurons[OB].preactvalpd;
        BPD = BPD * -1;
        double Delta = BPD;
        bias = bias + (net.alpha * Delta);
    }
    for (size_t HPD = 0; HPD < net.hiddenneurons[lasthid].size(); HPD++)
    {
        double PD{};
        for (size_t HW = 0; HW < net.outputneurons.size(); HW++)
        {
            PD += net.hiddenneurons[lasthid][HPD].weights[HW] * net.outputneurons[HW].preactvalpd;
        }
        net.hiddenneurons[lasthid][HPD].actvalPD = PD;
        PD = 0;
    }
    for (size_t HPD = 0; HPD < net.hiddenneurons[lasthid].size(); HPD++)
    {
        net.hiddenneurons[lasthid][HPD].preactvalpd = net.hiddenneurons[lasthid][HPD].actvalPD * tanhderiv(net.hiddenneurons[lasthid][HPD].preactval);
    }
    for (size_t AllHid = net.hiddenneurons.size() - 2; AllHid > -1; AllHid--)
    {
        size_t uplayer = AllHid + 1;
        for (size_t cl = 0; cl < net.hiddenneurons[AllHid].size(); cl++)
        {
            for (size_t clw = 0; clw < net.hiddenneurons[AllHid][cl].weights.size(); clw++)
            {
                double weight = net.hiddenneurons[AllHid][cl].weights[clw];
                double PD = net.hiddenneurons[uplayer][clw].preactvalpd * net.hiddenneurons[AllHid][cl].actval;
                PD = PD * -1;
                double delta = PD - (net.lambda * regularizer(weight));
                weight = weight + (net.alpha * delta);
                net.hiddenneurons[AllHid][cl].weights[clw] = weight;
            }
        }
        for (size_t up = 0; up < net.hiddenneurons[uplayer].size(); up++)
        {
            double bias = net.hiddenneurons[uplayer][up].bias;
            double PD = net.hiddenneurons[uplayer][up].preactvalpd;
            PD = PD * -1;
            double delta = PD;
            bias = bias + (net.alpha * delta);
        }
        for (size_t APD = 0; APD < net.hiddenneurons[AllHid].size(); APD++)
        {
            double PD{};
            for (size_t APDW = 0; APDW < net.hiddenneurons[AllHid][APD].weights.size(); APDW++)
            {
                PD += net.hiddenneurons[AllHid][APD].weights[APDW] * net.hiddenneurons[uplayer][APDW].preactvalpd;
            }
            net.hiddenneurons[AllHid][APD].actvalPD = PD;
            PD = 0;
        }
        for (size_t PPD = 0; PPD < net.hiddenneurons[AllHid].size(); PPD++)
        {
            double PD = net.hiddenneurons[AllHid][PPD].actvalPD * tanhderiv(net.hiddenneurons[AllHid][PPD].preactval);
            net.hiddenneurons[AllHid][PPD].preactvalpd = PD;
        }
    }
    for (size_t IN = 0; IN < net.inneurons.size(); IN++)
    {
        for (size_t INW = 0; INW < net.inneurons[IN].weights.size(); INW++)
        {
            double weight = net.inneurons[IN].weights[INW];
            double PD = net.hiddenneurons[0][INW].preactvalpd * net.inneurons[IN].val;
            PD = PD * -1;
            double delta = PD - (net.lambda * regularizer(weight));
            weight = weight + (net.alpha * delta);
            net.inneurons[IN].weights[INW] = weight;
        }
    }
    for (size_t hidB = 0; hidB < net.hiddenneurons[0].size(); hidB++)
    {
        double bias = net.hiddenneurons[0][hidB].bias;
        double PD = net.hiddenneurons[0][hidB].preactvalpd;
        PD = PD * -1;
        double delta = PD;
        bias = bias + (net.alpha * delta);
        net.hiddenneurons[0][hidB].bias = bias;
    }
}

int main()
{
    std::vector <double> invals{ };
    std::vector <double> target{ };
    Net net;
    InputN Ineuron;
    HiddenN Hneuron;
    OutputN Oneuron;
    int IN = 4;
    int HIDLAYERS = 4;
    int HID = 8;
    int OUT = 3;
    for (int i = 0; i < IN; i++)
    {
        net.inneurons.push_back(Ineuron);
        for (int m = 0; m < HID; m++)
        {
            net.inneurons.back().weights.push_back(randomt(0.0, 0.5));
        }
    }
    //std::cout << "first loop done" << '\n';
    for (int s = 0; s < HIDLAYERS; s++)
    {
        net.hiddenneurons.push_back(std::vector <HiddenN>());
        if (s == HIDLAYERS - 1)
        {
            for (int i = 0; i < HID; i++)
            {
                net.hiddenneurons[s].push_back(Hneuron);
                for (int m = 0; m < OUT; m++)
                {
                    net.hiddenneurons[s].back().weights.push_back(randomt(0.0, 0.5));
                }
                net.hiddenneurons[s].back().bias = 1.0;
            }
        }
        else
        {
            for (int i = 0; i < HID; i++)
            {
                net.hiddenneurons[s].push_back(Hneuron);
                for (int m = 0; m < HID; m++)
                {
                    net.hiddenneurons[s].back().weights.push_back(randomt(0.0, 0.5));
                }
                net.hiddenneurons[s].back().bias = 1.0;
            }
        }
    }
    //std::cout << "second loop done" << '\n';
    for (int i = 0; i < OUT; i++)
    {
        net.outputneurons.push_back(Oneuron);
        net.outputneurons.back().bias = randomt(0.0, 0.5);
    }
    //std::cout << "third loop done" << '\n';
    int count{};
    std::ifstream fileread("N.txt");
    for (int epoch = 0; epoch < 500; epoch++)
    {
        count = 0;
        if (epoch == 100 || epoch == 100 * 2 || epoch == 100 * 3 || epoch == 100 * 4 || epoch == 499)
        {
            printvals("no", net);
        }
        fileread.clear(); fileread.seekg(0, std::ios::beg);
        while (fileread.is_open())
        {
            std::cout << '\n' << "epoch: " << epoch << '\n';
            std::string fileline{};
            fileread >> fileline;
            if (fileline == "in:")
            {
                std::string input{};
                double nums{};
                std::getline(fileread, input);
                std::stringstream ss(input);
                while (ss >> nums)
                {
                    invals.push_back(nums);
                }
            }
            if (fileline == "out:")
            {
                std::string output{};
                double num{};
                std::getline(fileread, output);
                std::stringstream ss(output);
                while (ss >> num)
                {
                    target.push_back(num);
                }
            }
            count += 1;
            if (count == 2)
            {
                for (size_t inv = 0; inv < invals.size(); inv++)
                {
                    net.inneurons[inv].val = invals[inv];
                }
                //std::cout << "calling feedforward" << '\n';
                feedforward(net);
                //std::cout << "ff done" << '\n';
                softmax(net);
                printvals("output", net);
                std::cout << "target: " << '\n';
                for (auto element : target) std::cout << element << " / ";
                std::cout << '\n';
                backprop(net, target);
                invals.clear();
                target.clear();
                count = 0;
            }
            if (fileread.eof()) break;
        }
    }
    //std::cout << "fourth loop done" << '\n';
    return 1;
}
Much appreciated to anyone who actually made it through all that! :)
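A few things can be observed from the posted code alone (notes, not a tested fix):
-In backprop, for (size_t AllHid = net.hiddenneurons.size() - 2; AllHid > -1; AllHid--) compares an unsigned size_t against -1; the -1 converts to SIZE_MAX, so the condition is false immediately and the loop over the inner hidden layers never executes. That alone would leave the early layers' weights, and the layer-0 preactvalpd values the input-weight update reads, untouched.
-The output-layer and upper-hidden bias loops compute bias + (net.alpha * Delta) into a local variable and never write it back to the network; only the layer-0 bias loop assigns the result.
-feedforward stores bias + sum in preactval but then computes actval = tanh(sum), so the bias never influences any activation; also, the output-layer loop never resets sum between output neurons.
-lossfunc's loop condition is net.outputneurons.size(), which is always nonzero, so it would never terminate if called (main happens not to call it).
-The described file format uses "input:"/"output:" while the parser compares against "in:" and "out:"; worth double-checking against the actual N.txt.
Separately, NaNs with ReLU-family activations are consistent with exp overflowing in softmax once preactivations grow large. A numerically stable softmax subtracts the maximum preactivation first (a standard identity; this sketch assumes <algorithm> is included for std::max):

void softmax(Net& net)
{
    // exp(x - max) <= 1, so the sum cannot overflow for finite inputs
    double maxPre = net.outputneurons[0].preactval;
    for (auto const& o : net.outputneurons)
        maxPre = std::max(maxPre, o.preactval);
    double sum = 0.0;
    for (auto const& o : net.outputneurons)
        sum += exp(o.preactval - maxPre);
    for (auto& o : net.outputneurons)
        o.actval = exp(o.preactval - maxPre) / sum;
}

For the reverse loop, a signed index, for (int AllHid = (int)net.hiddenneurons.size() - 2; AllHid >= 0; AllHid--), avoids the size_t wraparound.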

Logistic Regression Returning Wrong Prediction

I'm trying to implement logistic regression in C++, but the predictions I'm getting are not even close to what I am expecting. I'm not sure if there is an error in my understanding of logistic regression or the code.
I have reviewed the algorithms and messed with the learning rate, but the results are very inconsistent.
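For reference, the usual batch gradient-descent update for logistic regression (the textbook rule, not taken from the code below) gives each parameter its own partial derivative:

theta_j := theta_j - (alpha / m) * sum_{i=1}^{m} (h_theta(x_i) - y_i) * x_ij

where x_i0 = 1 for the bias term. Keeping this in mind helps when reading gradient_descent() below.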
#include <iostream>
#include <cmath>
#include <vector>
using namespace std;

double theta[4] = {0, 0, 0, 0};
double x[2][3] = {
    {1, 1, 1},
    {9, 9, 9},
};
double y[2] = {0, 1};
//prediction data
double test_x[1][3] = {
    {9, 9, 9},
};
int test_m = sizeof(test_x) / sizeof(test_x[0]);
int m = sizeof(x) / sizeof(x[0]);
int n = sizeof(theta) / sizeof(theta[0]);
int xn = n - 1;

struct Logistic
{
    double sigmoid(double total)
    {
        double e = 2.71828;
        double sigmoid_x = 1 / (1 + pow(e, -total));
        return sigmoid_x;
    }
    double h(int x_row)
    {
        double total = theta[0] * 1;
        for(int c1 = 0; c1 < xn; ++c1)
        {
            total += theta[c1 + 1] * x[x_row][c1];
        }
        double final_total = sigmoid(total);
        //cout << "final total: " << final_total;
        return final_total;
    }
    double cost()
    {
        double hyp;
        double temp_y;
        double error;
        for(int c1 = 0; c1 < m; ++c1)
        {
            //passes row of x to h to calculate sigmoid(xi * thetai)
            hyp = h(c1);
            temp_y = y[c1];
            error += temp_y * log(hyp) + (1 - temp_y) * log(1 - hyp);
        } // 1 / m
        double final_error = -.5 * error;
        return final_error;
    }
    void gradient_descent()
    {
        double alpha = .01;
        for(int c1 = 0; c1 < n; ++c1)
        {
            double error = cost();
            cout << "final error: " << error << "\n";
            theta[c1] = theta[c1] - alpha * error;
            cout << "theta: " << c1 << " " << theta[c1] << "\n";
        }
    }
    void train()
    {
        for(int epoch = 0; epoch <= 10; ++epoch)
        {
            gradient_descent();
            cout << "epoch: " << epoch << "\n";
        }
    }
    vector<double> predict()
    {
        double temp_total;
        double total;
        vector<double> final_total;
        //hypothesis equivalent function
        temp_total = theta[0] * 1;
        for(int c1 = 0; c1 < test_m; ++c1)
        {
            for(int c2 = 0; c2 < xn; ++c2)
            {
                temp_total += theta[c2 + 1] * test_x[c1][c2];
            }
            total = sigmoid(temp_total);
            //cout << "final total: " << final_total;
            final_total.push_back(total);
        }
        return final_total;
    }
};

int main()
{
    Logistic test;
    test.train();
    vector<double> prediction = test.predict();
    for(int c1 = 0; c1 < test_m; ++c1)
    {
        cout << "prediction: " << prediction[c1] << "\n";
    }
}
Start with a very small learning rate and a larger iteration count, and try again. I haven't tested your code, but I'd guess the cost/error/energy jumps from hump to hump.
Somewhat unrelated to your question, but rather than computing e^-total using pow, use exp instead (it's a hell of a lot faster!). Also there is no need to make the sigmoid function a member func, make it static or just a normal C func (it doesn't require any member variable from your struct).
static double sigmoid(double total)
{
    return 1.0 / (1.0 + exp(-total));
}
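Beyond the learning rate, the update rule itself looks wrong: gradient_descent() subtracts alpha * cost() from every theta, but the cost is a single scalar, not the per-parameter partial derivative from the update rule quoted above (note also that error in cost() is read before it is ever initialized). A sketch of a corrected member function, reusing the question's globals x, y, m, n, xn and its h(); not tested against the poster's setup:

void gradient_descent()
{
    double alpha = .01;
    double grad[4] = {0, 0, 0, 0};        // one partial derivative per theta
    for(int i = 0; i < m; ++i)
    {
        double diff = h(i) - y[i];        // prediction error for sample i
        grad[0] += diff;                  // bias term, since x_i0 == 1
        for(int j = 0; j < xn; ++j)
            grad[j + 1] += diff * x[i][j];
    }
    for(int j = 0; j < n; ++j)
        theta[j] -= alpha * grad[j] / m;  // step along the averaged gradient
}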

Anderson Darling Test in C++

I am trying to compute the Anderson-Darling test found here. I followed the steps on Wikipedia and used MATLAB to verify the average and standard deviation of the data I am testing, denoted X. I also used a function called phi to compute the standard normal CDF, and I have tested that function to make sure it is correct, which it is. The problem appears when I actually compute the A-squared statistic (written A^2 on Wikipedia; I denote it as A in my C++ code).
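For reference, the statistic in question, with Phi the standard normal CDF and the Y_i sorted in ascending order, is

A^2 = -n - (1/n) * sum_{i=1}^{n} (2i - 1) * [ ln(Phi(Y_i)) + ln(1 - Phi(Y_{n+1-i})) ]

(the case where the mean and variance are estimated from the data).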
Here is the function I wrote for the Anderson-Darling test:
void Anderson_Darling(int n, double X[]){
    sort(X, X + n);
    // Find the mean of X
    double X_avg = 0.0;
    double sum = 0.0;
    for(int i = 0; i < n; i++){
        sum += X[i];
    }
    X_avg = ((double)sum)/n;
    // Find the variance of X
    double X_sig = 0.0;
    for(int i = 0; i < n; i++){
        X_sig += (X[i] - X_avg)*(X[i] - X_avg);
    }
    X_sig /= n;
    // The values X_i are standardized to create new values Y_i
    double Y[n];
    for(int i = 0; i < n; i++){
        Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
        //cout << Y[i] << endl;
    }
    // With a standard normal CDF, we calculate the Anderson_Darling Statistic
    double A = 0.0;
    for(int i = 0; i < n; i++){
        A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
    }
    cout << A << endl;
}
Note: I know the Anderson-Darling formula (A-squared) runs from i = 1 to i = n; when I shifted the index to make it work in C++, I got the same result as I did without the shift.
The value I get in C++ is:
-4e+006
The value I should get, received in MATLAB is:
0.2330
Any suggestions are greatly appreciated.
Here is my whole code:
#include <iostream>
#include <math.h>
#include <cmath>
#include <random>
#include <algorithm>
#include <chrono>
using namespace std;

double *Box_Muller(int n, double u[]);
double *Beasley_Springer_Moro(int n, double u[]);
void Anderson_Darling(int n, double X[]);
double phi(double x);

int main(){
    int n = 2000;
    double Mersenne[n];
    random_device rd;
    mt19937 e2(1);
    uniform_real_distribution<double> dist(0, 1);
    for(int i = 0; i < n; i++){
        Mersenne[i] = dist(e2);
    }
    // Print Anderson Statistic for Mersenne 6a
    double *result = new double[n];
    result = Box_Muller(n, Mersenne);
    Anderson_Darling(n, result);
    return 0;
}

double *Box_Muller(int n, double u[]){
    double *X = new double[n];
    double Y[n];
    double R_2[n];
    double theta[n];
    for(int i = 0; i < n; i++){
        R_2[i] = -2.0*log(u[i]);
        theta[i] = 2.0*M_PI*u[i+1];
    }
    for(int i = 0; i < n; i++){
        X[i] = sqrt(-2.0*log(u[i]))*cos(2.0*M_PI*u[i+1]);
        Y[i] = sqrt(-2.0*log(u[i]))*sin(2.0*M_PI*u[i+1]);
    }
    return X;
}

double *Beasley_Springer_Moro(int n, double u[]){
    double y[n];
    double r[n+1];
    double *x = new double(n);
    // Constants needed for algo
    double a_0 = 2.50662823884;      double b_0 = -8.47351093090;
    double a_1 = -18.61500062529;    double b_1 = 23.08336743743;
    double a_2 = 41.39119773534;     double b_2 = -21.06224101826;
    double a_3 = -25.44106049637;    double b_3 = 3.13082909833;
    double c_0 = 0.3374754822726147; double c_5 = 0.0003951896511919;
    double c_1 = 0.9761690190917186; double c_6 = 0.0000321767881768;
    double c_2 = 0.1607979714918209; double c_7 = 0.0000002888167364;
    double c_3 = 0.0276438810333863; double c_8 = 0.0000003960315187;
    double c_4 = 0.0038405729373609;
    // Set r and x to empty for now
    for(int i = 0; i <= n; i++){
        r[i] = 0.0;
        x[i] = 0.0;
    }
    for(int i = 1; i <= n; i++){
        y[i] = u[i] - 0.5;
        if(fabs(y[i]) < 0.42){
            r[i] = pow(y[i],2.0);
            x[i] = y[i]*(((a_3*r[i] + a_2)*r[i] + a_1)*r[i] + a_0)/((((b_3*r[i] + b_2)*r[i] + b_1)*r[i] + b_0)*r[i] + 1);
        }else{
            r[i] = u[i];
            if(y[i] > 0.0){
                r[i] = 1.0 - u[i];
                r[i] = log(-log(r[i]));
                x[i] = c_0 + r[i]*(c_1 + r[i]*(c_2 + r[i]*(c_3 + r[i]*(c_4 + r[i]*(c_5 + r[i]*(c_6 + r[i]*(c_7 + r[i]*c_8)))))));
            }
            if(y[i] < 0){
                x[i] = -x[i];
            }
        }
    }
    return x;
}

double phi(double x){
    return 0.5 * erfc(-x * M_SQRT1_2);
}

void Anderson_Darling(int n, double X[]){
    sort(X, X + n);
    // Find the mean of X
    double X_avg = 0.0;
    double sum = 0.0;
    for(int i = 0; i < n; i++){
        sum += X[i];
    }
    X_avg = ((double)sum)/n;
    // Find the variance of X
    double X_sig = 0.0;
    for(int i = 0; i < n; i++){
        X_sig += (X[i] - X_avg)*(X[i] - X_avg);
    }
    X_sig /= (n-1);
    // The values X_i are standardized to create new values Y_i
    double Y[n];
    for(int i = 0; i < n; i++){
        Y[i] = (X[i] - X_avg)/(sqrt(X_sig));
        //cout << Y[i] << endl;
    }
    // With a standard normal CDF, we calculate the Anderson_Darling Statistic
    double A = -n;
    for(int i = 0; i < n; i++){
        A += -1.0/(double)n *(2*(i+1) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n - i])));
    }
    cout << A << endl;
}
Let me guess, your n was 2000. Right?
The major issue here is the 1/n in the last expression. 1 is an int and so is n; when you divide 1 by n it performs integer division, and 1 divided by any number > 1 is 0 under integer division (think of it as keeping only the integer part of the quotient). What you need to do is cast n to double by writing 1/(double)n.
The rest should work fine.
Summary from the discussion:
-Indexes into Y[] should be i and n-1-i respectively.
-n should not be added inside the loop, but only once.
-Minor fixes, like changing the divisor to n instead of n-1 when calculating the variance.
You have integer division here:
A += -n - 1/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
^^^
1/n is zero when n > 1 - you need to change this to, e.g.: 1.0/n:
A += -n - 1.0/n *(2*(i) - 1)*(log(phi(Y[i])) + log(1 - phi(Y[n+1 - i])));
^^^^^
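Putting the summary's fixes together (indices i and n-1-i after the 0-based shift, -n contributed once outside the loop, floating-point division by n, and variance divided by n), the function would look something like this; my assembly of the points above, not verified against the MATLAB value:

void Anderson_Darling(int n, double X[]){
    sort(X, X + n);
    double sum = 0.0;
    for(int i = 0; i < n; i++){
        sum += X[i];
    }
    double X_avg = sum/n;
    double X_sig = 0.0;
    for(int i = 0; i < n; i++){
        X_sig += (X[i] - X_avg)*(X[i] - X_avg);
    }
    X_sig /= n;                        // divisor n, per the summary
    double Y[n];
    for(int i = 0; i < n; i++){
        Y[i] = (X[i] - X_avg)/sqrt(X_sig);
    }
    double A = -n;                     // -n added once, not per term
    for(int i = 0; i < n; i++){
        // 1-based i and n+1-i become Y[i] and Y[n-1-i] in 0-based C++
        A += -(2.0*(i+1) - 1)/n * (log(phi(Y[i])) + log(1 - phi(Y[n-1-i])));
    }
    cout << A << endl;
}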