Perceptron Model in C++ not Converging - c++

So I was writing a simple Perceptron model and when I finished the code and saw that there were no errors I was pretty surprised. But it seems like my model doesn't converge (along with some other oddities).
Basically it keeps getting 25/100 samples right at every epoch. And when every epoch ends the weights are always coming back to 0.
Due to the fact that the code is on multiple files I put it on Google Drive here it is:
https://drive.google.com/folderview?id=0B_r3mf9HbUrLaDNlc1F6RXhNMnM&usp=sharing
It is a Visual Studio Community 2013 project. You can open and run it so that you get a better idea.
Here's a quick preview of the files though.
main.cpp:
#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <string>
#include <math.h>
#include "LinearAlgebra.h"
#include "MachineLearning.h"
using namespace std;
using namespace LinearAlgebra;
using namespace MachineLearning;
void printVector(vector< vector<float> > X);
vector< vector<float> > getIrisX();
vector<float> getIrisy();
int main()
{
vector< vector<float> > X = getIrisX();
vector<float> y = getIrisy();
vector<float> test1;
test1.push_back(5.0);
test1.push_back(3.3);
test1.push_back(1.4);
test1.push_back(0.2);
vector<float> test2;
test2.push_back(6.0);
test2.push_back(2.2);
test2.push_back(5.0);
test2.push_back(1.5);
//printVector(X);
//for (int i = 0; i < y.size(); i++){ cout << y[i] << " "; }cout << endl;
perceptron clf(0.1, 10);
clf.fit(X, y);
cout << "Now Predicting: 5.0,3.3,1.4,0.2(CorrectClass=1,Iris-setosa) -> " << clf.predict(test1) << endl;
cout << "Now Predicting: 6.0,2.2,5.0,1.5(CorrectClass=-1,Iris-virginica) -> " << clf.predict(test2) << endl;
system("PAUSE");
return 0;
}
void printVector(vector< vector<float> > X)
{
for (int i = 0; i < X.size(); i++)
{
for (int j = 0; j < X[i].size(); j++)
{
cout << X[i][j] << " ";
}
cout << endl;
}
}
vector<float> getIrisy()
{
vector<float> y;
ifstream inFile;
inFile.open("y.data");
string sampleClass;
for (int i = 0; i < 100; i++)
{
inFile >> sampleClass;
if (sampleClass == "Iris-setosa")
{
y.push_back(1);
}
else
{
y.push_back(-1);
}
}
return y;
}
vector< vector<float> > getIrisX()
{
ifstream af;
ifstream bf;
ifstream cf;
ifstream df;
af.open("a.data");
bf.open("b.data");
cf.open("c.data");
df.open("d.data");
vector< vector<float> > X;
for (int i = 0; i < 100; i++)
{
char scrap;
int scrapN;
af >> scrapN;
bf >> scrapN;
cf >> scrapN;
df >> scrapN;
af >> scrap;
bf >> scrap;
cf >> scrap;
df >> scrap;
float a, b, c, d;
af >> a;
bf >> b;
cf >> c;
df >> d;
X.push_back(vector < float > {a, b, c, d});
}
af.close();
bf.close();
cf.close();
df.close();
return X;
}
MachineLearning.h:
#pragma once
#include<vector>
using namespace std;
namespace MachineLearning{
class perceptron
{
public:
perceptron(float eta,int epochs);
float netInput(vector<float> X);
int predict(vector<float> X);
void fit(vector< vector<float> > X, vector<float> y);
private:
float m_eta;
int m_epochs;
vector < float > m_w;
vector < float > m_errors;
};
}
MachineLearning.cpp
#include<vector>
#include <algorithm>
#include <iostream>
#include<fstream>
#include <math.h>
#include "MachineLearning.h"
using namespace std;
namespace MachineLearning{
perceptron::perceptron(float eta, int epochs)
{
m_epochs = epochs;
m_eta = eta;
}
void perceptron::fit(vector< vector<float> > X, vector<float> y)
{
for (int i = 0; i < X[0].size() + 1; i++) // X[0].size() + 1 -> I am using +1 to add the bias term
{
m_w.push_back(0);
}
for (int i = 0; i < m_epochs; i++)
{
int errors = 0;
for (int j = 0; j < X.size(); j++)
{
float update = m_eta * (y[j] - predict(X[j]));
m_w[0] = update;
for (int w = 1; w < m_w.size(); w++){ m_w[w] = update * X[j][w - 1]; }
errors += update != 0 ? 1 : 0;
}
m_errors.push_back(errors);
}
}
float perceptron::netInput(vector<float> X)
{
// Sum(Vector of weights * Input vector) + bias
float probabilities = m_w[0];
for (int i = 0; i < X.size(); i++)
{
probabilities += X[i] * m_w[i + 1];
}
return probabilities;
}
int perceptron::predict(vector<float> X)
{
return netInput(X) > 0 ? 1 : -1; //Step Function
}
}
Any kind of help is much appreciated.
Thanks in advance.
Panos P.

After hours of tedious debugging I finally found the mistake. There was a bug in my code when I updated the weights.
for (int j = 0; j < X.size(); j++)
{
float update = m_eta * (y[j] - predict(X[j]));
m_w[0] = update;
for (int w = 1; w < m_w.size(); w++){ m_w[w] = update * X[j][w - 1]; }
errors += update != 0 ? 1 : 0;
}
notice that:
m_w[w] = update * X[j][w - 1]
I am setting the weights as equal to the update. It looks like I forgot a "+" sign. Now it works fine.
Here's it is now:
m_w[w] += update * X[j][w - 1]
Sometimes the silliest mistakes can cause the most annoying of errors.
I hope that this might help anyone making the same mistake.

Related

Trying to implement Willans' formula for the n-th prime, what's the problem with my code?

The formula is listed in the following article: https://en.wikipedia.org/wiki/Formula_for_primes. I am trying to implement it but to no success, for whatever reason the code is producing number which seem to be nth power of two + 1, which is obviously not what I want to achieve.
#include <iostream>
#include <cmath>
using namespace std;
int nth_prime(int n) {
double s = 1;
for (int i = 1; i <= pow(2, n); i++) {
double c = 0;
for (int j = 1; j <= i; j++) {
double f = (tgamma(j)+1)/j;
c+=floor(pow(cos(M_PI*f), 2));
}
s+=floor(pow(n/c, 1/n));
}
return s;
}
int main() {
int n;
while (cin >> n) {
cout << nth_prime(n) << endl;
}
return 0;
}

error: 'int' is not a class, struct, or union type typedef typename _Iterator::iterator_category iterator_category;

I am trying this code about Kohonen Network:
// Fausett.cpp
#include <iostream>
#include <cstdlib>
#include <ctime>
using namespace std;
double euclidean(double *vec1,double *vec2,int n)
{
double dist = 0.0;
for(int i=0;i<n;i++) dist += (vec1[i]-vec2[i])*(vec1[i]-vec2[i]);
return dist;
}
double distance(int i,int jstar)
{
return double(i!=jstar);
// returns 1.0 if i!=jstar, returns 0.0 if i==jstar
}
double h(double d) { return 1.0-d; }
void train(double **W,int n,int cols,double *vec,double rate)
{
int i,j;
int win = 0;
double windist = euclidean(W[0],vec,n),edist;
for(i=0;i<cols;i++){
if((edist=euclidean(W[i],vec,n)) < windist)
{ win = i; windist = edist;}
for(i=0;i<cols;i++)
for(j=0;j<n;j++)
W[i][j] += (rate*h(distance(i,win)))*(vec[j]-W[i][j]);
}
int main(void)
{
int i, j;
int T = 10000; // number of iterations
double eta = 0.6; // learning rate
const int m = 4;
int cols;
// training vectors
double x0[m] = { 1.0,1.0,0.0,0.0 };
double x1[m] = { 0.0,0.0,0.0,1.0 };
double x2[m] = { 1.0,0.0,0.0,0.0 };
double x3[m] = { 0.0,0.0,1.0,1.0 };
cout << "Enter number of columns for weight matrix: ";
cin >> cols;
double** W = NULL; W = new double*[cols];
for(i=0;i<cols;i++) W[i] = new double[m];
srand(time(NULL));
for(i=0;i<cols;i++)
for(j=0;j<m;j++) W[i][j] = rand()/double(RAND_MAX);
for(i=0;i<T;i++)
{
train(W,m,cols,x0,eta); train(W,m,cols,x1,eta);
train(W,m,cols,x2,eta); train(W,m,cols,x3,eta);
eta /= 1.05; // learning rate decreased
}
for(i=0;i<cols;i++)
{
cout << "W[" << i << "]= [";
for(j=0;j<m;j++) cout << W[i][j] << " ";
cout << "]" << endl;
}
for(i=0;i<cols;i++) delete[] W[i];
delete[] W;
return 0;
}
its from book Willi - Hans Steeb, I copy paste that, but it gives me error when compiled. error: 'int' is not a class, struct, or union type typedef typename _Iterator::iterator_category iterator_category, this is one of the error messages and I have no idea at all why this happen. Can someone please explain it? I am a newbie in C++.

Program crashes after checking index at array

Please note: I am not sure if this fits here, if not, please move to the proper forum.
So I have a progrmam that tries to solve th Traveling Salesman Problem, TSP for short.
My code seems to run fine until I try to use 33810 cities, in which the program crashes after trying to access the position costs[69378120], it simply stops responding and end soon after.
I am trying the folowing code:
#include <iostream>
#include <stdlib.h>
#include <malloc.h>
#include <fstream>
#include <math.h>
#include <vector>
#include <limits>
using namespace std;
typedef long long int itype;
int main(int argc, char *argv[]) {
itype n;
ifstream fenter;
fenter.open(argv[1]);
ofstream fexit;
fexit.open(argv[2]);
fenter>> n;
double *x;
double *y;
x = (double*) malloc(sizeof(double)*n);
y = (double*) malloc(sizeof(double)*n);
cout<<"N : "<<n<<endl;
for (int p = 0; p < n; p++) {
fenter>> x[p] >> y[p];
}
fenter.close();
int *costs;
costs = (int*) malloc(sizeof(int)*(n*n));
for (int u = 0; u < n; u++) {
for (int v = u+1; v < n; v++) {
itype cost = floor(sqrt(pow(x[u] - x[v], 2) + pow(y[u] - y[v], 2)));
cout<<"U: "<<u<<" V: "<<v<<" COST: "<<cost<<endl;
costs[u*n + v] = cost;
cout<<"POS (u*n + v): "<<(u*n + v)<<endl;
cout<<"POS (v*n + u): "<<(v*n + u)<<endl;
costs[v*n + u] = cost;
}
}
return 0;
}
According with some verifications, the cost array should use 9.14493GB, but Windows only gives 0.277497GB. Then after triying to read costs[69378120], it closes.
For now, I not worried about the efficiency, nor the solution to the TSP, just need to fix this issue. Any clues?
---UPDATE---
Following the sugestions I tried changing a few things. the result is the code below
int main(int argc, char *argv[]) {
int n;
ifstream entrada;
entrada.open(argv[1]);
ofstream saida;
saida.open(argv[2]);
entrada >> n;
vector<double> x(n);
vector<double> y(n);
for (int p = 0; p < n; p++) {
entrada >> x[p] >> y[p];
}
entrada.close();
vector<itype> costs(n*n);
if(costs == NULL){ cout << "Sem memória!" << endl; return -1;}
for (int u = 0; u < n; u++) {
for (int v = u+1; v < n; v++) {
itype cost = floor(sqrt(pow(x[u] - x[v], 2) + pow(y[u] - y[v], 2)));
costs[u*n + v] = cost;
costs[v*n + u] = cost;
}
}
return 0;
}
The problem still persists
If compiling in 32-bit size_t is 32 bit and then
1143116100*4
is larger than the largest 32-bit number hence int overrun.
Compiling in 64-bit
size_t siz = 1143116100;
std::vector<long long> big(siz);
std::cout << big.size() << ", " << big.max_size() << std::endl;
which prints
1143116100, 2305843009213693951
if I change it to
size_t siz = 1024*1143116100;
I get a bad_alloc as my swap disk is not big enough for that.

c++ - Segmentation fault for class function of vector of custom class

I am using following code to run kmeans algorithm on Iris flower dataset- https://github.com/marcoscastro/kmeans/blob/master/kmeans.cpp
I have modified the above code to read input from files. Below is my code -
#include <iostream>
#include <vector>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <algorithm>
#include <fstream>
using namespace std;
class Point
{
private:
int id_point, id_cluster;
vector<double> values;
int total_values;
string name;
public:
Point(int id_point, vector<double>& values, string name = "")
{
this->id_point = id_point;
total_values = values.size();
for(int i = 0; i < total_values; i++)
this->values.push_back(values[i]);
this->name = name;
this->id_cluster = -1;
}
int getID()
{
return id_point;
}
void setCluster(int id_cluster)
{
this->id_cluster = id_cluster;
}
int getCluster()
{
return id_cluster;
}
double getValue(int index)
{
return values[index];
}
int getTotalValues()
{
return total_values;
}
void addValue(double value)
{
values.push_back(value);
}
string getName()
{
return name;
}
};
class Cluster
{
private:
int id_cluster;
vector<double> central_values;
vector<Point> points;
public:
Cluster(int id_cluster, Point point)
{
this->id_cluster = id_cluster;
int total_values = point.getTotalValues();
for(int i = 0; i < total_values; i++)
central_values.push_back(point.getValue(i));
points.push_back(point);
}
void addPoint(Point point)
{
points.push_back(point);
}
bool removePoint(int id_point)
{
int total_points = points.size();
for(int i = 0; i < total_points; i++)
{
if(points[i].getID() == id_point)
{
points.erase(points.begin() + i);
return true;
}
}
return false;
}
double getCentralValue(int index)
{
return central_values[index];
}
void setCentralValue(int index, double value)
{
central_values[index] = value;
}
Point getPoint(int index)
{
return points[index];
}
int getTotalPoints()
{
return points.size();
}
int getID()
{
return id_cluster;
}
};
class KMeans
{
private:
int K; // number of clusters
int total_values, total_points, max_iterations;
vector<Cluster> clusters;
// return ID of nearest center (uses euclidean distance)
int getIDNearestCenter(Point point)
{
double sum = 0.0, min_dist;
int id_cluster_center = 0;
for(int i = 0; i < total_values; i++)
{
sum += pow(clusters[0].getCentralValue(i) -
point.getValue(i), 2.0);
}
min_dist = sqrt(sum);
for(int i = 1; i < K; i++)
{
double dist;
sum = 0.0;
for(int j = 0; j < total_values; j++)
{
sum += pow(clusters[i].getCentralValue(j) -
point.getValue(j), 2.0);
}
dist = sqrt(sum);
if(dist < min_dist)
{
min_dist = dist;
id_cluster_center = i;
}
}
return id_cluster_center;
}
public:
KMeans(int K, int total_points, int total_values, int max_iterations)
{
this->K = K;
this->total_points = total_points;
this->total_values = total_values;
this->max_iterations = max_iterations;
}
void run(vector<Point> & points)
{
if(K > total_points)
return;
vector<int> prohibited_indexes;
printf("Inside run \n");
// choose K distinct values for the centers of the clusters
printf(" K distinct cluster\n");
for(int i = 0; i < K; i++)
{
while(true)
{
int index_point = rand() % total_points;
if(find(prohibited_indexes.begin(), prohibited_indexes.end(),
index_point) == prohibited_indexes.end())
{
printf("i= %d\n",i);
prohibited_indexes.push_back(index_point);
points[index_point].setCluster(i);
Cluster cluster(i, points[index_point]);
clusters.push_back(cluster);
break;
}
}
}
int iter = 1;
printf(" Each point to nearest cluster\n");
while(true)
{
bool done = true;
// associates each point to the nearest center
for(int i = 0; i < total_points; i++)
{
int id_old_cluster = points[i].getCluster();
int id_nearest_center = getIDNearestCenter(points[i]);
if(id_old_cluster != id_nearest_center)
{
if(id_old_cluster != -1)
clusters[id_old_cluster].removePoint(points[i].getID());
points[i].setCluster(id_nearest_center);
clusters[id_nearest_center].addPoint(points[i]);
done = false;
}
}
// recalculating the center of each cluster
for(int i = 0; i < K; i++)
{
for(int j = 0; j < total_values; j++)
{
int total_points_cluster = clusters[i].getTotalPoints();
double sum = 0.0;
if(total_points_cluster > 0)
{
for(int p = 0; p < total_points_cluster; p++)
sum += clusters[i].getPoint(p).getValue(j);
clusters[i].setCentralValue(j, sum / total_points_cluster);
}
}
}
if(done == true || iter >= max_iterations)
{
cout << "Break in iteration " << iter << "\n\n";
break;
}
iter++;
}
// shows elements of clusters
for(int i = 0; i < K; i++)
{
int total_points_cluster = clusters[i].getTotalPoints();
cout << "Cluster " << clusters[i].getID() + 1 << endl;
for(int j = 0; j < total_points_cluster; j++)
{
cout << "Point " << clusters[i].getPoint(j).getID() + 1 << ": ";
for(int p = 0; p < total_values; p++)
cout << clusters[i].getPoint(j).getValue(p) << " ";
string point_name = clusters[i].getPoint(j).getName();
if(point_name != "")
cout << "- " << point_name;
cout << endl;
}
cout << "Cluster values: ";
for(int j = 0; j < total_values; j++)
cout << clusters[i].getCentralValue(j) << " ";
cout << "\n\n";
}
}
};
int main(int argc, char *argv[])
{
srand(time(NULL));
int total_points, total_values, K, max_iterations, has_name;
ifstream inFile("datafile.txt");
if (!inFile) {
cerr << "Unable to open file datafile.txt";
exit(1); // call system to stop
}
inFile >> total_points >> total_values >> K >> max_iterations >> has_name;
cout << "Details- \n";
vector<Point> points;
string point_name,str;
int i=0;
while(inFile.eof())
{
string temp;
vector<double> values;
for(int j = 0; j < total_values; j++)
{
double value;
inFile >> value;
values.push_back(value);
}
if(has_name)
{
inFile >> point_name;
Point p(i, values, point_name);
points.push_back(p);
i++;
}
else
{
inFile >> temp;
Point p(i, values);
points.push_back(p);
i++;
}
}
inFile.close();
KMeans kmeans(K, total_points, total_values, max_iterations);
kmeans.run(points);
return 0;
}
Output of code is -
Details-
15043100000Inside run
K distinct cluster i= 0
Segmentation fault
When I run it in gdb, the error shown is -
Program received signal SIGSEGV, Segmentation fault.
0x0000000000401db6 in Point::setCluster (this=0x540, id_cluster=0)
at kmeans.cpp:41
41 this->id_cluster = id_cluster;
I am stuck at this as I cannot find the cause for this segmentation fault.
My dataset file looks like -
150 4 3 10000 1
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
. . .
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
. . .
in KMeans::run(vector<Point>&) you call points[index_point].setCluster(i); without any guarantee that index_point is within bounds.
index_point is determined by int index_point = rand() % total_points;, and total_points is retrieved from the input file "datafile.txt" which could be anything. It certainly does not have to match points.size(), but it should. Make sure it does, or just use points.size() instead.
A bit offtopic, but using rand() and only using modulo is almost always wrong. If you use C++11 or newer, please consider using std::uniform_int_distribution.
points[index_point].setCluster(i); could be accessing the vector out of bounds. The code you quoted actually always sets a number of total_points in the vector points before calling run, while your modified code just reads until end of file and has no guarantees that the number of total points passed to the constructor of KMeans matches the value of entries in points. Either fix your file I/O or fix the logic of bounds checking.

Geometric median/meeting point 2D realization

I have some problems with my program, it currently gives the wrong results for finding a meeting point.
I choose to use geometric median algorithm for searching for a meeting point, as described here .
Also I have implemented a brute-force algorithm, just to compare the results.
Source code were EDIT to possible solution, correct me, it's not working sometimes for > 100000 points:
#include <vector>
#include <random>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <cmath>
using namespace std;
long double ComputeMean(vector<long long> InputData) {
long double rtn = 0;
for (unsigned int i = 0; i < InputData.size(); i++) {
rtn += InputData[i];
}
if(rtn == 0) return rtn;
return rtn/InputData.size();
}
long double CallRecursiveAverage(long double m0, vector<long long> X) {
long double m1 =0 ;
long double numerator = 0, denominator = 0;
for (unsigned int i = 0; i < X.size(); i++) {
long double temp =abs((X[i] - m0));
if(X[i]!=0 && temp!=0) {
numerator += X[i] / temp;
}
if(temp!=0) {
denominator += 1 / temp;
}
}
if( denominator != 0 ) {
m1 = numerator / denominator;
}
return m1;
}
long double ComputeReWeightedAverage(vector<long long> InputVector) {
long double m0 = ComputeMean(InputVector);
long double m1 = CallRecursiveAverage(m0, InputVector);
while (abs(m1 - m0) > 1e-6) {
m0 = m1;
m1 = CallRecursiveAverage(m0, InputVector);
}
return m1;
}
int randomizer(){
int n =(rand() % 1000000 + 1)*(-1 + ((rand() & 1) << 1));
return(n);
}
struct points
{
long double ch;
long long remp;
bool operator<(const points& a) const
{
return ch < a.ch;
}
};
int main () {
long double houses=10;
// rand() % 100 + 1;
// cin >> houses;
vector <long long> x;
vector <long long> y;
vector <long long> xr;
vector <long long> yr;
vector <long long> sums;
vector <long long> remp;
long long x0, y0;
long double path = 1e9;
long double sumy = 0;
long double sumx = 0;
long double avgx = 1;
long double avgy = 1;
srand((unsigned)time(NULL));
int rnd;
for(int i = 0; i < houses; i++) {
// cin>>x0>>y0;
x0 = randomizer();
x.push_back(x0);
sumx += x0;
y0 = randomizer();
y.push_back(y0);
sumy += y0;
}
if(sumx!=0) {
avgx=ComputeReWeightedAverage(x);
} else {
avgx=0;
}
if(sumy!=0) {
avgy=ComputeReWeightedAverage(y);
} else {
avgy=0;
}
long double check=1e9;
long double pathr=0;
int rx, ry;
long double wpath=1e9;
///brute force////
for(int j = 0; j < houses; j++) {
pathr = 0;
for(int i = 0; i < houses; i++) {
pathr += max(abs(x[i] - x[j]), abs(y[i] - y[j]));
}
if(pathr<wpath)
{
wpath = pathr;
ry=j;
}
}
cout << "\nx ="<<x[ry]<<"\n";
cout << "y ="<<y[ry]<<"\n";
cout << "bruteForce path ="<<wpath<<"\n\n";
////end brute force///
cout << "avgx ="<<avgx<<"\n";
cout << "avgy ="<<avgy<<"\n";
vector<points> ch;
for(int j = 0; j < houses; j++) {
remp.push_back(j);
points tb;
tb.ch=max(abs(x[j] - (avgx)), abs(y[j] - (avgy)));
tb.remp=j;
ch.push_back(tb) ;
}
sort(ch.begin(),ch.end());
path =1e9;
for(unsigned int z = 0; z < 10; z++) {
pathr = 0;
for(int i = 0; i < houses; i++) {
pathr += max(abs(x[i] - x[ch[z].remp]), abs(y[i] - y[ch[z].remp]));
}
if(pathr<path)
{
path = pathr;
}
}
cout << "x ="<<x[remp[0]]<<"\n";
cout << "y ="<<y[remp[0]]<<"\n";
cout << "Weizsfield path ="<<path<<"\n\n";
if (wpath!=path){ cout <<"ERRROR"<<"\n";
cout << "dots\n";
for(int i = 0; i < houses; i++) {
cout << x[i]<<" "<<y[i]<<"\n";
}
cout << "dots\n\n";
}
return 0;
}
Where did I make a mistake in my program? Any help will be appreciated.
EDIT
Is changing search radius of nearest points to geometric median and checking path for all of them the best approach? If answer is yes, how do I find the optimal start radius?
The Weiszfeld algorithm is one that approximates the geometric median and will therefore very often deviate from the real one computed by brute force.
Increasing the search radius will probably help.