Related
I'm implementing a modified compressed sparse row matrix [reference],
but I have a problem with Matrix * vector multiplication, I wrote the function but I don't reach to find the bug !
the class used 2 container (std::vector) for store
Diagonal element (aa_[0] to aa_[dim])
the non zero value off-diagonal (aa_[dim+2] to aa_[size_of_non_zero])
pointer of the first element in the row (ja_[0] to ja_[dim] )
in the previous pointer this rules is used : ja_[0]=dim+1 ; ja_[i+1]-ja[i]= number of element in i-th row
column index stored in ja_[ja_[row]] for ja_[row] described above is range is ja[0] to ja[dim+1] ,so the colum index are in ja_[dim+2] to ja_[size_of_non_zero elment]
here the minimal code :
# include <initializer_list>
# include <vector>
# include <iosfwd>
# include <string>
# include <cstdlib>
# include <cassert>
# include <iomanip>
# include <cmath> for(auto i=0; i< A.dim ; i++)
{
//for(auto k=A.ja_.at(i) ; k <= A.ja_.at(i+1)-1 ; k++ )
auto k=A.ja_.at(i)-1;
do
{
b.at(i) += A.aa_.at(k)* x.at(A.ja_.at(k)-1);
k++ ; for(auto i=0; i< A.dim ; i++)
{
//for(auto k=A.ja_.at(i) ; k <= A.ja_.at(i+1)-1 ; k++ )
auto k=A.ja_.at(i)-1;
do
{
b.at(i) += A.aa_.at(k)* x.at(A.ja_.at(k)-1);
k++ ;
}while (k < A.ja_.at(i+1)-1 ); // ;
}
return b;
}while (k < A.ja_.at(i+1)-1 ); // ;
}
return b;
# include <set>
# include <fstream>
template <typename data_type>
class MCSRmatrix {
public:
using itype = std::size_t ;
template <typename T>
friend std::vector<T> operator*(const MCSRmatrix<T>& A, const std::vector<T>& x ) noexcept ;
public:
constexpr MCSRmatrix( std::initializer_list<std::initializer_list<data_type>> rows);
private:
std::vector<data_type> aa_ ; // vector of value
std::vector<itype> ja_ ; // pointer vector
int dim ;
};
//constructor
template <typename T>
constexpr MCSRmatrix<T>::MCSRmatrix( std::initializer_list<std::initializer_list<T>> rows)
{
this->dim = rows.size();
auto _rows = *(rows.begin());
aa_.resize(dim+1);
ja_.resize(dim+1);
if(dim != _rows.size()) for(auto i=0; i< A.dim ; i++)
{
//for(auto k=A.ja_.at(i) ; k <= A.ja_.at(i+1)-1 ; k++ )
auto k=A.ja_.at(i)-1;
do
{
b.at(i) += A.aa_.at(k)* x.at(A.ja_.at(k)-1);
k++ ;
}while (k < A.ja_.at(i+1)-1 ); // ;
}
return b;
{
throw std::runtime_error("error matrix must be square");
}
itype w = 0 ;
ja_.at(w) = dim+2 ;
for(auto ii = rows.begin(), i=1; ii != rows.end() ; ++ii, i++)
{
for(auto ij = ii->begin(), j=1, elemCount = 0 ; ij != ii->end() ; ++ij, j++ )
{
if(i==j)
aa_[i-1] = *ij ;
else if( i != j && *ij != 0 )
{
ja_.push_back(j);
aa_.push_back(*ij);
elemCount++ ;
}
ja_[i] = ja_[i-1] + elemCount;
}
}
for(auto& x : aa_ )
std::cout << x << ' ' ;
std::cout << std::endl;
for(auto& x : ja_ )
std::cout << x << ' ' ;
std::cout << std::endl;
}
template <typename T>
std::vector<T> operator*(const MCSRmatrix<T>& A, const std::vector<T>& x ) noexcept
{
std::vector<T> b(A.dim);
for(auto i=0; i < A.dim ; i++ )
b.at(i) = A.aa_.at(i)* x.at(i) ;
for(auto i=0; i< A.dim ; i++)
{
for(auto k=A.ja_.at(i) ; k < A.ja_.at(i+1)-1 ; k++ )
{
b.at(i) += A.aa_.at(k)* x.at(A.ja_.at(k));
}
}
return b;
}
and finally the main
# include "ModCSRmatrix.H"
using namespace std;
int main(){
std::vector<double> v1={0,1.3,4.2,0.8};
MCSRmatrix<double> m1 = {{1.01, 0 , 2.34,0}, {0, 4.07, 0,0},{3.12,0,6.08,0},{1.06,0,2.2,9.9} };
std::vector<double> v2 = m1*v1 ;
for(auto& x : v2)
cout << x << ' ' ;
cout << endl;
}
but the result is different from the result obtain in octave !
I've correct the code and now compile ! it give me the result :
0 5.291 25.536 9.68
but the correct result obtained using octave is :
9.8280 5.2910 25.5360 17.1600
the strange thing is that the same code written in Fortran works!
MODULE MSR
IMPLICIT NONE
CONTAINS
subroutine amuxms (n, x, y, a,ja)
real*8 x(*), y(*), a(*)
integer n, ja(*)
integer i, k
do 10 i=1, n
y(i) = a(i)*x(i)
10 continue
do 100 i = 1,n
do 99 k=ja(i), ja(i+1)-1
y(i) = y(i) + a(k) *x(ja(k))
99 continue
100 continue
return
end
END MODULE
PROGRAM MSRtest
USE MSR
IMPLICIT NONE
INTEGER :: i
REAL(KIND(0.D0)), DIMENSION(4) :: y, x= (/0.,1.3,4.2,0.8/)
REAL(KIND(0.D0)), DIMENSION(9) :: AA = (/ 1.01, 4.07, 6.08, 9.9, 0., 2.34, 3.12, 1.06, 2.2/)
INTEGER , DIMENSION(9) :: JA = (/6, 7, 7, 8, 10, 3, 1, 1, 3/)
WRITE(6,FMT='(4F8.3)') (x(I), I=1,4)
CALL amuxms(4,x,y,aa,ja)
WRITE(6,FMT='(4F8.3)') (y(I), I=1,4)
END PROGRAM
in the above code the value of aa and ja is given by the c++ constructor putting this member
template <typename T>
inline auto constexpr MCSRmatrix<T>::printMCSR() const noexcept
{
for(auto& x : aa_ )
std::cout << x << ' ' ;
std::cout << std::endl;
for(auto& x : ja_ )
std::cout << x << ' ' ;
std::cout << std::endl;
}
and call it at the end of constructor! now I have added the lines of the member at the end of constructor so if you try the constructor you get exactly the same vector written in the fortran code
thanks I followed your advice #Paul H. and rewrite the operator + as follow:
(I didn't change the ja_ indexing because in my class I have a lot of already more or less un-bugged method )
template <typename T>
std::vector<T> operator*(const MCSRmatrix<T>& A, const std::vector<T>& x ) noexcept
{
std::vector<T> b(A.dim);
for(auto i=0; i < A.dim ; i++ )
b.at(i) = A.aa_.at(i)* x.at(i) ;
for(auto i=0; i< A.dim ; i++)
{
//for(auto k=A.ja_.at(i) ; k <= A.ja_.at(i+1)-1 ; k++ )
auto k=A.ja_.at(i)-1;
do
{
b.at(i) += A.aa_.at(k)* x.at(A.ja_.at(k)-1);
k++ ;
}while (k < A.ja_.at(i+1)-1 ); // ;
}
return b;
}
as You can see I have subtracts 1 from all ja_ using as indices :
x.at(A.ja_.at(k)-1) instead of x.at(A.ja_.at(k))
different start of index K k=A.ja_.at(i)-1
and different end of cicle (I've used a do while instead of for)
The debugger is your friend! For future reference, here is a link to a very good blog post on debugging small programs: How to debug small programs.
There are a couple of off by one mistakes in your code. If you create the 4 x 4 matrix used as an example in the reference you linked to, you will see that the ja_ values you calculate are all off by one. The reason your Fortran version works is because arrays in Fortran are by default indexed starting from 1, not 0. So in class MCSRmatrix change
ja_.at(w) = dim+2;
to
ja_.at(w) = dim+1;
and
ja_.push_back(j);
to
ja_.push_back(j-1);
Then in your operator* method change
for(auto k=A.ja_.at(i) ; k < A.ja_.at(i+1)-1 ; k++ )
to
for(auto k = A.ja_.at(i); k < A.ja_.at(i+1); k++)
I just don't know what to do with it...
The functions runs well in debug, but not in release.
I am trying to learn about artificial neural networks and C++ vectors.
Here is the code (in Python 2.7) that I'm writing in C++:
http://neuralnetworksanddeeplearning.com/chap1.html#exercise_852508
(just scroll a little to reach it)
I'm using MinGW 7.2.0 from MSYS2 (C++11).
There are some "teste" prints inside the backpropagation method, that is where the problem is comming from (I guess). I also overloaded operators +, - and * to make things easier.
I know that there are some libs like Armadillo that could make things easier, but I really wanna use this problem to learn better.
And here is the files:
neuralnetwork.h
(I made everything public to make things easier to look at)
#define MIN_NUMBER_TOLERANCE 1e-8
namespace nn
{
class neuralnetwork
{
//private:
public:
//total number of weights. useful to reserve memory
int numWeights;
//total number of biases. useful to reserve memory
int numBiases;
//total number of layers: 1 for input, n hidden layers and 1 for output
int numLayers;
//a vector to store the number of neurons in each layer: 0 index is about the input layer, last index is about the output layer
std::vector<int> sizes;
//stores all biases: num of neurons of layer 1 + ... + num of neurons of layer (numLayers - 1) (input layer has no bias)
std::vector<std::vector<double>> biases;
//stores all weights: (num of neurons of layer 1) x (num of neurons of layer ) + ... + ( num of neurons of layer (numLayers - 1) ) x ( num of neurons of layer (numLayers - 2) ) (input layer has no bias)
std::vector<std::vector<std::vector<double>>> weights;
//stores the output of each neuron of each layer
std::vector<std::vector<double>> layersOutput;
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
std::default_random_engine generator;
std::normal_distribution<double> distribution;
double randomNormalNumber(void);
double costDerivatives(const double&, const double&);
std::vector<double> costDerivatives(const std::vector<double> &, const std::vector<double> &);
void backPropagation(const std::vector<double>& neuralNetworkInputs, const std::vector<double>& expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights, std::vector<std::vector<double>>& derivativeBiases); // outputs
void update_mini_batch( const std::vector<std::pair<std::vector<double>,std::vector<double>>> & mini_batch, double eta);
//public:
neuralnetwork(const std::vector<int>& sizes);
std::vector<double> feedforward(const std::vector<double>&);
};
std::vector<double> sigmoid(const std::vector<double> &);
double sigmoid(double);
std::vector<double> sigmoid_prime(const std::vector<double> &);
//double sigmoid_prime(double);
}
neuralnetwork.cpp
#include "neuralnetwork.h"
#include <iostream>
#include <assert.h>
#include <algorithm>
namespace nn
{
int counter = 0;
neuralnetwork::neuralnetwork(const std::vector<int> &sizes)
{
this->distribution = std::normal_distribution<double>( 0.0 , 1.0 );
this->numLayers = sizes.size();
this->sizes = sizes;
this->numWeights = 0;
this->numBiases = 0;
for ( int i = 1 ; i < this->numLayers ; i++ )
{
numWeights += this->sizes[ i ] * this->sizes[ i - 1 ];
numBiases += this->sizes[ i ];
}
this->weights.reserve( numWeights );
this->biases.reserve( numBiases );
this->derivativeWeights.reserve( numWeights );
this->derivativeBiases.reserve( numBiases );
this->layersOutput.reserve( this->sizes[ 0 ] + numBiases );
std::vector<double> auxVectorWeights;
std::vector<std::vector<double> > auxMatrixWeights;
std::vector<double> auxVectorBiases;
#ifdef DEBUG_BUILD
std::cout << "debugging!\n";
#endif
//just to accommodate the input layer with null biases and inputs (makes things easier to iterate and reading :D).
this->layersOutput.push_back( std::vector<double>( this->sizes[ 0 ] ) );
std::vector<std::vector<double>> matrixNothing( 0 );
this->weights.push_back( matrixNothing );
this->biases.push_back( std::vector<double>( 0 ) );
//since the second layer (index 1) because there is no weights (nor biases) for the neurons of the first layer
for ( int layer = 1 ; layer < this->numLayers ; layer++ )
{
//preallocate memory for the output of each layer.
layersOutput.push_back( std::vector<double>( this->sizes[ layer ] ) );
//-----------weights begin--------------
//auxMatrixWeights will store the weights connections between one layer (number of columns) and its subsequent layer (number of rows)
//auxMatrixWeights = new std::vector(this->sizes[layer], std::vector<double>( this->sizes[layer - 1] )); // it is not working...
//size[layer] stores the number of neurons on the layer
for ( int i = 0 ; i < this->sizes[ layer ] ; i++ )
{
//auxVectorWeights will have the size of the amount of wights necessary to connect the neuron i (from this layer) to neuron j (from next layer)
auxVectorWeights = std::vector<double>( this->sizes[ layer - 1 ] );
for ( int j = 0 ; j < auxVectorWeights.size() ; j++ )
{
auxVectorWeights[ j ] = this->randomNormalNumber();
}
auxMatrixWeights.push_back( auxVectorWeights );
}
this->weights.push_back( auxMatrixWeights );
auxMatrixWeights.clear();
//-----------weights end----------------
//-----------biases begin---------------
auxVectorBiases = std::vector<double>( this->sizes[ layer ] );
for ( int i = 0 ; i < auxVectorBiases.size() ; i++ )
{
auxVectorBiases[ i ] = this->randomNormalNumber();
}
this->biases.push_back( auxVectorBiases );
//-----------biases end-----------------
}
#ifdef _DEBUG
for ( int i = 0 ; i < this->weights.size() ; i++ )
{
std::cout << "layer " << i << "\n";
for ( int j = 0 ; j < this->weights[ i ].size() ; j++ )
{
std::cout << "neuron" << j << std::endl;
for ( const auto k : this->weights[ i ][ j ] )
{
std::cout << '\t' << k << ' ';
}
std::cout << std::endl;
}
}
#endif
}
template <class T>
inline int lastIndex(std::vector<T> vector , int tail)
{
return (vector.size() - tail);
}
double neuralnetwork::randomNormalNumber(void)
{
return this->distribution( this->generator );
}
double sigmoid(double z)
{
return 1.0 / ( 1.0 + exp( -z ) );
}
std::vector<double> sigmoid(const std::vector<double> & z)
{
int max = z.size();
std::vector<double> output;
output.reserve(max);
for(int i=0;i<max;i++)
{
output.push_back(0);
output[i] = 1.0 / ( 1.0 + exp( -z[i] ) );
}
return output;
}
/*double sigmoid_prime(double z)
{
return sigmoid( z ) * ( 1 - sigmoid( z ) );
}*/
std::vector<double> sigmoid_prime(const std::vector<double>& z)
{
int max = z.size();
std::vector<double> output;
output.reserve(max);
for(int i=0;i<max;i++)
{
output.push_back(sigmoid( z[i] ) * ( 1 - sigmoid( z[i] ) ) );
}
return output;
}
//scalar times vector
std::vector<double> operator* (double a , const std::vector<double> & b)
{
int size = b.size();
std::vector<double> result(size);
for ( int i = 0 ; i < size ; i++ )
{
result[i] = a * b[ i ];
}
return result;
}
// inner product
std::vector<double> operator* (const std::vector<double> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size(); // or b.size(). they should have the same size.
std::vector<double> result;
result.reserve(size); // or b.size(). they should have the same size.
for ( int i = 0 ; i < size ; i++ )
{
result.push_back( a[ i ] * b[ i ] );
}
return result;
}
//matrix times columns vector
std::vector<double> operator* (const std::vector<std::vector<double>> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
assert(a[0].size() == b.size());
for(int i = 0 ; i < ( lastIndex( a , 1 )) ; i++)
{
assert(a[i].size() == a[i+1].size());
}
#endif
int lines = a.size();
int columns = a[0].size();
std::vector<double> result;
result.reserve(lines);
int j = 0;
for ( int i = 0 ; i < lines ; i++ )
{
result.push_back(0);
for(j = 0 ; j < columns ; j++)
{
result[i] += a[ i ][ j ] * b[ j ];
}
}
return result;
}
//scalar times matrix (calls scalar times vector)
std::vector<std::vector<double>> operator* (double a , const std::vector<std::vector<double>> & b)
{
#ifdef _DEBUG
for(int i = 0 ; i < b.size()-1 ; i++)
{
assert(b[i].size() == b[i+1].size());
}
#endif
int lines = b.size();
int columns = b[0].size();
std::vector<std::vector<double>> result;
int j = 0;
for ( int i = 0 ; i < lines ; i++ )
{
result.push_back(a * b[ j ]);
}
return result;
}
std::vector<double> operator+(const std::vector<double>& a, const std::vector<double>& b)
{
assert(a.size() == b.size());
int size = a.size();
std::vector<double> result;
result.reserve(size);
for(int i = 0 ; i < size ; i++)
{
result.push_back(0);
result[i] = a[i] + b[i];
}
return result;
}
//sum of matrices
std::vector<std::vector<double>> operator+(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size();
#ifdef _DEBUG
for(int i = 0 ; i < size ; i++)
{
assert(a[i].size() == b[i].size());
}
#endif
std::vector<std::vector<double>> result;
result.resize(size);
for(int i = 0 ; i < size ; i++)
{
result.push_back(a[i] + b[i]);
}
return result;
}
//subtraction of vectors
std::vector<double> operator-(const std::vector<double>& a, const std::vector<double>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size();
std::vector<double> result;
result.resize(size);
for(int i = 0 ; i < size ; i++)
{
result[i] = a[i] - b[i];
}
return result;
}
//subtraction of matrices (calls subtraction of vectors)
std::vector<std::vector<double>> operator-(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
#ifdef _DEBUG
assert(a.size() == b.size());
#endif
int size = a.size();
#ifdef _DEBUG
for(int i = 0 ; i < size ; i++)
{
assert(a[i].size() == b[i].size());
}
#endif
std::vector<std::vector<double>> result;
result.resize(size);
for(int i = 0 ; i < size ; i++)
{
result.push_back(a[i] - b[i]);
}
return result;
}
//elementwise division
std::vector<double> operator/(const std::vector<double>& a, const std::vector<double>& b)
{
assert(a.size() == b.size());
int size = a.size();
std::vector<double> result;
result.reserve(size);
for(int i = 0 ; i < size ; i++)
{
if(b[i] < MIN_NUMBER_TOLERANCE)
{
throw std::runtime_error("Can't divide by zero!");
}
result[i] = a[i] / b[i];
}
return result;
}
double neuralnetwork::costDerivatives(const double &networkOutput , const double &expectedOutput)
{
return expectedOutput - networkOutput;
}
std::vector<double> neuralnetwork::costDerivatives(const std::vector<double> &networkOutput , const std::vector<double> &expectedOutput)
{
assert(expectedOutput.size() == networkOutput.size());
int size = networkOutput.size();
std::vector<double> output;
output.reserve(size);
for(int i = 0 ; i < size ; i++)
{
output.push_back(networkOutput[i] - expectedOutput[i]);
}
return output;
}
void neuralnetwork::backPropagation(const std::vector<double> &neuralNetworkInputs , const std::vector<double> &expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights , std::vector<std::vector<double>>& derivativeBiases) // outputs
{
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
derivativeWeights.reserve( sizes.size() - 1 );
derivativeBiases.reserve( sizes.size() - 1 );
//to store one activation layer
std::vector<double> activation = neuralNetworkInputs;
//to store each one of the activation layers
std::vector<std::vector<double>> activations;
activations.reserve(sizes.size()); // numBiases is the same as the number of neurons (except 1st layer)
activations.push_back(activation);
int maxLayerSize = 0;
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
for ( int i = 1 ; i < numBiases ; i++ )
{
maxLayerSize = std::max(sizes[i], maxLayerSize);
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
// to store one weighted sum
std::vector<double> z;
z.reserve(maxLayerSize);
// to store each one of the weighted sums
std::vector<std::vector<double>> zs;
zs.reserve(sizes.size());
// layer and neuron counter
int layer, neuron;
for ( layer = 1 ; layer < numLayers ; layer++ )
{
z = (weights[layer] * activation) + biases[layer];
zs.push_back(z);
activation = sigmoid(z);
activations.push_back(activation);
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
std::vector<double> delta = costDerivatives(activations[ lastIndex( activations , 1 )] , expectedOutputs) * sigmoid_prime(z);
delta.reserve(maxLayerSize);
derivativeBiases.push_back(delta);
int j;
std::vector<std::vector<double>> dummyMatrix;
dummyMatrix.reserve(maxLayerSize);
for (neuron = 0; neuron < sizes[ lastIndex( sizes , 1 )]; neuron++)
{
dummyMatrix.push_back(std::vector<double>(activations[ lastIndex( activations , 2 )].size()));
for (j = 0; j < activations[ lastIndex( activations , 2 )].size(); j++)
{
dummyMatrix[neuron][j] = delta[neuron] * activations[ lastIndex( activations , 2 )][j];
}
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
derivativeWeights.push_back(dummyMatrix);
dummyMatrix.clear();
std::vector<double> sp;
sp.reserve(maxLayerSize);
std::vector<double> dummyVector;
dummyVector.reserve(maxLayerSize);
double dummyDouble = 0;
for(layer = 2 ; layer < numLayers ; layer++)
{
z = zs[ lastIndex( zs , layer )];
sp = sigmoid_prime(z);
for(j = 0 ; j < sizes[ lastIndex( weights , layer )] ; j++)
{
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer - 1 )]; neuron++)
{
dummyDouble += weights[ lastIndex( weights , layer - 1 )][neuron][j] * delta[neuron];
}
dummyVector.push_back(dummyDouble * sp[j]);
dummyDouble = 0;
}
delta = dummyVector;
dummyVector.clear();
derivativeBiases.push_back(delta);
for (neuron = 0; neuron < sizes[ lastIndex( sizes , layer )]; neuron++)
{
dummyMatrix.push_back(std::vector<double>(sizes[ lastIndex( sizes , layer + 1 )]));
for (j = 0; j < sizes[ lastIndex( sizes , layer + 1 )]; j++)
{
dummyMatrix[neuron][j] = activations[ lastIndex( activations , layer + 1 )][j] * delta[neuron];
}
}
derivativeWeights.push_back(dummyMatrix);
dummyMatrix.clear();
}
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
//both derivativeWeights and derivativeBiases are reversed. so let's reverse it.
std::reverse(derivativeWeights.begin(),derivativeWeights.end());
std::reverse(derivativeBiases.begin(),derivativeBiases.end());
std::cout << "teste "<< counter++ << std::endl;
system("PAUSE");
}
}
main.cpp
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "neuralnetwork.h"
#include <string>
void printAll(const std::vector<double> & v, const std::string & name)
{
int size = v.size();
std::cout << "\t" << name << ":\t";
for(int i = 0 ; i < size ; i++)
{
std::cout << v[i] << "\t";
}
std::cout << std::endl;
}
template<class T>
void printAll(const std::vector<T> & v, const std::string & name)
{
int size = v.size();
std::cout << name << ":" << std::endl;
for(int i = 0 ; i < size ; i++)
{
printAll(v[i], "\t" + ("[" + std::to_string(i)) + "]");
}
}
int main(int argc, char** argv )
{
nn::neuralnetwork n({2,4,3});
n.weights = {{},{{1,2},{3,4},{5,6},{7,8}} , {{9,8,7,6},{5,4,3,2},{1,2,3,4}}};
n.biases = {{},{1, 4, 6, 8} , {9, 2, 4}};
printAll(n.weights,"weights");
printAll(n.biases,"biases");
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
n.backPropagation({1,2},{1,2,3},derivativeWeights,derivativeBiases);
printAll(n.derivativeWeights,"derivativeWeights");
printAll(n.derivativeBiases,"derivativeBiases");
system("PAUSE");
return 0;
}
It looks like your problem is that you are only reserving memory for the vectors in the constructor, not allocating it.
The reserve method does not resize the vector, it is a performance optimization in cases where you know you will resize the vector in the future, but an optimizing compiler is free to ignore it.
This isn't causing a problem for 'weights' and 'biases' in this particular code becuase you are initializing them with vectors of the proper size, which does set them to the correct size. The problems are with derivativeWeights and derivativeBiases, where you reserve memory for the vectors, but they are never actually resized. This makes this memory potentially invalid if you try to dereference it. You could use resize instead of reserve, or push back the elements one by one, which will also resize the vector.
Another comment is that you don't have to use this-> for every member of the class, the 'this->' is assumed for members of the class if you don't use it.
I didn't found the problem, but I realised that, for this problem (artificial neural network), I can initialize each property of the class with each right sizes without loss of generality. So this is what I'm going to do.
I feel a little ashamed that I am not finding it... :/
I am writing a program to find out "Strongly Connected Component" of a given graph.
For that I am using following algorithm.
1) Apply DFS and push the nodes in decreasing order of their Finishing time in stack.
2) Transpose the graph. ( reverse the edges of the graph)
3) Pop the node and apply DFS again.
For this if I accept the graph edges by function addEdge(int u, int v) (edge from node u to node v).
then program works correctly.
Question : But when I try to accept graph through input file, it is giving me error.
I am not giving whole program as it is working correctly.
class Graph
{
int V;
list<int> *adj;
void fillorder( int v , bool visited[] , stack<int> &stack);
void DFSUtil(int v , bool visited[]);
public:
Graph( int V);
void printSCCs();
Graph get_Transpose();
void print_graph();
void read_input_file();
};
Graph:: Graph(int V)
{
this-> V = V;
adj = new list<int>[V];
}
void Graph :: read_input_file()
{
const long int N = 100;
std::ifstream infile("S.txt");
std::string line;
int i = 0;
while ( i < N && getline(infile, line) )
{
std::istringstream str(line);
int u;
str >> u;
if ( u > N )
{
// Problem.
abort();
}
int v;
while ( str >> v )
{
adj[u-1].push_back(v);
}
++i;
}
}
void Graph :: print_graph()
{
for( int i = 0 ; i < 9 ; i++ )
{
list<int> :: iterator itr;
for( itr = adj[i].begin(); itr != adj[i].end(); ++itr )
{
cout<< i+1 <<" --> " << *itr <<endl;
}
}
}
void Graph:: printSCCs()
{
stack<int>Stack;
bool *visited = new bool[V];
for( int i = 0 ; i < V ; i++ )
visited[i] = false;
for( int i = 0 ; i < V ; i++ )
if(visited[i] == false)
fillorder(i , visited , Stack);
Graph gr = get_Transpose();
for( int i = 0 ; i < V ; i++ )
visited[i] = false;
while( Stack.empty() == false)
{
int no = Stack.top();
Stack.pop();
if(visited[no] == false)
{
gr.DFSUtil(no , visited);
cout<< endl;
}
}
}
int main()
{
Graph g(9);
g.read_file(); // Working perfectly
g.print_graph(); // showing graph perfectly
g.printSCCs(); // Giving error : Segmentation fault: core dump
return 0;
}
Graph is getting printed correctly but g.printSCCs() is not working.
Error is : Segmentation fault: core dump
Now if g.print_graph() is working perfectly then why not g.printSCCs() ?
NOTE : I have not pasted whole program just to insure that it will be readable.
Moreover , by manually entering edges , whole program works perfectly , so I think there will not be error in any function.
I/P file data :
1 4
2 8
3 6
4 7
5 2
6 9
7 1
8 5
8 6
9 7
9 3
Suppose I have the following vector:
The vector is a vector of pairs, and we are comparing based on the first element.
[(1,0),(0,1),(3,2),(6,3),(2,4),(4,5),(7,6),(5,7)]
I want to erase all elements in a specific range except the largest.
For example, if the range was $l = 2$ and $r = 5$, then the output:
[(1,0),(0,1),(6,3),(7,6),(5,7)]
Now if we do this again to the output array for $l = 1$, $r = 4$, then the output:
[(1,0),(7,6)]
I found this which I thought would be useful here, but I don't know how to make it work for pairs.
Here is my attempt:
int main(int argc, char const *argv[]) {
int N;
cin >> N;
vector< pair<int,int> > vector_of_pairs(N);
for (int i = 0; i < N; i++) {
int input;
cin >> input;
vector_of_pairs[i] = make_pair(input, i);
}
int l, r;
cin >> l >> r;
int max_in_range = vector_of_pairs[l].first;
for (int i = l+1; i <= r; i++) {
if (vector_of_pairs[i].first > max_in_range) {
max_in_range = vector_of_pairs[i].first;
}
}
for (int i = l; i <= r; i++) {
if (vector_of_pairs[i].first != max_in_range) {
vector_of_pairs.erase(vector_of_pairs.begin() + i);
}
}
printf("[");
for(int i = 0; i < vector_of_pairs.size(); i++) {
printf("(%d,%d)", vector_of_pairs[i].first, vector_of_pairs[i].second);
}
printf("]\n");
}
For the following input:
8
1 0 5 6 2 3 7 4
1 3
This is the output:
[(1,0)(5,2)(6,3)(3,5)(7,6)(4,7)]
But it should be
[(1,0)(6,3)(3,5)(7,6)(4,7)]
Also, for certain inputs I get seg faults so how can I safe guard against that?
Erase remove idiom at the rescue:
auto begin = v.begin() + l;
auto end = v.begin() + r + 1;
auto max_value = *std::max_element(begin, end);
v.erase(std::remove_if(begin, end,
[&](const auto& p) {return p.first != max_value.first; }),
end);
Demo
Probabaly you want this
#include <iostream>
#include <vector>
using namespace std;
int main(int argc, char const *argv[]) {
int N;
cin >> N;
vector< pair<int,int> > vector_of_pairs(N);
for (int i = 0; i < N; i++) {
int input;
cin >> input;
vector_of_pairs[i] = make_pair(input, i);
}
int l, r;
cin >> l >> r;
int max_in_range = vector_of_pairs[l].first;
for (int i = l+1; i <= r; i++) {
if (vector_of_pairs[i].first > max_in_range) {
max_in_range = vector_of_pairs[i].first;
}
}
int p=l;
for (int i = l; i <= r;i++ ) {
if (vector_of_pairs[p].first != max_in_range) {
vector_of_pairs.erase(vector_of_pairs.begin()+p);
}
else p++;
}
printf("[");
for(int i = 0; i < vector_of_pairs.size(); i++) {
printf("(%d,%d)", vector_of_pairs[i].first, vector_of_pairs[i].second);
}
printf("]\n");
}
Getting correct output :
[(1,0)(6,3)(2,4)(3,5)(7,6)(4,7)]
Explanation: When you delete an item in vector, items present after the deleted element get decreased their index by 1. Hence in the ith loop from l to r you shouldn't delete vec.begin()+i element, instead delete vec.begin()+l item until you find largest element, and delete vec.begin()+l+1 element after you find largest element.
Hope it helps.
If you don't need a stable algorithm you can simply sort the vector, get iterator for l in begin - end, get iterator for r in l - end and then remove all but the very last element between both found iterators.
#include <algorithm>
template<class V, class T>
void remove_range_but_max(V& vop, T const& l, T const& r)
{
std::sort(vop.begin(), vop.end());
auto lo = std::lower_bound(vop.begin(), vop.end(), l);
auto hi = std::lower_bound(lo, vop.end(), r);
if(hi != lo) --hi;
vop.erase(lo, hi);
}
can be used like:
std::vector< std::pair< int , int > > input {
std::make_pair( 1 , 0 ) , std::make_pair( 0 , 1 ) ,
std::make_pair( 3 , 2 ) , std::make_pair( 6 , 3 ) ,
std::make_pair( 2 , 4 ) , std::make_pair( 4 , 5 ) ,
std::make_pair( 7 , 6 ) , std::make_pair( 5 , 7 ) };
/* remove all pairs in range [(2,0), (6,0)) */
remove_range_but_max(input, std::make_pair(2,0), std::make_pair(6,0));
for(auto&& p : input) std::cout << p.first << ", " << p.second << "\n";
Prints:
0, 1
1, 0
5, 7
6, 3
7, 6
Quick and sleazy way:
std::sort(vector_of_pairs.begin(), vector_of_pairs.end());
vector_of_pairs.resize(1);
Just be sure to supply sort with the appropriate function to make the largest element get pushed to the first of the list.
Let me spell it out for you:
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
bool i_said_to_do_this(const pair<int, int> &lhs, const pair<int, int> &rhs) {
return lhs.first > rhs.first;
}
int main(int argc, char const *argv[]) {
int pairs[][2] = {{1,0},{0,1},{3,2},{6,3},{2,4},{4,5},{7,6},{5,7}};
vector< pair<int,int> > vector_of_pairs(8);
for (int i{0}; i < 8; i++) {
vector_of_pairs[i] = make_pair(pairs[i][0], pairs[i][1]);
}
std::sort(vector_of_pairs.begin(), vector_of_pairs.end(), i_said_to_do_this);
vector_of_pairs.erase(vector_of_pairs.begin() + 1, vector_of_pairs.end());
cout << "(" << vector_of_pairs[0].first << ", " << vector_of_pairs[0].second << ")" << endl;
}
I'm practicing ACM problems to become a better programmer, but I'm still fairly new to c++ and I'm having trouble interpreting some of the judges code I'm reading. The beginning of a class starts with
public:
State(int n) : _n(n), _p(2*n+1)
{
and then later it's initialized with
State s(n);
s(0,0) = 1;
I'm trying to read the code but I can't make sense of that. The State class only seems to have 1 argument passed, but the programmer is passing 2 in his initialization. Also, what exactly is being set = to 1? As far as I can tell, the = operator isn't being overloaded but just in case I missed something I've included the full code below.
Any help would be greatly appreciated.
Thanks in advance
/*
* D - Maximum Random Walk solution
* ICPC 2012 Greater NY Regional
* Solution by Adam Florence
* Problem by Adam Florence
*/
#include <cstdio> // for printf
#include <cstdlib> // for exit
#include <algorithm> // for max
#include <iostream>
#include <vector>
using namespace std;
class State
{
public:
State(int n) : _n(n), _p(2*n+1)
{
if (n < 1)
{
cout << "Ctor error, n = " << n << endl;
exit(1);
}
for (int i = -n; i <= n; ++i)
_p.at(i+_n) = vector<double>(n+1, 0.0);
}
void zero(const int n)
{
for (int i = -n; i < n; ++i)
for (int m = 0; m <= n; ++m)
_p[i+_n][m] = 0;
}
double operator()(int i, int m) const
{
#ifdef DEBUG
if ((i < -_n) || (i > _n))
{
cout << "Out of range error, i = " << i << ", n = " << _n << endl;
exit(1);
}
if ((m < 0) || (m > _n))
{
cout << "Out of range error, m = " << m << ", n = " << _n << endl;
exit(1);
}
#endif
return _p[i+_n][m];
}
double& operator()(int i, int m)
{
#ifdef DEBUG
if ((i < -_n) || (i > _n))
{
cout << "Out of range error, i = " << i << ", n = " << _n << endl;
exit(1);
}
if ((m < 0) || (m > _n))
{
cout << "Out of range error, m = " << m << ", n = " << _n << endl;
exit(1);
}
#endif
return _p[i+_n][m];
}
static int min(int x, int y)
{
return(x < y ? x : y);
}
static int max(int x, int y)
{
return(x > y ? x : y);
}
private:
int _n;
// First index is the current position, from -n to n.
// Second index is the maximum position so far, from 0 to n.
// Value is probability.
vector< vector<double> > _p;
};
void go(int ds)
{
// Read n, l, r
int n, nds;
double l, r;
cin >> nds >> n >> l >> r;
const double c = 1 - l - r;
if(nds != ds){
cout << "Dataset number " << nds << " does not match " << ds << endl;
return;
}
// Initialize state, probability 1 at (0,0)
State s(n);
s(0,0) = 1;
State t(n);
State* p1 = &s;
State* p2 = &t;
for (int k = 1; k <= n; ++k)
{
// Compute probabilities at step k
p2->zero(k);
// At step k, the farthest from the origin you can be is k
for (int i = -k; i <= k; ++i)
{
const int mm = State::min( State::max(0, i+k), k);
for (int m = 0; m <= mm; ++m)
{
// At step k-1, p = probability of (i,m)
const double p = p1->operator()(i,m);
if (p > 0)
{
// Step left
p2->operator()(i-1, m) += p*l;
// Step right
p2->operator()(i+1, State::max(i+1,m)) += p*r;
// Stay put
p2->operator()(i, m) += p*c;
}
}
}
swap(p1, p2);
}
// Compute expected maximum position
double p = 0;
for (int i = -n; i <= n; ++i)
for (int m = 0; m <= n; ++m)
p += m * p1->operator()(i,m);
printf("%d %0.4f\n", ds, p);
}
int main(int argc, char* argv[])
{
// Read number of data sets to process
int num;
cin >> num;
// Process each data set identically
for (int i = 1; i <= num; ++i)
go(i);
// We're done
return 0;
}
You are confusing a call to state::operator()(int, int) with an initialization. That operator call lets you set the value of an element of the class instance.
State s(n); // this is the only initialization
s(0,0) = 1; // this calls operator()(int, int) on instance s
In this line:
s(0,0) = 1;
it's calling this:
double& operator()(int i, int m)
and because it returns a reference to a double, you can assign to it.
The second line is no longer initialization. The constructor was invoked in line 1, the second line invokes
double& operator()(int i, int m)
with n=0 and m=0 and writing 1 to the reference that is returned.
This part:
State(int n) : _n(n), _p(2*n+1)
...is a member initializer list. It's sort of similar to if you'd written the construct like:
state(int n) { _n = n; _p = 2*n+1; }
...except that it initializes _n and _p instead of starting with them unitialized, then assigning values to them. In this specific case that may not make much difference, but when you have things like references that can only be initialized (not assigned) it becomes crucial.
The s(0,0) = 1 looks like s is intended to act a little like a 2D array, and they've overloaded operator() to act as a subscripting operator for that array. I posted a class that does that in a previous answer.