I just don't know what to do with it...
The function runs well in a debug build, but not in a release build.
I am trying to learn about artificial neural networks and C++ vectors.
Here is the code (in Python 2.7) that I'm writing in C++:
http://neuralnetworksanddeeplearning.com/chap1.html#exercise_852508
(just scroll a little to reach it)
I'm using MinGW 7.2.0 from MSYS2 (C++11).
There are some "teste" prints inside the backpropagation method, which is where the problem is coming from (I guess). I also overloaded the operators +, - and * to make things easier.
I know that there are some libs like Armadillo that could make things easier, but I really wanna use this problem to learn better.
And here is the files:
neuralnetwork.h
(I made everything public to make things easier to look at)
#define MIN_NUMBER_TOLERANCE 1e-8
namespace nn
{
// Fully connected feed-forward neural network with sigmoid activations.
// Layer 0 is the input layer; it owns no weights or biases, but index 0 of the
// per-layer containers is still populated by the constructor (with empty
// entries) so that containers can be indexed directly by layer number.
class neuralnetwork
{
//private:
public:
// total number of weights in the network (sum over layers of sizes[l] * sizes[l-1])
int numWeights;
// total number of biases in the network (sum of sizes[l] for l >= 1)
int numBiases;
// total number of layers: 1 for input, n hidden layers and 1 for output
int numLayers;
// number of neurons in each layer: index 0 is the input layer, the last index is the output layer
std::vector<int> sizes;
// biases[l][i] is the bias of neuron i of layer l; biases[0] is empty (the input layer has no biases)
std::vector<std::vector<double>> biases;
// weights[l][i][j] connects neuron j of layer l-1 to neuron i of layer l; weights[0] is empty (the input layer has no weights)
std::vector<std::vector<std::vector<double>>> weights;
// one pre-sized vector per layer for the output of each neuron of that layer
std::vector<std::vector<double>> layersOutput;
// gradient storage; note that backPropagation() writes to its reference
// parameters of the same names, which shadow these members inside that method
std::vector<std::vector<std::vector<double>>> derivativeWeights;
std::vector<std::vector<double>> derivativeBiases;
// random engine and N(0, 1) distribution used to initialise weights and biases
std::default_random_engine generator;
std::normal_distribution<double> distribution;
// returns one sample drawn from `distribution`
double randomNormalNumber(void);
// derivative of the cost with respect to a single output activation
double costDerivatives(const double&, const double&);
// element-wise derivative of the cost with respect to the output activations
std::vector<double> costDerivatives(const std::vector<double> &, const std::vector<double> &);
// computes the gradient of the cost for one training example
void backPropagation(const std::vector<double>& neuralNetworkInputs, const std::vector<double>& expectedOutputs, // inputs
std::vector<std::vector<std::vector<double>>>& derivativeWeights, std::vector<std::vector<double>>& derivativeBiases); // outputs
// applies one gradient-descent step over a mini batch of (input, expected output) pairs; eta is the learning rate
// NOTE(review): no definition of this method is visible in neuralnetwork.cpp as shown here
void update_mini_batch( const std::vector<std::pair<std::vector<double>,std::vector<double>>> & mini_batch, double eta);
//public:
// builds a network whose layer l has sizes[l] neurons
neuralnetwork(const std::vector<int>& sizes);
// NOTE(review): declared but no definition is visible here; presumably returns the network output for the given input
std::vector<double> feedforward(const std::vector<double>&);
};
// element-wise logistic function 1 / (1 + e^-z)
std::vector<double> sigmoid(const std::vector<double> &);
// logistic function 1 / (1 + e^-z)
double sigmoid(double);
// element-wise derivative of the logistic function
std::vector<double> sigmoid_prime(const std::vector<double> &);
//double sigmoid_prime(double);
}
neuralnetwork.cpp
#include "neuralnetwork.h"
#include <assert.h>
#include <algorithm>
#include <cmath>
#include <iostream>
#include <stdexcept>
namespace nn
{
int counter = 0;
/**
 * Builds a network whose layer l contains sizes[l] neurons.
 *
 * Index 0 of weights and biases is filled with an empty entry so that every
 * per-layer container can be indexed directly by layer number. Weights and
 * biases of layers 1..numLayers-1 are drawn from a N(0, 1) distribution, in
 * the order "all weights of the layer, then all biases of the layer".
 *
 * An empty `sizes` produces an empty network (no layers, no storage).
 *
 * @param sizes number of neurons per layer; sizes[0] is the input layer.
 */
neuralnetwork::neuralnetwork(const std::vector<int> &sizes)
{
    distribution = std::normal_distribution<double>( 0.0 , 1.0 );
    numLayers = static_cast<int>( sizes.size() );
    this->sizes = sizes; // the parameter shadows the member, hence the explicit this->
    numWeights = 0;
    numBiases = 0;
    for ( int layer = 1 ; layer < numLayers ; layer++ )
    {
        numWeights += sizes[ layer ] * sizes[ layer - 1 ];
        numBiases += sizes[ layer ];
    }
#ifdef DEBUG_BUILD
    std::cout << "debugging!\n";
#endif
    if ( sizes.empty() )
    {
        // nothing to allocate; reading sizes[0] below would be out of range
        return;
    }

    // Every outer container holds exactly one entry per layer, so that is the
    // capacity worth reserving (not the total number of weights or neurons).
    weights.reserve( numLayers );
    biases.reserve( numLayers );
    layersOutput.reserve( numLayers );
    derivativeWeights.reserve( numLayers );
    derivativeBiases.reserve( numLayers );

    // Placeholders for the input layer: it has outputs but no weights or biases.
    layersOutput.push_back( std::vector<double>( sizes[ 0 ] ) );
    weights.push_back( std::vector<std::vector<double>>() );
    biases.push_back( std::vector<double>() );

    for ( int layer = 1 ; layer < numLayers ; layer++ )
    {
        // pre-sized storage for the output of this layer
        layersOutput.push_back( std::vector<double>( sizes[ layer ] ) );

        // one row per neuron of this layer, one column per neuron of the previous layer
        std::vector<std::vector<double>> layerWeights;
        layerWeights.reserve( sizes[ layer ] );
        for ( int neuron = 0 ; neuron < sizes[ layer ] ; neuron++ )
        {
            std::vector<double> incoming( sizes[ layer - 1 ] );
            for ( double &weight : incoming )
            {
                weight = randomNormalNumber();
            }
            layerWeights.push_back( incoming );
        }
        weights.push_back( layerWeights );

        std::vector<double> layerBiases( sizes[ layer ] );
        for ( double &bias : layerBiases )
        {
            bias = randomNormalNumber();
        }
        biases.push_back( layerBiases );
    }
#ifdef _DEBUG
    // dump the freshly initialised weights
    for ( std::size_t layer = 0 ; layer < weights.size() ; layer++ )
    {
        std::cout << "layer " << layer << "\n";
        for ( std::size_t neuron = 0 ; neuron < weights[ layer ].size() ; neuron++ )
        {
            std::cout << "neuron" << neuron << std::endl;
            for ( const double weight : weights[ layer ][ neuron ] )
            {
                std::cout << '\t' << weight << ' ';
            }
            std::cout << std::endl;
        }
    }
#endif
}
// Returns the index that lies `tail` positions before the end of `vector`
// (tail == 1 is the last element, tail == 2 the one before it, ...).
// The vector is taken by const reference: this helper is called inside loops
// and must not copy the whole container on every call.
template <class T>
inline int lastIndex(const std::vector<T> &vector , int tail)
{
    return static_cast<int>( vector.size() ) - tail;
}
double neuralnetwork::randomNormalNumber(void)
{
return this->distribution( this->generator );
}
// Logistic function: 1 / (1 + e^-z).
double sigmoid(double z)
{
    const double denominator = 1.0 + exp( -z );
    return 1.0 / denominator;
}
// Applies the logistic function 1 / (1 + e^-x) to every element of z and
// returns the results in the same order.
std::vector<double> sigmoid(const std::vector<double> & z)
{
    std::vector<double> output;
    output.reserve( z.size() );
    for ( const double value : z )
    {
        output.push_back( 1.0 / ( 1.0 + exp( -value ) ) );
    }
    return output;
}
/*double sigmoid_prime(double z)
{
return sigmoid( z ) * ( 1 - sigmoid( z ) );
}*/
// Element-wise derivative of the logistic function:
// sigmoid(x) * (1 - sigmoid(x)) for every x in z.
std::vector<double> sigmoid_prime(const std::vector<double>& z)
{
    std::vector<double> output;
    output.reserve( z.size() );
    for ( const double value : z )
    {
        output.push_back( sigmoid( value ) * ( 1 - sigmoid( value ) ) );
    }
    return output;
}
// scalar times vector: returns a copy of b with every element multiplied by a
std::vector<double> operator* (double a , const std::vector<double> & b)
{
    std::vector<double> scaled( b );
    for ( double & element : scaled )
    {
        element = a * element;
    }
    return scaled;
}
// element-wise (Hadamard) product: result[i] = a[i] * b[i].
// Both operands are expected to have the same length; this is only checked
// when _DEBUG is defined.
std::vector<double> operator* (const std::vector<double> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
    assert(a.size() == b.size());
#endif
    std::vector<double> product( a.size() );
    for ( std::size_t k = 0 ; k < product.size() ; ++k )
    {
        product[ k ] = a[ k ] * b[ k ];
    }
    return product;
}
// matrix times column vector: result[i] is the dot product of row i of a with b.
// An empty matrix yields an empty result. Every row is expected to have as
// many entries as b; this is only checked when _DEBUG is defined.
std::vector<double> operator* (const std::vector<std::vector<double>> & a , const std::vector<double> & b)
{
#ifdef _DEBUG
    for ( const std::vector<double> & row : a )
    {
        assert(row.size() == b.size());
    }
#endif
    std::vector<double> result;
    result.reserve( a.size() );
    for ( const std::vector<double> & row : a )
    {
        // accumulate left to right so the rounding matches a plain running sum
        double sum = 0;
        for ( std::size_t j = 0 ; j < row.size() ; j++ )
        {
            sum += row[ j ] * b[ j ];
        }
        result.push_back( sum );
    }
    return result;
}
// scalar times matrix: returns a matrix of the same shape as b in which every
// element is multiplied by a. An empty matrix yields an empty result.
std::vector<std::vector<double>> operator* (double a , const std::vector<std::vector<double>> & b)
{
#ifdef _DEBUG
    // all rows must have the same length (start at 1 so an empty matrix is fine)
    for ( std::size_t i = 1 ; i < b.size() ; i++ )
    {
        assert(b[i - 1].size() == b[i].size());
    }
#endif
    std::vector<std::vector<double>> result;
    result.reserve( b.size() );
    for ( const std::vector<double> & row : b )
    {
        // scale each row in turn (not only the first one)
        std::vector<double> scaledRow;
        scaledRow.reserve( row.size() );
        for ( const double value : row )
        {
            scaledRow.push_back( a * value );
        }
        result.push_back( scaledRow );
    }
    return result;
}
// sum of vectors: result[i] = a[i] + b[i]; both operands must have the same length
std::vector<double> operator+(const std::vector<double>& a, const std::vector<double>& b)
{
    assert(a.size() == b.size());
    std::vector<double> sum( a );
    for ( std::size_t k = 0 ; k < sum.size() ; ++k )
    {
        sum[ k ] = sum[ k ] + b[ k ];
    }
    return sum;
}
// sum of matrices: result[i][j] = a[i][j] + b[i][j].
// Both operands must have the same shape. The result has exactly a.size() rows.
std::vector<std::vector<double>> operator+(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
    assert(a.size() == b.size());
    std::vector<std::vector<double>> result;
    // reserve (not resize): rows are appended below, so the vector must start empty
    result.reserve( a.size() );
    for ( std::size_t i = 0 ; i < a.size() ; i++ )
    {
        assert(a[i].size() == b[i].size());
        std::vector<double> row;
        row.reserve( a[i].size() );
        for ( std::size_t j = 0 ; j < a[i].size() ; j++ )
        {
            row.push_back( a[i][j] + b[i][j] );
        }
        result.push_back( row );
    }
    return result;
}
// subtraction of vectors: result[i] = a[i] - b[i].
// Both operands are expected to have the same length; this is only checked
// when _DEBUG is defined.
std::vector<double> operator-(const std::vector<double>& a, const std::vector<double>& b)
{
#ifdef _DEBUG
    assert(a.size() == b.size());
#endif
    std::vector<double> difference;
    difference.reserve( a.size() );
    for ( std::size_t k = 0 ; k < a.size() ; ++k )
    {
        difference.push_back( a[ k ] - b[ k ] );
    }
    return difference;
}
// subtraction of matrices: result[i][j] = a[i][j] - b[i][j].
// Both operands must have the same shape. The result has exactly a.size() rows.
std::vector<std::vector<double>> operator-(const std::vector<std::vector<double>>& a, const std::vector<std::vector<double>>& b)
{
    assert(a.size() == b.size());
    std::vector<std::vector<double>> result;
    // reserve (not resize): rows are appended below, so the vector must start empty
    result.reserve( a.size() );
    for ( std::size_t i = 0 ; i < a.size() ; i++ )
    {
        assert(a[i].size() == b[i].size());
        std::vector<double> row;
        row.reserve( a[i].size() );
        for ( std::size_t j = 0 ; j < a[i].size() ; j++ )
        {
            row.push_back( a[i][j] - b[i][j] );
        }
        result.push_back( row );
    }
    return result;
}
// element-wise division: result[i] = a[i] / b[i].
// Both operands must have the same length.
// Throws std::runtime_error when the magnitude of a divisor is below
// MIN_NUMBER_TOLERANCE; negative divisors of sufficient magnitude are valid.
std::vector<double> operator/(const std::vector<double>& a, const std::vector<double>& b)
{
    assert(a.size() == b.size());
    std::vector<double> result;
    result.reserve( a.size() );
    for ( std::size_t i = 0 ; i < a.size() ; i++ )
    {
        if ( std::fabs( b[i] ) < MIN_NUMBER_TOLERANCE )
        {
            throw std::runtime_error("Can't divide by zero!");
        }
        // push_back, not result[i]: reserve() does not create elements
        result.push_back( a[i] / b[i] );
    }
    return result;
}
// Derivative of the quadratic cost 1/2 * (output - expected)^2 with respect to
// a single output activation. The sign convention (output minus expected)
// matches the vector overload of costDerivatives.
double neuralnetwork::costDerivatives(const double &networkOutput , const double &expectedOutput)
{
    return networkOutput - expectedOutput;
}
// Element-wise cost derivative: output[i] = networkOutput[i] - expectedOutput[i].
// Both vectors must have the same length.
std::vector<double> neuralnetwork::costDerivatives(const std::vector<double> &networkOutput , const std::vector<double> &expectedOutput)
{
    assert(expectedOutput.size() == networkOutput.size());
    std::vector<double> output( networkOutput.size() );
    for ( std::size_t k = 0 ; k < output.size() ; ++k )
    {
        output[ k ] = networkOutput[ k ] - expectedOutput[ k ];
    }
    return output;
}
/**
 * Computes the gradient of the cost for a single training example.
 *
 * Note that the two output parameters shadow the class members of the same
 * name; only the parameters are written.
 *
 * @param neuralNetworkInputs activations of the input layer.
 * @param expectedOutputs     desired activations of the output layer.
 * @param derivativeWeights   [out] cleared, then filled with one matrix per
 *                            non-input layer, ordered from the first hidden
 *                            layer to the output layer; matrix k has the same
 *                            shape as weights[k + 1].
 * @param derivativeBiases    [out] cleared, then filled with one vector per
 *                            non-input layer in the same order; vector k has
 *                            the same length as biases[k + 1].
 */
void neuralnetwork::backPropagation(const std::vector<double> &neuralNetworkInputs , const std::vector<double> &expectedOutputs, // inputs
                                    std::vector<std::vector<std::vector<double>>>& derivativeWeights , std::vector<std::vector<double>>& derivativeBiases) // outputs
{
    // Start from empty outputs so repeated calls do not accumulate stale entries.
    derivativeWeights.clear();
    derivativeBiases.clear();
    if ( numLayers < 2 )
    {
        // no layer with weights or biases: nothing to differentiate
        return;
    }
    derivativeWeights.reserve( numLayers - 1 );
    derivativeBiases.reserve( numLayers - 1 );

    // outer product: result[r][c] = column[r] * row[c]
    const auto outerProduct = [](const std::vector<double> &column , const std::vector<double> &row) -> std::vector<std::vector<double>>
    {
        std::vector<std::vector<double>> matrix;
        matrix.reserve( column.size() );
        for ( const double columnValue : column )
        {
            std::vector<double> line;
            line.reserve( row.size() );
            for ( const double rowValue : row )
            {
                line.push_back( columnValue * rowValue );
            }
            matrix.push_back( line );
        }
        return matrix;
    };

    // ---- forward pass: remember every weighted input and every activation ----
    // activations[l] is the output of layer l; zs[l - 1] is the weighted input of layer l
    std::vector<std::vector<double>> activations;
    activations.reserve( numLayers );
    activations.push_back( neuralNetworkInputs );
    std::vector<std::vector<double>> zs;
    zs.reserve( numLayers - 1 );
    for ( int layer = 1 ; layer < numLayers ; layer++ )
    {
        const std::vector<double> z = ( weights[ layer ] * activations.back() ) + biases[ layer ];
        zs.push_back( z );
        activations.push_back( sigmoid( z ) );
    }

    // ---- backward pass, output layer first ----
    std::vector<double> delta = costDerivatives( activations.back() , expectedOutputs ) * sigmoid_prime( zs.back() );
    derivativeBiases.push_back( delta );
    derivativeWeights.push_back( outerProduct( delta , activations[ numLayers - 2 ] ) );

    // ---- remaining layers, walking from the last hidden layer towards the input ----
    for ( int layer = numLayers - 2 ; layer >= 1 ; layer-- )
    {
        const std::vector<double> sp = sigmoid_prime( zs[ layer - 1 ] );
        const std::vector<std::vector<double>> &nextWeights = weights[ layer + 1 ];

        // delta of this layer = (transpose(nextWeights) * delta of next layer) .* sp
        std::vector<double> layerDelta;
        layerDelta.reserve( sizes[ layer ] );
        for ( int j = 0 ; j < sizes[ layer ] ; j++ )
        {
            double weightedError = 0;
            for ( int neuron = 0 ; neuron < sizes[ layer + 1 ] ; neuron++ )
            {
                weightedError += nextWeights[ neuron ][ j ] * delta[ neuron ];
            }
            layerDelta.push_back( weightedError * sp[ j ] );
        }
        delta = layerDelta;

        derivativeBiases.push_back( delta );
        derivativeWeights.push_back( outerProduct( delta , activations[ layer - 1 ] ) );
    }

    // The gradients were collected from the output layer backwards; put them in layer order.
    std::reverse( derivativeWeights.begin() , derivativeWeights.end() );
    std::reverse( derivativeBiases.begin() , derivativeBiases.end() );
}
}
main.cpp
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include "neuralnetwork.h"
#include <string>
// Prints one labelled, tab-separated line: a tab, the name, a colon, then
// every value of v followed by a tab.
void printAll(const std::vector<double> & v, const std::string & name)
{
    std::cout << "\t" << name << ":\t";
    for ( const double value : v )
    {
        std::cout << value << "\t";
    }
    std::cout << std::endl;
}
// Prints a nested container: first the name on its own line, then each
// element through printAll, labelled with a tab and its index in brackets.
template<class T>
void printAll(const std::vector<T> & v, const std::string & name)
{
    std::cout << name << ":" << std::endl;
    int index = 0;
    for ( const T & element : v )
    {
        const std::string label = "\t[" + std::to_string(index) + "]";
        printAll(element, label);
        ++index;
    }
}
// Small driver: builds a 2-4-3 network with hand-picked weights and biases,
// runs one back-propagation step and prints the resulting gradients.
int main(int argc, char** argv )
{
    nn::neuralnetwork n({2,4,3});
    // index 0 is the (empty) placeholder for the input layer
    n.weights = {{},{{1,2},{3,4},{5,6},{7,8}} , {{9,8,7,6},{5,4,3,2},{1,2,3,4}}};
    n.biases = {{},{1, 4, 6, 8} , {9, 2, 4}};
    printAll(n.weights,"weights");
    printAll(n.biases,"biases");
    std::vector<std::vector<std::vector<double>>> derivativeWeights;
    std::vector<std::vector<double>> derivativeBiases;
    n.backPropagation({1,2},{1,2,3},derivativeWeights,derivativeBiases);
    // backPropagation writes to the vectors passed in, not to the members of n,
    // so those local vectors are the ones to print
    printAll(derivativeWeights,"derivativeWeights");
    printAll(derivativeBiases,"derivativeBiases");
    system("PAUSE");
    return 0;
}
It looks like your problem is that you are only reserving memory for the vectors in the constructor, not allocating it.
The reserve method does not resize the vector: it only pre-allocates capacity, as a performance optimization for cases where you know the vector will grow later. After reserve the vector's size() is unchanged, so the reserved elements do not exist yet and must not be accessed.
This isn't causing a problem for 'weights' and 'biases' in this particular code because you are initializing them with vectors of the proper size, which does set them to the correct size. The problems are with derivativeWeights and derivativeBiases, where you reserve memory for the vectors, but they are never actually resized. This makes this memory potentially invalid if you try to dereference it. You could use resize instead of reserve, or push back the elements one by one, which will also resize the vector.
Another comment is that you don't have to use this-> for every member of the class, the 'this->' is assumed for members of the class if you don't use it.
I didn't find the problem, but I realised that, for this problem (an artificial neural network), I can initialize each member of the class with the right size without loss of generality. So this is what I'm going to do.
I feel a little ashamed that I am not finding it... :/
Related
I was given a problem to interchange the position of the max and min numbers in a given set of numbers and print out the old arrangement and new arrangement
The old one is like this : 5,5678,62,6000
and the new one is expected to be like this: 6000,5678,62,5
I've tried running different variations of the print code to print it all to no avail, as I've been able to interchange the max and min position of the numbers
/* Attempt to swap the largest and smallest element of m and print the array
   before and after.
   NOTE(review): as written, only the local copies `min` and `max` are
   exchanged; the array m itself is never modified. */
int main () {
int m [4] = {5,5678,62,6000};
int i;
int max=m[0];
int min=m[0];
int pMin = 0;
int pMax = 0;
int temp = 0;
{
/* NOTE(review): `i` has not been assigned yet, so this reads m[i] with an
   indeterminate index (undefined behaviour). */
printf( "%d\n", m[i]) ;
}
/* print every element and track the largest value seen */
for (i=0; i<4; i++){
{
printf( "%d\n", m[i]) ;
}
if ( m[i] > max )
{
max = m[i] ;
}
}
/* track the smallest value */
for (i=0; i<4; ++i){
if ( m[i] < min )
{
min = m[i] ;
}
}
/* exchanges the two local variables only; pMin and pMax are never used */
temp = min;
min = max;
max = temp;
printf ("min = %d\n", min);
printf ("max = %d\n", max);
/* NOTE(review): i == 4 after the loop above, so m[i] is one past the end of
   the array (undefined behaviour). */
printf( "%d\n", m[i]) ;
getch();
}
If i'm able to do it right by only inputting this line of code
temp = min;
min = max;
max = temp;
I should be able to achieve the aim of switching the places of the maximum and minimum numbers, but i also want to print out the result such that the 2 numbers in the middle are unaltered just the first and last.
The old one is like this : 5,5678,62,6000
and the new one is expected to be like this: 6000,5678,62,5
You should get a warning from your compiler (if not, enable warning flags, like the -Wall in GCC), about this printing statement, inside its own block:
{
printf( "%d\n", m[i]) ;
}
I think you meant to loop over the array and print its contents, which you do afterwards. But when you first try to print, you seem to have forgotten to loop over the array.
Your code invokes Undefined Behavior (UB) here:
printf("%d\n", m[i]) ;
since you go out of range, because i has the last value after the previous for loop (to find the min), which was 4 (size of the array). So you are indexing the array beyond its end, namely m[4], which explains the garbage value of the output.
Here is the plan, which you were getting close, but let's list it here:
Find the max element of the array, and remember the index of that
element.
Find the min element of the array, and remember the index of that
element.
Swap the min and max element of the array.
Note: If there are more than one maximum elements, take into account the last one seen. Similarly for the minimum element.
Putting everything together, you get this:
#include <cstdio>
// Prints the array, swaps its largest and smallest element in place, then
// prints the resulting array.
int main () {
    int m [4] = {5,5678,62,6000};
    // Both searches start from element 0, so the indices must start at 0 too:
    // otherwise they stay uninitialized whenever m[0] is itself the extreme.
    int max = m[0], max_idx = 0;
    int min = m[0], min_idx = 0;

    for (int i = 0; i < 4; ++i)
    {
        printf( "%d\n", m[i]) ;
    }

    // element 0 is already the current candidate for both searches
    for (int i = 1; i < 4; ++i)
    {
        if ( m[i] > max )
        {
            max = m[i];
            max_idx = i;
        }
        if ( m[i] < min )
        {
            min = m[i];
            min_idx = i;
        }
    }

    // exchange the two elements inside the array
    m[min_idx] = max;
    m[max_idx] = min;

    printf ("min = %d\n", min);
    printf ("max = %d\n", max);
    printf("The new array is:\n");
    for (int i = 0; i < 4; i++)
        printf("%d\n", m[i]);
}
Output:
5
5678
62
6000
min = 5
max = 6000
The new array is:
6000
5678
62
5
try this code
# include<stdio.h>
/* Prints the array, exchanges its largest and smallest element, and prints
   the array again. */
int main () {
    int m [4] = {5,5678,62,6000};
    int largest = m[0];
    int smallest = m[0];
    int largestAt = 0;
    int smallestAt = 0;
    int k;

    /* print before swap, locating the largest element on the way */
    for (k = 0; k < 4; k++) {
        printf( "%d\n", m[k]) ;
        if ( m[k] > largest ) {
            largest = m[k] ;
            largestAt = k;
        }
    }

    /* locate the smallest element */
    for (k = 0; k < 4; ++k) {
        if ( m[k] < smallest ) {
            smallest = m[k] ;
            smallestAt = k;
        }
    }

    /* each extreme value goes to the position of the other */
    m[smallestAt] = largest;
    m[largestAt] = smallest;

    /* print after swap */
    for (k = 0; k < 4; k++) {
        printf( "%d\n", m[k]) ;
    }
    return 0;
}
If it is indeed a C++ program then use C++ features.
The program can look the following way.
#include <iostream>
#include <iterator>
#include <algorithm>
// Prints the array, swaps its minimum and maximum element, prints it again.
int main()
{
    int m[] = { 5, 5678, 62, 6000 };

    // writes all elements on one line, each followed by a space
    const auto show = [&m]
    {
        for ( const auto &item : m ) std::cout << item << ' ';
        std::cout << '\n';
    };

    show();

    const auto extremes = std::minmax_element( std::begin( m ), std::end( m ) );
    if ( extremes.first != extremes.second )
    {
        std::iter_swap( extremes.first, extremes.second );
    }

    show();
}
Its output is
5 5678 62 6000
6000 5678 62 5
As for your code then it at least in this code snippet does not make sense
int main () {
int m [4] = {5,5678,62,6000};
int i;
// ...
int temp = 0;
{
printf( "%d\n", m[i]) ;
}
// ...
not speaking about that the variable i is not initialized.
And your code does not swap the maximum and the minimum elements in the array.
If it is a C program then it can look the following way.
#include <stdio.h>
/* Prints the array, swaps its smallest and largest element in place, and
   prints the array again. */
int main( void )
{
    int m[] = { 5, 5678, 62, 6000 };
    const size_t N = sizeof( m ) / sizeof( m[0] );
    size_t lowest = 0;
    size_t highest = 0;
    size_t k;

    for ( k = 0; k < N; k++ ) printf( "%d ", m[k] );
    putchar( '\n' );

    /* element 0 is the initial candidate for both positions */
    for ( k = 1; k < N; k++ )
    {
        if ( m[k] < m[lowest] ) lowest = k;
        if ( m[highest] < m[k] ) highest = k;
    }

    if ( lowest != highest )
    {
        int saved = m[lowest];
        m[lowest] = m[highest];
        m[highest] = saved;
    }

    for ( k = 0; k < N; k++ ) printf( "%d ", m[k] );
    putchar( '\n' );
}
The program output is the same as shown above
5 5678 62 6000
6000 5678 62 5
You can rewrite this C program as a C++ program using the loops used in the program.
I'm making a 2D dynamic Matrix class. Problem arises in my copy constructor and =operator. Kindly tell me what am I doing wrong. Here is the code: (The cout's are for checking.
private:
// number of rows
int rows;
// number of columns (NOTE(review): spelled `coloumns` here, while the methods below refer to `column`, `col` and `columns`)
int coloumns;
// row pointers; operator= allocates `rows` arrays of floats
float **ptr;
// Copy constructor: tries to release any existing storage and then delegates
// the actual copy to operator=.
// NOTE(review): inside a constructor the members of *this have not been
// initialised yet, so the tests on rows/ptr below read indeterminate values,
// and the delete[] branch may free pointers that were never allocated.
// NOTE(review): `column` and `col` do not match the declared member name.
Matrix(const Matrix & M)
{ cout << "copy const called"<<endl;
cout << "1 "<< endl;
if(rows < 0 || column < 0) // To check if its a garbage value or not
{
rows = 0, col = 0;
ptr = NULL;
cout << "2 "<< endl;
}
else if(ptr!=NULL)
{
cout << "3 "<< endl;
for(int i = 0 ; i < col; i++)
{
delete [] ptr[i];
}
cout << "4 "<< endl;
delete [] ptr;
ptr = NULL;
cout << "5 "<< endl;
}
cout << "6 "<< endl;
// Calls operator= below. That operator returns a Matrix by value, which
// needs this copy constructor again, so the two functions call each other
// without end.
*this = M;
cout << "7 "<< endl;
}
// Copy assignment: releases the current storage, allocates rows x col floats
// and copies every element of M.
// NOTE(review): the return type is Matrix (by value), so each `return *this`
// makes a copy through the copy constructor.
// NOTE(review): `row`, `col` and `columns` do not match the declared members.
Matrix operator= (const Matrix &M)
{
// self-assignment: nothing to do
if(this == &M)
{
return *this;
}
if(row!=0 && columns != 0)
{
// NOTE(review): this frees `columns` row pointers, while the allocation
// below creates `rows` of them
for(int i = 0 ; i < columns; i++)
{
delete [] ptr[i];
}
delete [] ptr;
ptr = NULL;
}
rows = M.rows; col = M.columns;
// one array of floats per row
ptr = new float *[rows];
for(int i = 0; i < rows; i++)
{
ptr[i] = new float [col];
}
// element-wise copy
for(int i = 0; i< rows ; i++)
{
for( int j=0 ; j< columns ;j++)
{
ptr[i][j] = M.ptr[i][j];
}
}
return *this;
}
// Minimal reproduction: copy-constructs M2 from M1 and writes one element.
// NOTE(review): the (int, int) constructor and operator() used here are not
// part of the code shown above.
int main()
{
Matrix M1(2,3);
Matrix M2(M1);
M2(0, 0) = 1;
}
It stops at the " *this = M " in the copy constructor. Moreover, I wanted to confirm that when I return something in the = operator, does it take the place of the whole " *this = M" , or just replaces M?
Note:
Not allowed to use vectors.
You have infinite recursion going on. In your copy constructor you have
*this = M;
This calls your class's operator= which you have declared as
Matrix operator= (const Matrix &M)
You can see that you are returning by value. When you return by value, a copy is made. To make that copy we need to call the copy constructor. This in turn calls the assignment operator again, which calls the copy constructor, and the cycle just keeps going.
Your copy constructor can be corrected and simplified to be
// Copy constructor: allocates a rows x columns block of its own and copies
// every element of m into it (deep copy).
Matrix(const Matrix & m) : rows(m.rows), columns(m.columns)
{
    ptr = new float*[rows];
    for (int i = 0; i < rows; i++)
    {
        // allocate row i and fill it straight away
        ptr[i] = new float[columns];
        for (int j = 0; j < columns; j++)
            ptr[i][j] = m.ptr[i][j];
    }
}
Notice how I didn't have to check the state of the new class as we know what it is since we are initializing it. Everything is uninitialized and all we have to do is initialize everything and then copy the values from the one matrix into the new one.
With regard to your assignment operator, you should have it return a reference to the object instead of returning by value. This avoids unnecessary copies and allows you to chain the operator with other operators.
Your code looks complicated to me.
I do not understand why you have chosen float** instead of plain float*. This looks better to me:
int rc, cc; // row count, column count
float* d; // data (rc * cc floats)
Memory allocation became a simpler operation:
d = new float[ rc * cc ];
Copying is also simpler:
memcpy( d, source.d, rc * cc * sizeof( *d ) );
The "hard" part is to retrieve a matrix element. You have to convert a row and column to an index:
index = row * column_count + column;
The whole class:
#include <iostream>
// Dense row-major matrix of floats that owns a single contiguous buffer.
// Element (r, c) is stored at index r * cc + c.
class matrix_type
{
    int rc, cc; // row count, column count
    float* d; // data (rc * cc floats), or 0 when the matrix has no elements

    // Makes the matrix arc x acc. The buffer is only reallocated when the
    // number of elements changes, but the dimensions are always updated.
    // The element values are unspecified afterwards.
    void allocate( const int arc, const int acc ) // a prefix in arc and acc stands for Argument
    {
        if ( arc * acc != rc * cc )
        {
            float* fresh = new float[arc * acc]; // allocate first: if this throws, *this is untouched
            delete[] d;
            d = fresh;
        }
        rc = arc;
        cc = acc;
    }

    // Gives this matrix the shape and the element values of s.
    void copy( const matrix_type& s )
    {
        allocate( s.rc, s.cc );
        const int count = rc * cc;
        for ( int i = 0; i < count; ++i )
            d[i] = s.d[i];
    }

    // Converts (row, column) to an index into d; throws "out of range" when
    // either coordinate is negative or not below the respective count.
    int as_idx( const int ar, const int ac ) const
    {
        if ( ar < 0 || ar >= rc || ac < 0 || ac >= cc )
            throw "out of range";
        return ar * cc + ac;
    }

public:
    // Creates an empty (0 x 0) matrix.
    matrix_type() : rc( 0 ), cc( 0 ), d( 0 )
    {
    }

    // Creates an arc x acc matrix with every element set to zero.
    matrix_type( const int arc, const int acc ) : rc( 0 ), cc( 0 ), d( 0 )
    {
        allocate( arc, acc );
        const int count = rc * cc;
        for ( int i = 0; i < count; ++i )
            d[i] = 0.f;
    }

    // Releases the buffer.
    ~matrix_type()
    {
        delete[] d;
    }

    // Deep copy of s.
    matrix_type( const matrix_type& s ) : rc( 0 ), cc( 0 ), d( 0 )
    {
        copy( s );
    }

    // Deep copy of s; self-assignment is harmless.
    matrix_type& operator=(const matrix_type& s)
    {
        copy( s );
        return *this;
    }

    // Access to element (ar, ac); throws the string "out of range" (a
    // const char*) for coordinates outside the matrix.
    float& at( const int ar, const int ac )
    {
        return d[as_idx( ar, ac )];
    }

    // Read-only access to element (ar, ac); same range check as above.
    const float& at( const int ar, const int ac ) const
    {
        return d[as_idx( ar, ac )];
    }

    // Writes the matrix to os: one line per row, every element followed by a space.
    void print( std::ostream& os ) const
    {
        for ( int r = 0; r < rc; ++r )
        {
            for ( int c = 0; c < cc; ++c )
                os << at( r, c ) << ' ';
            os << '\n';
        }
    }
};
int main()
{
matrix_type m1( 3, 5 );
m1.at( 0, 0 ) = 1.f;
m1.at( 2, 4 ) = 15.f;
matrix_type m2( m1 );
matrix_type m3( 0, 0 );
m3 = m2;
m3.print( std::cout );
return 0;
}
Having a hard time designing an efficient algorithm that accomplishes the following. If I start with a 2d vector A,
A = [1 2 3;
2 3 4;
5 6]
I want to take the rows that contain common elements and combine them (removing duplicates) resulting in 2d vector B:
B = [1 2 3 4;
5 6]
I can accomplish this in Matlab, but am having a hard time in C++. Any help is appreciated.
Try this, it worked for me with your example matrix. It looks like a lot of code, but there are functions just for the example and for debugging purpose.
// Prints the elements of a back to back (no separator), then a newline.
void disp( const std::vector< int >& a )
{
    for ( std::size_t k = 0; k < a.size(); ++k )
    {
        std::cout << a[ k ];
    }
    std::cout << "\n";
}
void disp( const std::vector< std::vector< int > >& matrix )
{
for ( const auto& row : matrix )
{
disp( row );
}
}
// Returns true when at least one value occurs in both vectors.
bool hasCommonElements( const std::vector< int >& aVector1, const std::vector< int >& aVector2 )
{
    const auto shared = std::find_first_of( aVector1.begin(), aVector1.end(),
                                            aVector2.begin(), aVector2.end() );
    return shared != aVector1.end();
}
// Sorts aRow in ascending order and drops every repeated value.
void makeAllElementsUnique( std::vector< int >& aRow )
{
    std::sort( aRow.begin(), aRow.end() );
    const auto firstDuplicate = std::unique( aRow.begin(), aRow.end() );
    aRow.erase( firstDuplicate, aRow.end() );
}
// Merges, in place, all rows that share at least one value, directly or
// through a chain of other rows. A row that absorbed another one ends up
// sorted and free of duplicates; rows that were never merged are left as is.
void mergeRowsWithCommonValues( std::vector< std::vector< int > >& aMatrix )
{
    for ( std::size_t target = 0; target < aMatrix.size(); ++target )
    {
        std::size_t candidate = target + 1;
        while ( candidate < aMatrix.size() )
        {
            std::vector< int >& into = aMatrix[ target ];
            const std::vector< int >& from = aMatrix[ candidate ];

            const bool overlap =
                std::find_first_of( into.begin(), into.end(), from.begin(), from.end() ) != into.end();
            if ( !overlap )
            {
                ++candidate;
                continue;
            }

            // Merge the candidate into the target row and normalise the result.
            into.insert( into.end(), from.begin(), from.end() );
            std::sort( into.begin(), into.end() );
            into.erase( std::unique( into.begin(), into.end() ), into.end() );

            // Remove the merged row.
            aMatrix.erase( aMatrix.begin() + candidate );

            // The target row just grew, so rows that were skipped earlier may
            // overlap with it now: scan again from the row after the target.
            candidate = target + 1;
        }
    }
}
// Builds the sample matrix from the question, prints it, merges the rows
// with common values and prints the result.
void example()
{
    std::vector< std::vector< int > > matrix{ { 1, 2, 3 }, { 2, 3, 4 }, { 5, 6 } };

    disp( matrix );
    mergeRowsWithCommonValues( matrix );
    disp( matrix );
}
Here is my own attempt, kinda messy, I know.
// Builds B from A row by row: a row of A that shares an element with a row of
// B is merged into that row of B, otherwise it is appended to B as a new row.
// NOTE(review): A and B are declared outside this fragment; presumably both
// are std::vector<std::vector<int>> - confirm.
int index2 = 0;   // index of the row of A being processed
int rowIndex = 0; // row of B in which a common element was found
int tracker = 0;  // 1 when the current row of A shares an element with B
int insert = 0;   // number of elements of B[rowIndex] that differ from the current element of A
// Loop over all rows
while (index2 < A.size()){
// Check if vector is empty. If so, copy the first row in.
if (B.empty()){
B.push_back(A[index2]);
index2++;
cout<<"Hit an empty.\n";
}
// If vector not empty, do the complicated stuff.
else if (!B.empty()){
// Search every row of B for any element of the current row of A.
// rowIndex is overwritten on every match, so only the last matching row is kept.
for (int i = 0; i < A[index2].size(); i++){ // element in A
for (int j = 0; j < B.size(); j++){ // row in B
for (int k = 0; k < B[j].size(); k++){ // element in row in B
if (A[index2][i] == B[j][k]){
rowIndex = j;
tracker = 1;
}
}
}
}
// If tracker activated, we know there's a common element.
if (tracker == 1){
cout<<"Hit a positive tracker.\n";
for (int i = 0; i < A[index2].size(); i++){ // element in A
for (int j = 0; j < B[rowIndex].size(); j++){ // element in B at rowIndex
if (A[index2][i] != B[rowIndex][j])
insert++;
// not part of the if above (no braces): printed on every iteration
cout<<"Hit an insert increment.\n";
}
// the element differs from every element of the row, so it is new: append it
if (insert == B[rowIndex].size()){
cout<<"Hit an insert.\n";
B[rowIndex].push_back(A[index2][i]);
}
insert = 0;
}
index2++;
}else{
// no common element: the row of A becomes a new row of B
B.push_back(A[index2]);
index2++;
cout<<"Hit a zero tracker.\n";
}
}
tracker = 0;
}
Hello I am looking for a way to write this C++ Code in a general way, so that if a want 20 columns I will not have to write 20 for loops:
// Prints every 4-digit combination of the values 1..5 in which each digit is
// greater than or equal to the digit before it: every inner loop starts at
// the current value of the loop that encloses it.
for(int i=1; i<6; i++) {
for(int j=i; j<6; j++) {
for(int k=j; k<6; k++) {
for(int m=k; m<6; m++) {
std::cout << i << j << k << m << std::endl;
}
}
}
}
It is important that my numbers follow a >= Order.
I am very grateful for any help.
This recursive function should work:
#include <iostream>
// Advances indexes[0..n] to the next combination in which every position is
// greater than or equal to the position before it and below `limit`.
// Returns false when no further combination exists; in that case every
// position has been incremented and the contents are no longer a valid
// combination.
bool inc( int *indexes, int limit, int n )
{
    // Walk from the last position towards the first, incrementing each one,
    // until an increment stays below the limit.
    int pos = n;
    while( true ) {
        ++indexes[pos];
        if( indexes[pos] < limit ) break;
        if( pos == 0 ) return false;
        --pos;
    }

    // Every position after the one that moved restarts at that position's value.
    for( int k = pos + 1; k <= n; ++k )
        indexes[k] = indexes[k-1];
    return true;
}
// Prints every combination produced by inc() for N positions with values 1..5.
int main()
{
    const size_t N=3;
    int indexes[N];

    // every position starts at 1
    size_t k = 0;
    while( k < N ) indexes[k++] = 1;

    bool more = true;
    while( more ) {
        for( size_t i = 0; i < N; ++i ) std::cout << indexes[i] << ' ';
        std::cout << std::endl;
        more = inc( indexes, 6, N-1 );
    }
    return 0;
}
live example
The design here is simple. We take one std::vector containing the count for each dimension, and another std::vector containing the current index in each dimension.
advance advances the current bundle of dimension indexes by amt (default 1).
// Adds amt to the multi-dimensional index `indexes`, treating it as a
// mixed-radix number whose least significant digit is indexes[0] and whose
// digit i has radix counts[i]. `indexes` is grown to counts.size() first if
// it is shorter. When the addition carries out of the last digit, indexes is
// set to counts, which serves as the past-the-end marker.
void advance( std::vector<size_t>& indexes, std::vector<size_t> const& counts, size_t amt=1 ) {
  const size_t dims = counts.size();
  if (indexes.size() < dims)
    indexes.resize(dims);

  size_t carry = amt;
  for (size_t dim = 0; dim != dims; ++dim) {
    size_t& digit = indexes[dim];
    digit += carry;
    if (digit < counts[dim])
      return;            // no carry into the next digit: done
    assert(counts[dim]!=0);
    carry = digit / counts[dim];
    digit %= counts[dim];
  }
  // past the end, don't advance:
  indexes = counts;
}
which gives us an advance function for generic n dimensional coordinates.
Next, a filter that tests the restriction you want:
// Returns true when no element of v is smaller than the element after it,
// i.e. when v[0] >= v[1] >= ... holds. Empty and single-element vectors
// qualify.
bool vector_ascending( std::vector<size_t> const& v ) {
  size_t i = 1;
  while (i < v.size()) {
    if (v[i-1] < v[i])
      return false;
    ++i;
  }
  return true;
}
then a for loop that uses the above:
// Walks every index tuple below counts and prints, one per line and shifted
// to start at 1, those accepted by vector_ascending().
void print_a_lot( std::vector<size_t> counts ) {
  std::vector<size_t> v(counts.size());
  while (v < counts) {
    // only valid tuples are printed
    if (vector_ascending(v)) {
      for (size_t x : v)
        std::cout << (x+1);
      std::cout << std::endl;
    }
    advance(v,counts);
  }
}
live example.
No recursion needed.
The downside to the above is that it generates 6^20 elements, and then filters. We don't want to make that many elements.
// Variant of advance() that, after a successful increment of digit i, also
// raises some of the following digits to at least the new value of digit i,
// with the intent of producing only ordered tuples instead of filtering them
// afterwards.
// NOTE(review): the inner loop starts at `i+i`; `i+1` was presumably intended
// (for i == 0 the loop starts at digit 0 itself and stops at once) - confirm.
// NOTE(review): the assert below calls vector_ascending(); whether the
// ordering enforced here agrees with that check should be verified.
void advance( std::vector<size_t>& indexes, std::vector<size_t> const& counts, size_t amt=1 ) {
if (indexes.size() < counts.size())
indexes.resize(counts.size());
for (size_t i = 0; i < counts.size(); ++i ) {
indexes[i]+=amt;
if (indexes[i] < counts[i])
{
// digit i did not overflow: fix up the ordering and stop
size_t min = indexes[i];
// enforce <= ordering:
for (size_t j = i+i; j < counts.size(); ++j) {
if (indexes[j]<min)
indexes[j]=min;
else
break; // other elements already follow <= transitively
}
assert(vector_ascending(indexes));
return;
}
assert(counts[i]!=0);
// carry the overflow into the next digit
amt = indexes[i]/counts[i];
indexes[i] = indexes[i]%counts[i];
}
// past the end, don't advance:
indexes = counts;
}
which should do it without the vector_ascending check in the previous version. (I left the assert in to do testing).
This function works for me, but do not call it with 20 if you want it to finish.
#include <list>
#include <iostream>
// Extends every list in `inputlist` `counter` times. Each round replaces
// every list by five copies of itself, one for each of the values 1 to 5
// appended at the end, so the number of lists is multiplied by five per
// round. A `counter` of 0 returns `inputlist` unchanged.
std::list<std::list<int>> fun (std::list<std::list<int>> inputlist, int counter)
{
    for (; counter != 0; --counter)
    {
        std::list<std::list<int>> extended;
        for (const std::list<int>& prefix : inputlist)
        {
            for (int value = 1; value < 6; ++value)
            {
                extended.push_back(prefix);
                extended.back().push_back(value);
            }
        }
        // the lists built in this round are the input of the next one
        inputlist.swap(extended);
    }
    return inputlist;
}
// Builds all lists of length five via fun, starting from a single empty
// list, and prints each one as its values without separators, one per line.
int main()
{
    // one default-constructed (empty) inner list is the starting point
    const std::list<std::list<int>> listlist(1);
    const std::list<std::list<int>> manynumbers = fun (listlist,5);
    for (const std::list<int>& somenumbers : manynumbers)
    {
        for (std::list<int>::const_iterator k = somenumbers.begin(); k != somenumbers.end(); ++k)
        {
            std::cout<<*k;
        }
        std::cout<<std::endl;
    }
    return 0;
}
The same approach in Processing (Java):
void loopFunction(int targetLevel, int actualLevel, int min, int max, String prefix){
/*
Recursive replacement for a stack of nested loops.
targetLevel is the nesting depth at which lines are printed
actualLevel is the current depth (start with 1)
min is the first value tried at this level; each deeper level starts at the
    value currently chosen by the level above it
max is the exclusive upper bound: values run from min up to max-1
prefix is the text built by the outer levels (start with an empty string)
see usage below (in setup function)
*/
final boolean innermost = (targetLevel==actualLevel);
int m = min;
while (m < max) {
final String line = prefix+ " " + m;
if (innermost) {
println(line);
} else {
loopFunction(targetLevel, actualLevel+1, m, max, line);
}
m++;
}
}
// Example call: 10 nested levels, values from 1 up to (but not including) 6.
void setup(){
final int levels = 10;
final int firstValue = 1;
final int upperBound = 6;
loopFunction(levels, 1, firstValue, upperBound, "");
}
Well, I am not the fastest at writing answers... when I started, there was no other answer. Anyhow, here is my version:
#include <iostream>
#include <vector>
using namespace std;
// Iterates over all tuples of `dims` integers in [0, size) whose entries
// never decrease from front to back (index[0] <= index[1] <= ...).
// The current tuple is exposed as `index` and starts as all zeros.
//
// Typical use: process `index`, then call next() while hasNext() is true;
// the tuple present when hasNext() turns false is the last valid one and
// still has to be processed.
class Multiindex {
public:
    typedef std::vector<int> Index;

    // dims: number of entries per tuple; size: exclusive upper bound of each entry.
    // Members are initialised in declaration order (index first).
    Multiindex(int dims,int size) :
        index(Index(dims,0)),dims(dims),size(size){}

    // Moves to the following tuple: the last entry is incremented and
    // overflow is carried towards the front. Once every position has
    // overflowed the object is past the end and hasNext() stays false.
    void next(){
        int j=dims-1;
        // j must be checked first, otherwise nextAt(-1) writes out of bounds
        while (j >= 0 && nextAt(j)){j--;}
    }

    Index index;

    // True while a tuple follows the current one. The last tuple is the one
    // whose first entry has reached size-1. Always false for an empty tuple
    // and after stepping past the end.
    bool hasNext() const {return !index.empty() && index[0] < size-1;}
private:
    // Increments position j and reports whether it overflowed. Without
    // overflow, every later position is reset to the new value of position j,
    // the smallest value that keeps the tuple non-decreasing.
    bool nextAt(int j){
        index[j] = index[j]+1;
        bool overflow = (index[j]>=size);
        if (!overflow && j < dims-1){std::fill(index.begin() + j + 1,index.end(),index[j]);}
        return overflow;
    }
    int dims;
    int size;
};
// Prints every tuple produced by a Multiindex with 4 entries and bound 6,
// one tuple per line, entries written without separators.
int main() {
    Multiindex m(4,6);
    for (;;) {
        cout << m.index[0] << m.index[1] << m.index[2] << m.index[3] << endl;
        // the tuple printed when hasNext() is false is the final one
        if (!m.hasNext())
            break;
        m.next();
    }
    return 0;
}
I want to get the Cartesian product of these two vector<string>s:
vector<string> final{"a","b","c"};
vector<string> temp{"1","2"};
But I want to put the result in final, such that final would contain:
a1
a2
b1
b2
c1
c2
I'd like to do this without creating a temporary array. Is it possible to do this? If it matters, the order of final is not important.
You may try the following approach
#include <iostream>
#include <vector>
#include <string>
// Demonstrates an in-place Cartesian product: `final` is enlarged and then
// filled from the back, so each original entry is read before the slot it
// occupies is overwritten.
int main()
{
    std::vector<std::string> final{ "a", "b", "c" };
    std::vector<std::string> temp{ "1", "2" };

    const auto originalCount = final.size();
    const auto suffixCount = temp.size();
    final.resize( originalCount * suffixCount );

    // next slot to fill, moving from the end towards the front
    auto slot = final.size();
    for ( auto src = originalCount; src-- > 0; )
    {
        for ( auto k = suffixCount; k-- > 0; )
        {
            final[--slot] = final[src] + temp[k];
        }
    }

    for ( const std::string &s : final ) std::cout << s << ' ';
    std::cout << std::endl;
    return 0;
}
The program output is
a1 a2 b1 b2 c1 c2
Try the function cartesian:
#include <vector>
#include <string>
using namespace std;
// Replaces `f` in place by the Cartesian product of `f` and `o`: afterwards
// f[i*n + j] holds (old f[j]) + o[i], where n is the original size of `f`.
// An empty `o` leaves `f` empty.
//
// f: left operands on entry, all concatenations on return.
// o: right operands; only read. It must not be the same object as `f`.
void cartesian(std::vector<std::string>& f, const std::vector<std::string>& o) {
    // sizes stay in std::size_t: no narrowing to int, no int overflow in i*n
    const std::size_t oldfsize = f.size();
    f.resize(oldfsize * o.size());
    // Work from the last group down to group 0: group 0 overwrites the
    // original entries, so it has to be produced last.
    for (std::size_t i = o.size(); i-- > 0; ) {
        const std::size_t base = i * oldfsize;
        for (std::size_t j = 0; j < oldfsize; ++j) {
            f[base + j] = f[j] + o[i];
        }
    }
}
// Runs cartesian on a small example and prints each resulting string on its
// own line.
int main()
{
    vector<string> f{"a","b","c"};
    vector<string> temp{"1","2"};
    cartesian(f, temp);
    for (vector<string>::const_iterator it = f.begin(); it != f.end(); ++it) {
        printf("%s\n", it->c_str());
    }
}
This works for me:
// Turns `final` in place into the Cartesian product of `final` and `temp`
// (every original entry followed by every entry of `temp`, grouped by
// original entry) and prints the result: one line per original entry, each
// combination followed by a space.
void testCartesianString(vector<string>& final,
                         vector<string>const& temp)
{
    const size_t originalCount = final.size();
    const size_t suffixCount = temp.size();

    // Pass 1: spread the entries out, e.g. {"a","b","c"} with two suffixes
    // becomes {"a","a","b","b","c","c"}. Going from the last entry to the
    // first means no entry is overwritten before it has been copied.
    final.resize(originalCount*suffixCount);
    size_t src = originalCount;
    while ( src != 0 )
    {
        --src;
        const size_t groupBegin = src*suffixCount;
        const size_t groupEnd = groupBegin + suffixCount;
        for ( size_t slot = groupBegin; slot != groupEnd; ++slot )
        {
            final[slot] = final[src];
        }
    }

    // Pass 2: append the suffixes, giving {"a1","a2","b1","b2","c1","c2"},
    // and print each group on its own line.
    size_t slot = 0;
    for ( size_t row = 0; row < originalCount; ++row )
    {
        for ( size_t col = 0; col < suffixCount; ++col, ++slot )
        {
            final[slot] += temp[col];
            cout << final[slot] << " ";
        }
        cout << endl;
    }
}
This is just a personal-preference alternative to Vlad from Moscow's solution. I think it may be faster for dynamic arrays because there would be less branching. But I haven't gotten around to writing a timing test bench.
Given the inputs vector<string> final and vector<string> temp:
const auto size = testValues1.first.size();
testValues1.first.resize(size * testValues1.second.size());
for (int i = testValues1.first.size() - 1; i >= 0; --i){
testValues1.first[i] = testValues1.first[i % size] + testValues1.second[i / size];
}
EDIT:
Nope, this solution is slower not faster: http://ideone.com/e.js/kVIttT
And usually significantly slower, though I don't know why...
In any case, prefer Vlad from Moscow's answer