Neural Network doesn't seem to be working properly - c++

I've been creating my own neural network by following a book called "Make Your Own Neural Network" by Tariq Rashid. After some theory, the book explains an example written in Python. Since I'm not into Python that much I tried implementing it in C++.
And yes, I did try existing libraries. But now I'd like to implement my own architecture I can tweak as I wish and use them with my Qt and AR projects.
Feeding forward the signals works as expected. I used the same sample values as given in the book and it all seems to be working fine.
The back-propagation however fails at solving the XOR problem. The output value is almost always the same. It's just a bit bigger when both input values are 0.0. The weights appear unusually big to me (often bigger than 7.0. Not sure if that's normal).
Indexing through all layers and transposing those that need to be, also works fine since otherwise, I'd get out of bounds errors.
The project consists of a namespace containing all matrix calculations and the NeuralNetwork class.
Perhaps someone knows what's causing the issue in my code.
Thanks
(EDIT: If you want to compile and run it on your own you can get the source from my Git repository)
AnnMaths.cpp
#include "AnnMaths.h"
vector<vector<double>> AnnMaths::transpose(vector<vector<double>>& x)
{
vector<vector<double>> y;
for (int col = 0; col < x[0].size(); ++col)
{
y.push_back(vector<double>());
for (int row = 0; row < x.size(); ++row)
{
y[col].push_back(x[row][col]);
}
}
return y;
}
vector<vector<double>> AnnMaths::multiply(vector<vector<double>>& x, vector<vector<double>>& y, bool useSigmoid)
{
vector<vector<double>> z;
for (int row = 0; row < x.size(); row++)
{
z.push_back(vector<double>());
for (int col = 0; col < y[0].size(); col++)
{
z[row].push_back(0);
for (int inner = 0; inner < x[0].size(); inner++)
{
z[row][col] += x[row][inner] * y[inner][col];
}
if (useSigmoid) { z[row][col] = sigmoid(z[row][col]); }
}
}
return z;
}
vector<vector<double>> AnnMaths::getOutputErrors(vector<vector<double>>& targets, vector<vector<double>>& output)
{
vector<vector<double>> errors;
for (int neuron = 0; neuron < targets.size(); ++neuron)
{
errors.push_back(vector<double>());
errors[neuron].push_back(/*pow(*/targets[neuron][0] - output[neuron][0]/*, 2)*/);
}
return errors;
}
vector<vector<double>> AnnMaths::getHiddenErrors(vector<vector<double>>& weightsT, vector<vector<double>>& errors)
{
vector<vector<double>> results = multiply(weightsT, errors);
return results;
}
vector<vector<double>> AnnMaths::applyErrors(vector<vector<double>>& errors, vector<vector<double>>& output)
{
vector<vector<double>> results;
for (int i = 0; i < errors.size(); ++i)
{
results.push_back(vector<double>());
results[i].push_back(errors[i][0] * output[i][0] * (1.0 - output[i][0]));
}
return results;
}
vector<vector<double>> AnnMaths::applyLearnrate(vector<vector<double>>& x, double lr)
{
vector<vector<double>> results;
for (int row = 0; row < x.size(); ++row)
{
results.push_back(vector<double>());
for (int col = 0; col < x[0].size(); ++col)
{
results[row].push_back(x[row][col] * lr);
}
}
return results;
}
vector<vector<double>> AnnMaths::add(vector<vector<double>>& x, vector<vector<double>>& y)
{
vector<vector<double>> results;
for (int row = 0; row < x.size(); ++row)
{
results.push_back(vector<double>());
for (int col = 0; col < x[0].size(); ++col)
{
results[row].push_back(x[row][col] + y[row][col]);
}
}
return results;
}
double AnnMaths::sigmoid(double val)
{
return 1 / (1 + pow(E, -val));
}
double AnnMaths::randomVal()
{
return ((double)rand() / (RAND_MAX) -0.5);
}
NeuralNetwork.cpp
#include "NeuralNetwork.h"
NeuralNetwork::NeuralNetwork(vector<int>& topology, vector<vector<double>>& input, vector<vector<double>>& targets, double lr)
{
this->topology = topology;
this->layers.clear();
this->weights.clear();
this->targets.clear();
this->targets = targets;
this->layers.resize(topology.size());
this->errors.resize(this->layers.size());
this->weights.resize(topology.size()-1);
this->learnrate = lr;
for (int layer = 0; layer < topology.size(); ++layer)
{
if (layer == 0)
{
this->layers[layer].push_back(vector<double>());
this->layers[layer] = input;
}
else
{
for (int neuron = 0; neuron < topology[layer]; ++neuron)
{
this->layers[layer].push_back(vector<double>());
this->layers[layer][neuron].push_back(/*AnnMaths::randomVal()*/0.0);
}
}
if (layer < (topology.size() - 1))
{
for (int row = 0; row < topology[layer+1]; ++row)
{
this->weights[layer].push_back(vector<double>());
for (int col = 0; col < topology[layer]; ++col)
{
this->weights[layer][row].push_back(AnnMaths::randomVal());
}
}
}
}
this->errors = this->layers;
}
void NeuralNetwork::feedForward()
{
for (int layer = 0; layer < weights.size(); ++layer)
{
layers[layer + 1] = AnnMaths::multiply(weights[layer], layers[layer], true);
}
}
void NeuralNetwork::setErrors()
{
for (int layer = layers.size() - 1; layer >= 0; --layer)
{
if (layer == layers.size() - 1)
{
this->errors[layer] = AnnMaths::getOutputErrors(this->targets, layers[layer]);
}
else
{
vector<vector<double>> weightsT = AnnMaths::transpose(this->weights[layer]);
vector<vector<double>> tmpErrors = AnnMaths::multiply(weightsT, this->errors[layer+1]);
this->errors[layer] = tmpErrors;
}
}
}
void NeuralNetwork::setInput(vector<vector<double>>& input)
{
this->layers[0] = input;
}
void NeuralNetwork::setTargets(vector<vector<double>>& target)
{
this->targets = targets;
}
void NeuralNetwork::backPropagation()
{
setErrors(); //compute all errors
for (int layer = layers.size() - 2; layer >= 0; --layer)
{
vector<vector<double>> prevOutputT = AnnMaths::transpose(layers[layer]); //get the transposed output of the previous layer
vector<vector<double>> appliedErrors = AnnMaths::applyErrors(this->errors[layer+1], layers[layer+1]); //apply errors to output of next layer
vector<vector<double>> deltaWeights = AnnMaths::multiply(appliedErrors, prevOutputT); //compute delta of weights by multiplying the applied output with the previous output
deltaWeights = AnnMaths::applyLearnrate(deltaWeights, learnrate); //add learning rate to delta weights
weights[layer] = AnnMaths::add(deltaWeights, weights[layer]); //add delta weights to the weights
}
}
main.cpp
#include"AnnMaths.h"
#include<iostream>
#include<vector>
#include"NeuralNetwork.h"
int main()
{
std::vector<std::vector<std::vector<double>>> input;
input.push_back(std::vector<std::vector<double>>());
input.push_back(std::vector<std::vector<double>>());
input.push_back(std::vector<std::vector<double>>());
input.push_back(std::vector<std::vector<double>>());
input[0].push_back(std::vector<double>());
input[0].push_back(std::vector<double>());
input[0][0].push_back(0.0);
input[0][1].push_back(0.0);
input[1].push_back(std::vector<double>());
input[1].push_back(std::vector<double>());
input[1][0].push_back(1.0);
input[1][1].push_back(0.0);
input[2].push_back(std::vector<double>());
input[2].push_back(std::vector<double>());
input[2][0].push_back(0.0);
input[2][1].push_back(1.0);
input[3].push_back(std::vector<double>());
input[3].push_back(std::vector<double>());
input[3][0].push_back(1.0);
input[3][1].push_back(1.0);
std::vector<std::vector<std::vector<double>>> targets;
targets.push_back(std::vector<std::vector<double>>());
targets.push_back(std::vector<std::vector<double>>());
targets.push_back(std::vector<std::vector<double>>());
targets.push_back(std::vector<std::vector<double>>());
targets[0].push_back(std::vector<double>());
targets[0][0].push_back(0.0);
targets[1].push_back(std::vector<double>());
targets[1][0].push_back(1.0);
targets[2].push_back(std::vector<double>());
targets[2][0].push_back(1.0);
targets[3].push_back(std::vector<double>());
targets[3][0].push_back(0.0);
std::vector<int> topology;
topology.push_back(input[0].size());
topology.push_back(3);
topology.push_back(targets[0].size());
NeuralNetwork nn(topology, input[0], targets[0], 0.3);
for (struct { int i = 0; int count = 0; } x; x.i < 10000; ++x.i, ++x.count)
{
if (x.count == 4) { x.count = 0; }
nn.setInput(input[x.count]);
nn.setTargets(targets[x.count]);
nn.feedForward();
nn.backPropagation();
if (x.i % 51 == 0 )
{
nn.printInput();
nn.printOutput();
}
}
return 0;
}
For better visibility I did not include test and print functions.

Related

Speeding up calculation using vectors in C++ by using pointers/references

Currently, I am making a C++ program that solves a sudoku. In order to do this, I calculate the "energy" of the sudoku (the number of faults) frequently. This calculation unfortunately takes up a lot of computation time. I think that it can be sped up significantly by using pointers and references in the calculation, but have trouble figuring out how to implement this.
In my solver class, I have a vector<vector<int> data-member called _sudoku, that contains the values of each site. Currently, when calculating the energy I call a lot of functions with pass-by-value. I tried adding a & in the arguments of the functions and a * when making the variables, but this did not work. How can I make this program run faster by using pass-by-reference?
Calculating the energy should not change the vector anyway so that would be better.
I used the CPU usage to track down 80% of the calculation time to the function where vectors are called.
int SudokuSolver::calculateEnergy() {
int energy = 243 - (rowUniques() + colUniques() + blockUniques());//count number as faults
return energy;
}
int SudokuSolver::colUniques() {
int count = 0;
for (int col = 0; col < _dim; col++) {
vector<int> colVec = _sudoku[col];
for (int i = 1; i <= _dim; i++) {
if (isUnique(colVec, i)) {
count++;
}
}
}
return count;
}
int SudokuSolver::rowUniques() {
int count = 0;
for (int row = 0; row < _dim; row++) {
vector<int> rowVec(_dim);
for (int i = 0; i < _dim; i++) {
rowVec[i] = _sudoku[i][row];
}
for (int i = 1; i <= _dim; i++) {
if (isUnique(rowVec, i)) {
count++;
}
}
}
return count;
}
int SudokuSolver::blockUniques() {
int count = 0;
for (int nBlock = 0; nBlock < _dim; nBlock++) {
vector<int> blockVec = blockMaker(nBlock);
for (int i = 1; i <= _dim; i++) {
if (isUnique(blockVec, i)) {
count++;
}
}
}
return count;
}
vector<int> SudokuSolver::blockMaker(int No) {
vector<int> block(_dim);
int xmin = 3 * (No % 3);
int ymin = 3 * (No / 3);
int col, row;
for (int i = 0; i < _dim; i++) {
col = xmin + (i % 3);
row = ymin + (i / 3);
block[i] = _sudoku[col][row];
}
return block;
}
bool SudokuSolver::isUnique(vector<int> v, int n) {
int count = 0;
for (int i = 0; i < _dim; i++) {
if (v[i] == n) {
count++;
}
}
if (count == 1) {
return true;
} else {
return false;
}
}
The specific lines that use a lot of computatation time are the ones like:
vector<int> colVec = _sudoku[col];
and every time isUnique() is called.
I expect that if I switch to using pass-by-reference, my code will speed up significantly. Could anyone help me in doing so, if that would indeed be the case?
Thanks in advance.
If you change your SudokuSolver::isUnique to take vector<int> &v, that is the only change you need to do pass-by-reference instead of pass-by-value. Passing with a pointer will be similar to passing by reference, with the difference that pointers could be re-assigned, or be NULL, while references can not.
I suspect you would see some performance increase if you are working on a sufficiently large-sized problem where you would be able to distinguish a large copy (if your problem is small, it will be difficult to see minor performance increases).
Hope this helps!
vector<int> colVec = _sudoku[col]; does copy/transfer all the elements, while const vector<int>& colVec = _sudoku[col]; would not (it only creates an alias for the right hand side).
Same with bool SudokuSolver::isUnique(vector<int> v, int n) { versus bool SudokuSolver::isUnique(const vector<int>& v, int n) {
Edited after Jesper Juhl's suggestion: The const addition makes sure that you don't change the reference contents by mistake.
Edit 2: Another thing to notice is that vector<int> rowVec(_dim); these vectors are continuously allocated and unallocated at each iteration, which might get costly. You could try something like
int SudokuSolver::rowUniques() {
int count = 0;
vector<int> rowVec(_maximumDim); // Specify maximum dimension
for (int row = 0; row < _dim; row++) {
for (int i = 0; i < _dim; i++) {
rowVec[i] = _sudoku[i][row];
}
for (int i = 1; i <= _dim; i++) {
if (isUnique(rowVec, i)) {
count++;
}
}
}
return count;
}
if that doesn't mess up with your implementation.

C++ program getting caught up in _platform_memmove$VARIANT$Haswell

I am trying to use the suggestion from this post to free up time being spent in _platform_memmove$VARIANT$Haswell. According to a time profiler, this is occurring when I send a pointer to several class instances to a function. I have tried changing the way I declare the class instances, changing what the function takes, etc. but have not been able to resolve this.
The chunk of my code that may help:
Inputs *tables = new Inputs(OutputFolder, DataFolder);
ScreenStrat *strat_burnin = new ScreenStrat(ScreenStrat::NoScreen, ScreenStrat::NoScreen,
tables->ScreenStartAge, tables->ScreenStopAgeHIV,
tables->ScreenStopAge, ScreenStrat::NoVaccine);
calibrate *calib_output = new calibrate ();
StateMachine *Machine = new StateMachine();
for (int i = 0; i < n_sims; i++){
calib_output->saved_output[i] = RunCalibration(calib_output->calib_params[i], *strat_burnin, *tables, *Machine);
}
auto ret_val = *calib_output;
delete strat_burnin;
delete tables;
delete Machine;
delete calib_output;
return(ret_val);
and then the function declaration:
vector<double> RunCalibration(vector<double> calib_params, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine)
EDIT
I addressed the points #Botje suggest and it hasn't fixed the problems. Updated code:
void RunCalibration(calibrate &calib, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine, int i);
unique_ptr<calibrate> RunChain(string RunsFileName, string CurKey, string OutputFolder, string DataFolder);
int main(int argc, char* argv[]) {
string DataFolder;
string OutputFolder;
DataFolder = "../Data/";
OutputFolder = "../Output/";
unsigned int run;
string CurKey;
string RunsFileName(DataFolder);
if(argc == 1){
RunsFileName.append("test.ini");
}
else if(argc > 1){
RunsFileName.append(argv[1]);
}
CIniFile RunsFile(RunsFileName);
if (!RunsFile.ReadFile()) {
cout << "Could not read Runs File: " << RunsFileName << endl;
exit(1);
}
CurKey = RunsFile.GetKeyName (0);
if (RunsFile.GetValue(CurKey, "RunType") == "Calibration"){
int totaliters = RunsFile.GetValueI(CurKey, "Iterations");
int n_sims = RunsFile.GetValueI(CurKey, "Simulations");
vector<future<unique_ptr<calibrate>>> futures;
vector<unique_ptr<calibrate>> modeloutputs;
for (run = 0; run < totaliters; run++){
futures.push_back (async(launch::async, RunChain, RunsFileName, CurKey, OutputFolder, DataFolder));
}
for (int i = 0; i < futures.size(); i++){
modeloutputs.push_back (futures[i].get());
} return(0)}
unique_ptr<calibrate> RunChain(string RunsFileName, string CurKey, string OutputFolder, string DataFolder) {
Inputs *tables = new Inputs(OutputFolder, DataFolder);
tables->loadRFG (RunsFileName, CurKey);
tables->loadVariables ();
int n_sims = tables->Simulations;
int n_params = tables->Multipliers.size();
int n_targs = tables->CalibTargs.size();
ScreenStrat *strat_burnin = new ScreenStrat(ScreenStrat::NoScreen, ScreenStrat::NoScreen,
tables->ScreenStartAge, tables->ScreenStopAgeHIV,
tables->ScreenStopAge, ScreenStrat::NoVaccine);
calibrate *calib_output = new calibrate (n_sims, n_params, n_targs);
calib_output->multipliers_names = tables->MultipliersNames;
calib_output->calib_targs_names = tables->CalibTargsNames;
for (int i = 0; i < n_targs; i ++){
calib_output->calib_targs[i] = tables->CalibTargs[i][0];
calib_output->calib_targs_SD[i] = tables->CalibTargs[i][1];
}
for (int i = 0; i < n_params; i++){
for (int j = 0; j < 3; j++){
calib_output->multipliers[i][j] = tables->Multipliers[i][j];
}
}
StateMachine *Machine = new StateMachine();
for (int i = 0; i < n_sims; i++){
RunCalibration(*calib_output, *strat_burnin, *tables, *Machine, i);
}
unique_ptr<calibrate> ret_val = make_unique<calibrate>(*calib_output);
delete strat_burnin;
delete tables;
delete Machine;
delete calib_output;
return(ret_val);
}
void RunCalibration(calibrate &calib, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine, int i){
Adding in Calibrate definition per request from #botje
#include "calibrate.h"
using namespace std;
calibrate::calibrate(int n_sims, int n_params, int n_targs) {
calib_targs.resize (n_targs);
calib_targs_SD.resize (n_targs);
multipliers.resize(n_params);
for(int i = 0; i < n_params; i++){
multipliers[i].resize(3);
}
calib_params.resize (n_sims);
for (int i = 0; i < calib_params.size(); i++){
calib_params[i].resize (n_params);
}
saved_output.resize (n_sims);
for (int i = 0; i < saved_output.size(); i++){
saved_output[i].resize (n_targs);
}
best_params.resize (n_params);
GOF.clear();
tuned_SD.resize(n_params);
}
calibrate::~calibrate(void) {
}
void calibrate::CalculateGOF(int n_sims) {
GOF.push_back (WeightedDistance (saved_output[n_sims][0], calib_targs[0], calib_targs_SD[0]));
for (int i = 1; i < calib_targs.size(); i ++){
GOF[n_sims] += WeightedDistance (saved_output[n_sims][i], calib_targs[i], calib_targs_SD[i]);
}
if (n_sims == 0){
GOF_min = GOF[0];
best_params = calib_params[0];
} else {
auto it = std::min_element(std::begin(GOF), std::end(GOF));
int index = distance(GOF.begin(), it);
GOF_min_run = GOF[index];
if (GOF_min_run < GOF_min){
GOF_min = GOF_min_run;
best_params = calib_params[index];
}
}
}
std::vector<double> calibrate::loadCalibData(int n_params, int n_sim, int tuning_factor) {
if(n_sim == 0){
random_device rd;
mt19937 gen(rd());
for (int i = 0; i < n_params; i ++ ){
uniform_real_distribution<> dis(multipliers[i][0], multipliers[i][1]);
calib_params[n_sim][i] = dis(gen);
}
} else {
tuned_SD = tuningparam (n_sim, n_params, tuning_factor);
for (int i = 0; i < n_params; i ++ ){
calib_params[n_sim][i] = rnormal_trunc (best_params[i], tuned_SD[i], multipliers[i][1], multipliers[i][0]);
}
}
return(calib_params[n_sim]);
}
double calibrate::WeightedDistance(double data, double mean, double SD) {
double distance = pow((data - mean)/(SD * 2),2);
return distance;
}
double calibrate::rnormal_trunc(double mu, double sigma, double upper, double lower) {
std::default_random_engine generator;
std::normal_distribution<double> distribution(mu, sigma);
double prob = distribution(generator);
while (prob < lower || prob > upper){
prob = distribution(generator);
}
return(prob);
}
vector<double> calibrate::tuningparam(int n_sims, int n_param, int tuning_factor) {
vector<double> newSD;
for (int i = 0; i < n_param; i++){
newSD.push_back (multipliers[i][2]/pow(tuning_factor,n_sims));
}
return newSD;
}
I improved RunCalibration as follows. Note the comments for further improvement opportunities.
using std::make_unique;
using std::unique_ptr;
void RunCalibration(calibrate &calib, ScreenStrat &strat_burnin, Inputs &tables, StateMachine &Machine, int i);
unique_ptr<calibrate> RunChain(string RunsFileName, string CurKey, string OutputFolder, string DataFolder) {
auto tables = make_unique<Inputs>(OutputFolder, DataFolder);
tables->loadRFG (RunsFileName, CurKey);
tables->loadVariables ();
int n_sims = tables->Simulations;
int n_params = tables->Multipliers.size();
int n_targs = tables->CalibTargs.size();
auto strat_burnin = make_unique<ScreenStrat>(
ScreenStrat::NoScreen, ScreenStrat::NoScreen,
tables->ScreenStartAge, tables->ScreenStopAgeHIV,
tables->ScreenStopAge, ScreenStrat::NoVaccine);
auto calib_output = make_unique<calibrate>(n_sims, n_params, n_targs);
// I don't know the type of these fields, but IF you do not modify them in
// `RunCalibration`, consider making them `shared_ptr<vector<...>>`
// both in `calibrate` and in `Inputs` so you can simply copy
// the pointer instead of the full table.
calib_output->multipliers_names = tables->MultipliersNames;
calib_output->calib_targs_names = tables->CalibTargsNames;
// Same applies here. If you do not modify CalibTargs, make `calib_targs` a shared_ptr
// and only copy by pointer.
for (int i = 0; i < n_targs; i ++){
calib_output->calib_targs[i] = tables->CalibTargs[i][0];
calib_output->calib_targs_SD[i] = tables->CalibTargs[i][1];
}
// and again...
for (int i = 0; i < n_params; i++){
for (int j = 0; j < 3; j++){
calib_output->multipliers[i][j] = tables->Multipliers[i][j];
}
}
auto Machine = make_unique<StateMachine>();
for (int i = 0; i < n_sims; i++){
RunCalibration(*calib_output, *strat_burnin, *tables, *Machine, i);
}
// This will return the unique_ptr without copying.
return calib_output;
}

Adding threads increases time needed to perform the same task

I have been struggling with this for past 3 days. I do some stuff in image processing. I came to a point where I could distribute the workflow to more threads since I had "patches" of image, that I could pass to different threads. Unfortunately, the whole time it took to process the image was the same no matter if using 1 or more threads.
So I started digging, making copies of patches so every thread has its own local data, stopped writing result to array, but it was still the same. So I made the most minimalistic program I could have. After thread was created, it would make 10x10 matrix and write its determinant to console. So nothing shared between, only thing passed was index of a thread.
But it was still the same. I made tests both on Linux and Windows. These show time required to compute one determinant, so when using two threads each one took the same amount of time if not stated otherwise:
Windows:
1 Thread = 4479ms
2 Threads = 7500ms
3 Threads = 11300ms
4 Threads = 15800 ms
Linux:
1 Thread = 490ms
2 Threads = 478ms
3 Threads = First: 503ms; Other two: 1230ms
4 Threads = 1340ms
first thing is obvious, Linux is computing the same thing 10x faster. Nevermind. However windows not that single thread performance is worse, it gets worse no matter how many I add. Linux seems to be slowed down only when workload is being done on logical core. Thats why 1 and 2 are ok, since I have 2Core with HT, and when using 3 threads it slows down on the core that uses HT as well but the other is ok. However windows sucks no matter what.
Funny thing is that on windows it takes +- the same amount of time if I compute 4 determinants on one core or 1 determinant on each core.
The code I was using to get these results. I was able to compile with g++ and msvc no problem. Important are only last few methods, there are some constructors I wasn't sure are not being used.
#include <iostream>
#include <cmath>
#include <thread>
#include <chrono>
#include <float.h>
class FVector
{
public:
FVector();
FVector(int length);
FVector(const FVector &vec);
FVector(FVector &&vec);
FVector &operator=(const FVector &vec);
FVector &operator=(FVector &&vec);
~FVector();
void setLength(int length);
int getLength() const;
double *getData();
const double* getConstData() const;
private:
double *data;
int length;
void allocateDataArray(int length);
void deallocateDataArray();
};
FVector::FVector() {
data = nullptr;
length = 0;
}
FVector::FVector(int length) {
data = nullptr;
this->length = length;
allocateDataArray(length);
for (int i = 0; i < length; i++) {
data[i] = 0.;
}
}
FVector::FVector(const FVector &vec) {
allocateDataArray(vec.length);
length = vec.length;
for (int i = 0; i < length; i++) {
data[i] = vec.data[i];
}
}
FVector::FVector(FVector &&vec) {
data = vec.data;
vec.data = nullptr;
length = vec.length;
}
FVector &FVector::operator=(const FVector &vec) {
deallocateDataArray();
if (data == nullptr) {
allocateDataArray(vec.length);
for (int i = 0; i < vec.length; i++) {
data[i] = vec.data[i];
}
length = vec.length;
}
return *this;
}
FVector &FVector::operator=(FVector &&vec) {
deallocateDataArray();
if (data == nullptr) {
data = vec.data;
vec.data = nullptr;
length = vec.length;
}
return *this;
}
FVector::~FVector() {
deallocateDataArray();
}
void FVector::allocateDataArray(int length) {
data = new double[length];
}
void FVector::deallocateDataArray() {
if (data != nullptr) {
delete[] data;
}
data = nullptr;
}
int FVector::getLength() const {
return length;
}
double *FVector::getData() {
return data;
}
void FVector::setLength(int length) {
deallocateDataArray();
allocateDataArray(length);
this->length = length;
}
const double* FVector::getConstData() const {
return data;
}
class FMatrix
{
public:
FMatrix();
FMatrix(int columns, int rows);
FMatrix(const FMatrix &mat);
FMatrix(FMatrix &&mat);
FMatrix& operator=(const FMatrix &mat);
FMatrix& operator=(FMatrix &&mat);
~FMatrix();
FVector *getData();
const FVector* getConstData() const;
void makeIdentity();
int determinant() const;
private:
FVector *data;
int columns;
int rows;
void deallocateDataArray();
void allocateDataArray(int count);
};
FMatrix::FMatrix() {
data = nullptr;
columns = 0;
rows = 0;
}
FMatrix::FMatrix(int columns, int rows) {
data = nullptr;
allocateDataArray(columns);
for (int i = 0; i < columns; i++) {
data[i].setLength(rows);
}
this->columns = columns;
this->rows = rows;
}
FMatrix::FMatrix(const FMatrix &mat) {
data = nullptr;
allocateDataArray(mat.columns);
for (int i = 0; i < mat.columns; i++) {
data[i].setLength(mat.data[i].getLength());
data[i] = mat.data[i];
}
columns = mat.columns;
rows = mat.rows;
}
FMatrix::FMatrix(FMatrix &&mat) {
data = mat.data;
mat.data = nullptr;
columns = mat.columns;
rows = mat.rows;
}
FMatrix &FMatrix::operator=(const FMatrix &mat) {
deallocateDataArray();
if (data == nullptr) {
allocateDataArray(mat.columns);
for (int i = 0; i < mat.columns; i++) {
data[i].setLength(mat.rows);
data[i] = mat.data[i];
}
}
columns = mat.columns;
rows = mat.rows;
return *this;
}
FMatrix &FMatrix::operator=(FMatrix &&mat) {
deallocateDataArray();
data = mat.data;
mat.data = nullptr;
columns = mat.columns;
rows = mat.rows;
return *this;
}
FMatrix::~FMatrix() {
deallocateDataArray();
}
void FMatrix::deallocateDataArray() {
if (data != nullptr) {
delete[] data;
}
data = nullptr;
}
void FMatrix::allocateDataArray(int count) {
data = new FVector[count];
}
FVector *FMatrix::getData() {
return data;
}
void FMatrix::makeIdentity() {
for (int i = 0; i < columns; i++) {
for (int j = 0; j < rows; j++) {
if (i == j) {
data[i].getData()[j] = 1.;
}
else {
data[i].getData()[j] = 0.;
}
}
}
}
int FMatrix::determinant() const {
int det = 0;
FMatrix subMatrix(columns - 1, rows - 1);
int subi;
if (columns == rows && rows == 1) {
return data[0].getData()[0];
}
if (columns != rows) {
//throw EXCEPTIONS::SINGULAR_MATRIX;
}
if (columns == 2)
return ((data[0].getConstData()[0] * data[1].getConstData()[1]) - (data[1].getConstData()[0] * data[0].getConstData()[1]));
else {
for (int x = 0; x < columns; x++) {
subi = 0;
for (int i = 0; i < columns; i++) {
for (int j = 1; j < columns; j++) {
if (x == i) {
continue;
}
subMatrix.data[subi].getData()[j - 1] = data[i].getConstData()[j];
}
if (x != i) {
subi++;
}
}
det += (pow(-1, x) * data[x].getConstData()[0] * subMatrix.determinant());
}
}
return det;
}
const FVector* FMatrix::getConstData() const {
return data;
}
class FCore
{
public:
FCore();
~FCore();
void process();
private:
int getMaxThreads() const;
void joinThreads(std::thread *threads, int max);
};
void parallelTest(int i) {
auto start = std::chrono::high_resolution_clock::now();
FMatrix m(10, 10);
m.makeIdentity();
std::cout << "Det: " << i << "= " << m.determinant() << std::endl;
auto finish = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(finish - start);
std::cout << "Time: " << microseconds.count() / 1000. << std::endl;
}
FCore::FCore()
{
}
FCore::~FCore()
{
}
void FCore::process() {
/*********************************************/
/*Set this to limit number of created threads*/
int threadCount = getMaxThreads();
/*********************************************/
/*********************************************/
std::cout << "Thread count: " << threadCount;
std::thread *threads = new std::thread[threadCount];
for (int i = 0; i < threadCount; i++) {
threads[i] = std::thread(parallelTest, i);
}
joinThreads(threads, threadCount);
delete[] threads;
getchar();
}
int FCore::getMaxThreads() const {
int count = std::thread::hardware_concurrency();
if (count == 0) {
return 1;
}
else {
return count;
}
}
void FCore::joinThreads(std::thread *threads, int max) {
for (int i = 0; i < max; i++) {
threads[i].join();
}
}
int main() {
FCore core;
core.process();
return 0;
}
Obviously I've done some testing with more primitive ones, as simple as adding numbers and it was the same. So I just wanted to ask if any of you have ever stumbled on something remotely similar to this. I know that I won't be able to get the awesome time on windows as it is on Linux, but at least the scaling could be better.
Tested on Win7/Linux intel 2C+2T and Win10 ryzen 8C+8T. Times posted are from 2C+2T

How can I return an array of matrices of differing sizes in c++?

I'm not so advanced in c++ yet, but I'm trying to perform clustering analysis,
the data, vector< vector< double>> X, is M by T, with M features and T data points, I'm trying to group features into sets in which the distance correlation between each of the features within the set is above a certain threshold. The distCorrelation function is already defined by the way.
set<vector<double>> clusterIndices(vector<vector<double>> &X, double threshold){
vector<double> feature[X.size()];
for(int i = 0; i < X.size(); i++){
for(int j = 0; j < X[0].size(); j++){
feature[i].push_back(X[i][j]);
}
}
vector<vector<double>> distCorrMatrix(X.size(), vector<double> (X.size()));
for (int i = 0; i < X.size(); i++){
for (int j = 0; j < X.size(); j++){
distCorrMatrix[i][j] = (distCorrelation(feature[i],feature[j]) >= threshold ? 1.0 : 0.0);
}
}
set<vector<double>> rows;
for (int i = 0; i < X.size(); i++){
vector<int> temp;
for (int j = 0; j < X.size(); j++){
if (distCorrMatrix[i][j] == 1){
temp.push_back(j);
}
}
rows.insert(temp);
}
return rows;
}
So the above code will produce sets of features with mutually high correlation but will only give indices of those features.
That is, the returned rows could be (1,2,5) , (3,7,8,10) ... etc which translates to (feature[1],feature[2],feature[5]) , (feature[3],feature[7],feature[8],feature[10]) ...etc in which feature[i] represents i'th row of the data matrix.
The problem is I don't know how I can create a function that turns those each sets into matrices and return them.
No, your code won't compile. You should do it like this:
// k is the number of clusters
vector<vector<vector<double> > > myFunction(vector<vector<double> > &X, int k) {
vector<vector<vector<double> > > result(k);
for (int i = 0; i < X.size(); i++){
//do something then know X[i] belongs to cluster j
result[j].push_back(X[i]);
}
return result;
}
From what I can tell, you want this
std::vector<int> myclusteringfunction(std::vector<std::vector<double> > const &dataitems)
{
/* assign a cluster id to each data item */
std::vector<int> answer;
for(i=0;i<dataitems.size();i++)
answer.push_back( /* get the cluster id for each data item */);
/* return the ids as a list of the same length as your input list
eg {0, 1, 2, 1, 1, 1, 2, 2, 0, 0, 3, 1, 1, 1, 1} for four clusters */
return answer;
}
Your input seems unclear, but we can go this way: (check function getVectorOfMatrices)
#include <vector>
#include <iostream>
/**
* A classic 2D matrix implementation.
* Pay attention to the constructors and the operator=.
*/
class Matrix2D {
public:
// Standard constructor, allocates memory and initializes.
Matrix2D(const unsigned int rows, const unsigned int columns)
: m_rows(rows), m_columns(columns) {
m_data = new float*[rows];
for(unsigned row = 0; row < rows; ++row) {
m_data[row] = new float[columns];
for (unsigned column = 0; column < columns; ++column) {
m_data[row][column] = 0;
}
}
}
// Copy-constructor - also allocates and initializes.
Matrix2D(const Matrix2D& rhs) {
m_rows = rhs.m_rows;
m_columns = rhs.m_columns;
m_data = new float*[rhs.m_rows];
for (unsigned row = 0; row < rhs.m_rows; ++row) {
m_data[row] = new float[rhs.m_columns];
for (unsigned column = 0; column < rhs.m_columns; ++column) {
m_data[row][column] = rhs.at(row, column);
}
}
}
// Affectation operator - also allocates memory and initializes.
Matrix2D& operator=(const Matrix2D& rhs) {
m_rows = rhs.m_rows;
m_columns = rhs.m_columns;
m_data = new float*[rhs.m_rows];
for (unsigned row = 0; row < rhs.m_rows; ++row) {
m_data[row] = new float[rhs.m_columns];
for (unsigned column = 0; column < rhs.m_columns; ++column) {
m_data[row][column] = rhs.at(row, column);
}
}
}
// Used to set values in the 2D matrix
// NOTA : This function should check row vs m_rows and column vs m_columns
float& at(const unsigned int row, const unsigned int column) {
return m_data[row][column];
}
// Used to get values of the 2D matrix
// NOTA : This function should check row vs m_rows and column vs m_columns
const float at(const unsigned int row, const unsigned int column) const {
return m_data[row][column];
}
// Debug tool - prints the matrix
void print() const {
for (unsigned row = 0; row < m_rows; ++row) {
for (unsigned column = 0; column < m_columns; ++column) {
std::cout << " " << m_data[row][column] << " ";
}
std::cout << std::endl;
}
}
// Destructor - deallocates the memory
~Matrix2D() {
for (unsigned int row=0; row<m_rows; ++row) {
delete[] m_data[row];
}
delete[] m_data;
}
private:
unsigned int m_rows; // y-size
unsigned int m_columns; // x-size
float** m_data; // the data
};
/*
* Function that creates and returns a vector of 2D matrices
* Matrices are of different sizes
*/
std::vector<Matrix2D> getVectorOfMatrices() {
Matrix2D m1(1,1);
Matrix2D m2(2,2);
Matrix2D m3(3,3);
Matrix2D m4(4,2);
m1.at(0, 0) = 4;
m2.at(0, 1) = 2;
m4.at(1, 1) = 8;
std::vector<Matrix2D> result;
result.push_back(m1);
result.push_back(m2);
result.push_back(m3);
result.push_back(m4);
return result;
}
/*
* Main - simply call our function.
*/
int main () {
std::vector<Matrix2D> vec = getVectorOfMatrices();
for(std::vector<Matrix2D>::iterator it = vec.begin(); it != vec.end(); ++it) {
it->print();
}
return 0;
}

Unhandled exception with C++ class function

I am writing a program which will preform texture synthesis. I have been away from C++ for a while and am having trouble figuring out what I am doing wrong in my class. When I run the program, I get an unhandled exception in the copyToSample function when it tries to access the arrays. It is being called from the bestSampleSearch function when the unhandled exception occurs. The function has been called before and works just fine, but later on in the program it is called a second time and fails. Any ideas? Let me know if anyone needs to see more code. Thanks!
Edit1: Added the bestSampleSearch function and the compareMetaPic function
Edit2: Added a copy constructor
Edit3: Added main()
Edit4: I have gotten the program to work. However there is now a memory leak of some kind or I am running out of memory when I run the program. It seems in the double for loop in main which starts "// while output picture is unfilled" is the problem. If I comment this portion out the program finishes in a timely manner but only one small square is output. Something must be wrong with my bestSampleSearch function.
MetaPic.h
#pragma once
#include <pic.h>
#include <stdlib.h>
#include <cmath>
class MetaPic
{
public:
Pic* source;
Pixel1*** meta;
int x;
int y;
int z;
MetaPic();
MetaPic(Pic*);
MetaPic(const MetaPic&);
MetaPic& operator=(const MetaPic&);
~MetaPic();
void allocateMetaPic();
void copyPixelData();
void copyToOutput(Pic*&);
void copyToMetaOutput(MetaPic&, int, int);
void copyToSample(MetaPic&, int, int);
void freeMetaPic();
};
MetaPic.cpp
#include "MetaPic.h"
MetaPic::MetaPic()
{
source = NULL;
meta = NULL;
x = 0;
y = 0;
z = 0;
}
MetaPic::MetaPic(Pic* pic)
{
source = pic;
x = pic->nx;
y = pic->ny;
z = pic->bpp;
allocateMetaPic();
copyPixelData();
}
MetaPic::MetaPic(const MetaPic& mp)
{
source = mp.source;
x = mp.x;
y = mp.y;
z = mp.z;
allocateMetaPic();
copyPixelData();
}
MetaPic::~MetaPic()
{
freeMetaPic();
}
// create a 3 dimensional array from the original one dimensional array
void MetaPic::allocateMetaPic()
{
meta = (Pixel1***)calloc(x, sizeof(Pixel1**));
for(int i = 0; i < x; i++)
{
meta[i] = (Pixel1**)calloc(y, sizeof(Pixel1*));
for(int j = 0; j < y; j++)
{
meta[i][j] = (Pixel1*)calloc(z, sizeof(Pixel1));
}
}
}
void MetaPic::copyPixelData()
{
for(int j = 0; j < y; j++)
{
for(int i = 0; i < x; i++)
{
for(int k = 0; k < z; k++)
meta[i][j][k] = source->pix[(j*z*x)+(i*z)+k];
}
}
}
void MetaPic::copyToOutput(Pic* &output)
{
for(int j = 0; j < y; j++)
{
for(int i = 0; i < x; i++)
{
for(int k = 0; k < z; k++)
output->pix[(j*z*x)+(i*z)+k] = meta[i][j][k];
}
}
}
// copy the meta data to the final pic output starting at the top left of the picture and mapped to 'a' and 'b' coordinates in the output
void MetaPic::copyToMetaOutput(MetaPic &output, int a, int b)
{
for(int j = 0; (j < y) && ((j+b) < output.y); j++)
{
for(int i = 0; (i < x) && ((i+a) < output.x); i++)
{
for(int k = 0; k < z; k++)
output.meta[i+a][j+b][k] = meta[i][j][k];
}
}
}
// copies from a source image to a smaller sample image
// *** Must make sure that the x and y coordinates have enough buffer space ***
void MetaPic::copyToSample(MetaPic &sample, int a, int b)
{
for(int j = 0; (j < sample.y) && ((b+j) < y); j++)
{
for(int i = 0; i < (sample.x) && ((a+i) < x); i++)
{
for(int k = 0; k < sample.z; k++)
{
**sample.meta[i][j][k] = meta[i+a][j+b][k];**
}
}
}
}
// free the meta pic data (MetaPic.meta)
// *** Not to be used outside of class declaration ***
void MetaPic::freeMetaPic()
{
for(int j = 0; j < y; j++)
{
for(int i = 0; i < z; i++)
free(meta[i][j]);
}
for(int i = 0; i < x; i++)
free(meta[i]);
free(meta);
}
MetaPic MetaPic::operator=(MetaPic mp)
{
MetaPic newMP(mp.source);
return newMP;
}
main.cpp
#ifdef WIN32
// For VC++ you need to include this file as glut.h and gl.h refer to it
#include <windows.h>
// disable the warning for the use of strdup and friends
#pragma warning(disable:4996)
#endif
#include <stdio.h> // Standard Header For Most Programs
#include <stdlib.h> // Additional standard Functions (exit() for example)
#include <iostream>
// Interface to libpicio, provides functions to load/save jpeg files
#include <pic.h>
#include <string.h>
#include <time.h>
#include <cmath>
#include "MetaPic.h"
using namespace std;
MetaPic bestSampleSearch(MetaPic, MetaPic);
double compareMetaPics(MetaPic, MetaPic);
#define SAMPLE_SIZE 23
#define OVERLAP 9
// Texture source image (pic.h uses the Pic* data structure)
Pic *sourceImage;
Pic *outputImage;
int main(int argc, char* argv[])
{
char* pictureName = "reg1.jpg";
int outputWidth = 0;
int outputHeight = 0;
// attempt to read in the file name
sourceImage = pic_read(pictureName, NULL);
if(sourceImage == NULL)
{
cout << "Couldn't read the file" << endl;
system("pause");
exit(EXIT_FAILURE);
}
// *** For now set the output image to 3 times the original height and width ***
outputWidth = sourceImage->nx*3;
outputHeight = sourceImage->ny*3;
// allocate the output image
outputImage = pic_alloc(outputWidth, outputHeight, sourceImage->bpp, NULL);
Pic* currentImage = pic_alloc(SAMPLE_SIZE, SAMPLE_SIZE, sourceImage->bpp, NULL);
MetaPic metaSource(sourceImage);
MetaPic metaOutput(outputImage);
MetaPic metaCurrent(currentImage);
// seed the output image
int x = 0;
int y = 0;
int xupperbound = metaSource.x - SAMPLE_SIZE;
int yupperbound = metaSource.y - SAMPLE_SIZE;
int xlowerbound = 0;
int ylowerbound = 0;
// find random coordinates
srand(time(NULL));
while((x >= xupperbound) || (x <= xlowerbound))
x = rand() % metaSource.x;
while((y >= yupperbound) || (y <= ylowerbound))
y = rand() % metaSource.y;
// copy a random sample from the source to the metasample
metaSource.copyToSample(metaCurrent, x, y);
// copy the seed to the metaoutput
metaCurrent.copyToMetaOutput(metaOutput, 0, 0);
int currentOutputX = 0;
int currentOutputY = 0;
// while the output picture is unfilled...
for(int j = 0; j < yupperbound; j+=(SAMPLE_SIZE-OVERLAP))
{
for(int i = 0; i < xupperbound; i+=(SAMPLE_SIZE-OVERLAP))
{
// move the sample to correct overlap
metaSource.copyToSample(metaCurrent, i, j);
// find the best match for the sample
metaCurrent = bestSampleSearch(metaSource, metaCurrent);
// write the best match to the metaoutput
metaCurrent.copyToMetaOutput(metaOutput, i, j);
// update the values
}
}
// copy the metaOutput to the output
metaOutput.copyToOutput(outputImage);
// output the image
pic_write("reg1_output.jpg", outputImage, PIC_JPEG_FILE);
// clean up
pic_free(sourceImage);
pic_free(outputImage);
pic_free(currentImage);
// return success
cout << "Done!" << endl;
system("pause");
// return success
return 0;
}
// finds the best sample to insert into the image
// *** best must be the sample which consists of the overlap ***
MetaPic bestSampleSearch(MetaPic source, MetaPic best)
{
MetaPic metaSample(best);
double bestScore = 999999.0;
double currentScore = 0.0;
for(int j = 0; j < source.y; j++)
{
for(int i = 0; i < source.x; i++)
{
// copy the image starting at the top left of the source image
source.copyToSample(metaSample, i, j);
// compare the sample with the overlap
currentScore = compareMetaPics(best, metaSample);
// if best score is greater than current score then copy the better sample to best and continue searching
if( bestScore > currentScore)
{
metaSample.copyToSample(best, 0, 0);
bestScore = currentScore;
}
// otherwise, the score is less than current score then do nothing (a better sample has not been found)
}
}
return best;
}
// find the comparison score for the two MetaPics based on their rgb values
// *** Both of the meta pics should be the same size ***
double compareMetaPics(MetaPic pic1, MetaPic pic2)
{
float r1 = 0.0;
float g1 = 0.0;
float b1 = 0.0;
float r2 = 0.0;
float g2 = 0.0;
float b2 = 0.0;
float r = 0.0;
float g = 0.0;
float b = 0.0;
float sum = 0.0;
// take the sum of the (sqrt((r1-r2)^2 + ((g1-g2)^2 + ((b1-b2)^2))
for(int j = 0; (j < pic1.y) && (j < pic2.y); j++)
{
for(int i = 0; (i < pic1.x) && (i < pic2.x); i++)
{
r1 = PIC_PIXEL(pic1.source, i, j, 0);
r2 = PIC_PIXEL(pic2.source, i, j, 0);
g1 = PIC_PIXEL(pic1.source, i, j, 1);
g2 = PIC_PIXEL(pic2.source, i, j, 1);
b1 = PIC_PIXEL(pic1.source, i, j, 2);
b2 = PIC_PIXEL(pic2.source, i, j, 2);
r = r1 - r2;
g = g1 - g2;
b = b1 - b2;
sum += sqrt((r*r) + (g*g) + (b*b));
}
}
return sum;
}
I'm not sure if this is the root cause of the problem, but your assignment operator does not actually assign anything:
MetaPic MetaPic::operator=(MetaPic mp)
{
MetaPic newMP(mp.source);
return newMP;
}
This should probably look something like the following (based off of the code in your copy constructor):
edit: with credit to Alf P. Steinbach
MetaPic& MetaPic::operator=(MetaPic mp)
{
mp.swap(*this);
return *this;
}
It turns out that the deallocate function is incorrect. It should be freeing in the same manner that it was allocating.
void MetaPic::freeMetaPic()
{
for(int j = 0; j < y; j++)
{
for(int i = 0; i < z; i++)
free(meta[i][j]);
}
for(int i = 0; i < x; i++)
free(meta[i]);
free(meta);
}