So I have implemented a fully connected one hidden layer neural network in C++ using Eigen for matrix multiplication. It uses minibatch gradient descent.
However, my model cannot get above 50% accuracy on mnist. I have tried learning rates from between 0.0001 and 10. The model does overfit on training sizes < 100 (with ~90% accuracy which is still pretty bad), albeit extremely slowly.
What might be causing this low accuracy and extremely slow learning? My main concern is that the backpropagation is incorrect. Furthermore, I would prefer not to add any other optimization techniques (learning rate schedule, regularization, etc.).
Feed forward and backprop code:
z1 = (w1 * mbX).colwise() + b1;
a1 = sigmoid(z1);
z2 = (w2 * a1).colwise() + b2;
a2 = sigmoid(z2);
MatrixXd err = ((double) epsilon)/((double) minibatch_size) * ((a2 - mbY).array() * sigmoid_derivative(z2).array()).matrix();
b2 = b2 - err * ones;
w2 = w2 - (err * a1.transpose());
err = ((w2.transpose() * err).array() * sigmoid_derivative(z1).array()).matrix();
b1 = b1 - err * ones;
w1 = w1 - (err * mbX.transpose());
Full program code:
#include <iostream>
#include <fstream>
#include <math.h>
#include <cstdlib>
#include <Eigen/Dense>
#include <vector>
#include <string>
using namespace Eigen;
#define N 30
#define epsilon 0.7
#define epoch 1000
const int minibatch_size = 10;
const int training_size = 10000;
const int val_size = 10;
unsigned int num, magic, rows, cols;
unsigned int image[training_size][28][28];
unsigned int val_image[val_size][28][28];
unsigned int label[training_size];
unsigned int val_label[val_size];
MatrixXd X(784, training_size);
MatrixXd Y = MatrixXd::Zero(10, training_size);
MatrixXd mbX(784, minibatch_size);
MatrixXd mbY = MatrixXd::Zero(10, minibatch_size);
MatrixXd Xv(784, val_size);
MatrixXd Yv = MatrixXd::Zero(10, val_size);
//Image processing courtesy of
unsigned int in(std::ifstream& icin, unsigned int size) {
unsigned int ans = 0;
for (int i = 0; i < size; i++) {
unsigned char x;*)&x, 1);
unsigned int temp = x;
ans <<= 8;
ans += temp;
return ans;
void input(std::string ipath, std::string lpath, std::string ipath2, std::string lpath2) {
std::ifstream icin;
//training data, std::ios::binary);
magic = in(icin, 4), num = in(icin, 4), rows = in(icin, 4), cols = in(icin, 4);
for (int i = 0; i < training_size; i++) {
int val = 0;
for (int x = 0; x < rows; x++) {
for (int y = 0; y < cols; y++) {
image[i][x][y] = in(icin, 1);
X(val, i) = image[i][x][y]/255;
//training labels, std::ios::binary);
magic = in(icin, 4), num = in(icin, 4);
for (int i = 0; i < training_size; i++) {
label[i] = in(icin, 1);
Y(label[i], i) = 1;
//validation data, std::ios::binary);
magic = in(icin, 4), num = in(icin, 4), rows = in(icin, 4), cols = in(icin, 4);
for (int i = 0; i < val_size; i++) {
int val = 0;
for (int x = 0; x < rows; x++) {
for (int y = 0; y < cols; y++) {
val_image[i][x][y] = in(icin, 1);
Xv(val, i) = val_image[i][x][y]/255;
//validation labels, std::ios::binary);
magic = in(icin, 4), num = in(icin, 4);
for (int i = 0; i < val_size; i++) {
val_label[i] = in(icin, 1);
Yv(val_label[i], i) = 1;
//Neural Network calculations
MatrixXd sigmoid(MatrixXd m) {
m *= -1;
return (1/(1 + m.array().exp())).matrix();
MatrixXd sigmoid_derivative(MatrixXd m) {
return (sigmoid(m).array() * (1 - sigmoid(m).array())).matrix();
//Initialize weights and biases
//hidden layer
VectorXd b1 = MatrixXd::Zero(N, 1);
MatrixXd w1 = MatrixXd::Random(N, 784);
VectorXd b2 = MatrixXd::Zero(10, 1);
MatrixXd w2 = MatrixXd::Random(10, N);
//Initialize intermediate values
MatrixXd z1, z2, a1, a2, z1v, z2v, a1v, a2v;
MatrixXd ones = MatrixXd::Constant(minibatch_size, 1, 1);
int main() {
input("C:\\Users\\Aaron\\Documents\\Test\\train-images-idx3-ubyte\\train-images.idx3-ubyte", "C:\\Users\\Aaron\\Documents\\Test\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte", "C:\\Users\\Aaron\\Documents\\Test\\t10k-images-idx3-ubyte\\t10k-images.idx3-ubyte", "C:\\Users\\Aaron\\Documents\\Test\\t10k-labels-idx1-ubyte\\t10k-labels.idx1-ubyte");
std::cout << "Finished Image Processing" << std::endl;
//std::cout << w1 << std::endl;
std::vector<double> val_ac;
std::vector<double> c;
std::vector<int> order;
for (int i = 0; i < training_size; i++) {
for (int i = 0; i < epoch; i++) {
//feed forward
std::random_shuffle(order.begin(), order.end());
for (int j = 0; j < training_size/minibatch_size; j++) {
for (int k = 0; k < minibatch_size; k++) {
int index = order[j * minibatch_size + k];
mbX.col(k) = X.col(index);
mbY.col(k) = Y.col(index);
z1 = (w1 * mbX).colwise() + b1;
a1 = sigmoid(z1);
z2 = (w2 * a1).colwise() + b2;
a2 = sigmoid(z2);
MatrixXd err = ((double) epsilon)/((double) minibatch_size) * ((a2 - mbY).array() * sigmoid_derivative(z2).array()).matrix();
//std::cout << err << std::endl;
b2 = b2 - err * ones;
w2 = w2 - (err * a1.transpose());
err = ((w2.transpose() * err).array() * sigmoid_derivative(z1).array()).matrix();
//std::cout << err << std::endl;
b1 = b1 - err * ones;
w1 = w1 - (err * mbX.transpose());
z1 = (w1 * X).colwise() + b1;
a1 = sigmoid(z1);
z2 = (w2 * a1).colwise() + b2;
a2 = sigmoid(z2);
double cost = 1/((double) training_size) * ((a2 - Y).array() * (a2 - Y).array()).matrix().sum();
int correct = 0;
for (int i = 0; i < training_size; i++) {
double maxP = -1;
int na;
for (int j = 0; j < 10; j++) {
if (a2(j, i) > maxP) {
maxP = a2(j, i);
na = j;
if (na == label[i]) correct++;
val_ac.push_back(((double) correct) / ((double) training_size));
std::cout << "Finished Epoch " << i + 1 << std::endl;
std::cout << "Cost: " << cost << std::endl;
std::cout << "Accuracy: " << ((double) correct) / ((double) training_size) << std::endl;
//plot accuracy
FILE * gp = _popen("gnuplot", "w");
fprintf(gp, "set terminal wxt size 600,400 \n");
fprintf(gp, "set grid \n");
fprintf(gp, "set title '%s' \n", "NN");
fprintf(gp, "plot '-' w line, '-' w lines \n");
for (int i = 0; i < epoch; i++) {
fprintf(gp, "%f %f \n", i + 1.0, c[i]);
fprintf(gp, "e\n");
//validation accuracy
for (int i = 0; i < epoch; i++) {
fprintf(gp, "%f %f \n", i + 1.0, val_ac[i]);
fprintf(gp, "e\n");
return 0;
Here is a graph of the accuracy on the training dataset (green) and the loss (purple)
Here is a graph of the loss for the training data and validation data:
The loss of the validation data is increasing past a certain point, which shows signs of overfitting. However, the accuracy still remains abysmal even on the training data.
unsigned int val_image[val_size][28][28];
Xv(val, i) = val_image[i][x][y]/255;
Can you try again with Xv(val, i) = val_image[i][x][y] / 255.0;
There too:
X(val, i) = image[i][x][y]/255;
With the code as written, Xv is 0 very often, and 1, when the image as value 255. With a floating point division, you'll get value between 0.0 and 1.0.
You'll need to check your code for other places where you may be dividing integers.
N.b.: In C++, 240/255 is 0.
I'm translating Python's version of 'page_dewarper' ( into C++. I'm going to use dlib, which is a fantastic tool, that helped me in a few optimization problems before. In line 748 of Github repo ( Matt uses optimize function from Scipy, to find the minimal distance between two vectors. I think, my C++ equivalent should be solve_least_squares_lm() or solve_least_squares(). I'll give a concrete example to analyze.
My data:
a) dstpoints is a vector with OpenCV points - std::vector<cv::Point2f> (I have 162 points in this example, they are not changing),
b) ppts is also std::vector<cv::Point2f> and the same size as dstpoints.
std::vector<cv::Point2f> ppts = project_keypoints(params, input);
It is dependent on:
- dlib::column_vector 'input' is 2*162=324 long and is not changing,
- dlib::column_vector 'params' is 189 long and its values should be changed to get the minimal value of variable 'suma', something like this:
double suma = 0.0;
for (int i=0; i<dstpoints_size; i++)
suma += pow(dstpoints[i].x - ppts[i].x, 2);
suma += pow(dstpoints[i].y - ppts[i].y, 2);
I'm looking for 'params' vector that will give me the smallest value of 'suma' variable. Least squares algorithm seems to be a good option to solve it:, but I don't know if it is good for my case.
I think, my problem is that for every different 'params' vector I get different 'ppts' vector, not only single value, and I don't know if solve_least_squares function can match my example.
I must calculate residual for every point. I think, my 'list' from aforementioned link should be something like this:
(ppts[i].x - dstpoints[i].x, ppts[i].y - dstpoints[i].y, ppts[i+1].x - dstpoints[i+1].x, ppts[i+1].y - dstpoints[i+1].y, etc.)
, where 'ppts' vector depends on 'params' vector and then this problem can be solved with least squares algorithm. I don't know how to create data_samples with these assumptions, because it requires dlib::input_vector for every sample, as it is shown in example:
Am I thinking right?
I'm doing the same thing this days. My solution is writing a Powell Class by myself. It works, but really slowly. The program takes 2 minutes in dewarping linguistics_thesis.jpg.
I don't know what cause the program running so slowly. Maybe because of the algorithm or the code has some extra loop. I'm a Chinese student and my school only have java lessons. So it is normal if you find some extra codes in my codes.
Here is my Powell class.
using namespace std;
using namespace cv;
class MyPowell
vector<vector<double>> xi;
vector<double> pcom;
vector<double> xicom;
vector<Point2d> dstpoints;
vector<double> myparams;
vector<double> params;
vector<Point> keypoint_index;
Point2d dst_br;
Point2d dims;
int N;
int itmax;
int ncom;
int iter;
double fret, ftol;
int usingAorB;
MyPowell(vector<Point2d> &dstpoints, vector<double> ¶ms, vector<Point> &keypoint_index);
MyPowell(Point2d &dst_br, vector<double> ¶ms, Point2d & dims);
double obj(vector<double> ¶ms);
void powell(vector<double> &p, vector<vector<double>> &xi, double ftol, double &fret);
double sign(double a);// , double b);
double sqr(double a);
void linmin(vector<double> &p, vector<double> &xit, int n, double &fret);
void mnbrak(double & ax, double & bx, double & cx,
double & fa, double & fb, double & fc);
double f1dim(double x);
double brent(double ax, double bx, double cx, double & xmin, double tol);
vector<double> usePowell();
void erase(vector<double>& pbar, vector<double> &prr, vector<double> &pr);
MyPowell::MyPowell(vector<Point2d> &dstpoints, vector<double>& params, vector<Point> &keypoint_index)
this->dstpoints = dstpoints;
this->myparams = params;
this->keypoint_index = keypoint_index;
N = params.size();
itmax = N * N;
usingAorB = 1;
MyPowell::MyPowell(Point2d & dst_br, vector<double>& params, Point2d & dims)
this->dst_br = dst_br;
this->params = params;
this->dims = dims;
N = 2;
itmax = N * 1000;
usingAorB = 2;
usingAorB = 3;
double MyPowell::obj(vector<double> &myparams)
if (1 == usingAorB)
vector<Point2d> ppts = Dewarp::projectKeypoints(keypoint_index, myparams);
double total = 0;
for (int i = 0; i < ppts.size(); i++)
double x = dstpoints[i].x - ppts[i].x;
double y = dstpoints[i].y - ppts[i].y;
total += (x * x + y * y);
return total;
else if(2 == usingAorB)
dims.x = myparams[0];
dims.y = myparams[1];
//cout << "dims.x " << dims.x << " dims.y " << dims.y << endl;
vector<Point2d> vdims = { dims };
vector<Point2d> proj_br = Dewarp::projectXY(vdims, params);
double total = 0;
double x = dst_br.x - proj_br[0].x;
double y = dst_br.y - proj_br[0].y;
total += (x * x + y * y);
return total;
return 0;
void MyPowell::powell(vector<double> &x, vector<vector<double>> &direc, double ftol, double &fval)
vector<double> x1;
vector<double> x2;
vector<double> direc1;
int myitmax = 20;
myitmax = 10;
else if (N > 300)
myitmax = 15;
double fx2, t, fx, dum, delta;
fval = obj(x);
int bigind;
for (int j = 0; j < N; j++)
int iter = 0;
while (true)
iter += 1;
fx = fval;
bigind = 0;
delta = 0.0;
for (int i = 0; i < N; i++)
direc1 = direc[i];
fx2 = fval;
linmin(x, direc1, N, fval);
if (fabs(fx2 - fval) > delta)
delta = fabs(fx2 - fval);
bigind = i;
if (2.0 * fabs(fx - fval) <= ftol * (fabs(fx) + fabs(fval)) + 1e-7)
erase(direc1, x2, x1);
if (iter >= itmax)
cout << "powell exceeding maximum iterations" << endl;
if (!x2.empty())
for (int j = 0; j < N; j++)
x2.push_back(2.0*x[j] - x1[j]);
direc1[j] = x[j] - x1[j];
x1[j] = x[j];
cout << fx2 << endl;
fx2 = obj(x2);
if (myitmax < 0)
} while (fx2 >= fx);
dum = fx - 2 * fval + fx2;
t = 2.0*dum*pow((fx - fval - delta), 2) - delta * pow((fx - fx2), 2);
} while (t >= 0.0);
linmin(x, direc1, N, fval);
direc[bigind] = direc1;
double MyPowell::sign(double a)//, double b)
if (a > 0.0)
return 1;
if (a < 0.0)
return -1;
return 0;
double MyPowell::sqr(double a)
return a * a;
void MyPowell::linmin(vector<double>& p, vector<double>& xit, int n, double &fret)
double tol = 1e-2;
ncom = n;
pcom = p;
xicom = xit;
double ax = 0.0;
double xx = 1.0;
double bx = 0.0;
double fa, fb, fx, xmin;
mnbrak(ax, xx, bx, fa, fx, fb);
fret = brent(ax, xx, bx, xmin, tol);
for (int i = 0; i < n; i++)
xit[i] = (xmin * xit[i]);
p[i] += xit[i];
void MyPowell::mnbrak(double & ax, double & bx, double & cx,
double & fa, double & fb, double & fc)
const double GOLD = 1.618034, GLIMIT = 110.0, TINY = 1e-20;
double val, fw, tmp2, tmp1, w, wlim;
double denom;
fa = f1dim(ax);
fb = f1dim(bx);
if (fb > fa)
val = ax;
ax = bx;
bx = val;
val = fb;
fb = fa;
fa = val;
cx = bx + GOLD * (bx - ax);
fc = f1dim(cx);
int iter = 0;
while (fb >= fc)
tmp1 = (bx - ax) * (fb - fc);
tmp2 = (bx - cx) * (fb - fa);
val = tmp2 - tmp1;
if (fabs(val) < TINY)
denom = 2.0*TINY;
denom = 2.0*val;
w = bx - ((bx - cx)*tmp2 - (bx - ax)*tmp1) / (denom);
wlim = bx + GLIMIT * (cx - bx);
if ((bx - w) * (w - cx) > 0.0)
fw = f1dim(w);
if (fw < fc)
ax = bx;
fa = fb;
bx = w;
fb = fw;
else if (fw > fb)
cx = w;
fc = fw;
w = cx + GOLD * (cx - bx);
fw = f1dim(w);
if ((cx - w)*(w - wlim) >= 0.0)
fw = f1dim(w);
if (fw < fc)
bx = cx;
cx = w;
w = cx + GOLD * (cx - bx);
fb = fc;
fc = fw;
fw = f1dim(w);
else if ((w - wlim)*(wlim - cx) >= 0.0)
w = wlim;
fw = f1dim(w);
w = cx + GOLD * (cx - bx);
fw = f1dim(w);
ax = bx;
bx = cx;
cx = w;
fa = fb;
fb = fc;
fc = fw;
double MyPowell::f1dim(double x)
vector<double> xt;
for (int j = 0; j < ncom; j++)
xt.push_back(pcom[j] + x * xicom[j]);
return obj(xt);
double MyPowell::brent(double ax, double bx, double cx, double & xmin, double tol = 1.48e-8)
const double CGOLD = 0.3819660, ZEPS = 1.0e-4;
int itmax = 500;
double a = MIN(ax, cx);
double b = MAX(ax, cx);
double v = bx;
double w = v, x = v;
double deltax = 0.0;
double fx = f1dim(x);
double fv = fx;
double fw = fx;
double rat = 0, u = 0, fu;
int iter;
int done;
double dx_temp, xmid, tol1, tol2, tmp1, tmp2, p;
for (iter = 0; iter < 500; iter++)
xmid = 0.5 * (a + b);
tol1 = tol * fabs(x) + ZEPS;
tol2 = 2.0*tol1;
if (fabs(x - xmid) <= (tol2 - 0.5*(b - a)))
done = -1;
if (fabs(deltax) > tol1)
tmp1 = (x - w) * (fx - fv);
tmp2 = (x - v) * (fx - fw);
p = (x - v) * tmp2 - (x - w) * tmp1;
tmp2 = 2.0 * (tmp2 - tmp1);
if (tmp2 > 0.0)
p = -p;
tmp2 = fabs(tmp2);
dx_temp = deltax;
deltax = rat;
if ((p > tmp2 * (a - x)) && (p < tmp2 * (b - x)) &&
fabs(p) < fabs(0.5 * tmp2 * dx_temp))
rat = p / tmp2;
u = x + rat;
if ((u - a) < tol2 || (b - u) < tol2)
rat = fabs(tol1) * sign(xmid - x);
done = 0;
if (x >= xmid)
deltax = a - x;
deltax = b - x;
rat = CGOLD * deltax;
if (fabs(rat) >= tol1)
u = x + rat;
u = x + fabs(tol1) * sign(rat);
fu = f1dim(u);
if (fu > fx)
if (u < x)
a = u;
b = u;
if (fu <= fw || w == x)
v = w;
w = u;
fv = fw;
fw = fu;
else if (fu <= fv || v == x || v == w)
v = u;
fv = fu;
if (u >= x)
a = x;
b = x;
v = w;
w = x;
x = u;
fv = fw;
fw = fx;
fx = fu;
if(iter > itmax)
cout << "\n Brent exceed maximum iterations.\n\n";
xmin = x;
return fx;
vector<double> MyPowell::usePowell()
ftol = 1e-4;
vector<vector<double>> xi;
for (int i = 0; i < N; i++)
vector<double> xii;
for (int j = 0; j < N; j++)
double fret = 0;
powell(myparams, xi, ftol, fret);
//for (int i = 0; i < xi.size(); i++)
// double a = obj(xi[i]);
// if (fret > a)
// {
// fret = a;
// myparams = xi[i];
// }
cout << "final result" << fret << endl;
return myparams;
void MyPowell::erase(vector<double>& pbar, vector<double>& prr, vector<double>& pr)
for (int i = 0; i < pbar.size(); i++)
pbar[i] = 0;
for (int i = 0; i < prr.size(); i++)
prr[i] = 0;
for (int i = 0; i < pr.size(); i++)
pr[i] = 0;
I used PRAXIS library, because it doesn't need derivative information and is fast.
I modified the code a little to my needs and now it is faster than original version written in Python.
Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
I am trying to run a code scheme published in the following paper:
Specifically, the implementation of the code is from:
Following successful compilation of the below code on Mac OS X El Capitan using gcc Fortran, I get an executable file as expected from the code below. However, when I try to execute this file I get
segmentation fault 11.
After some research I think this is a recursion problem causing the stack to overflow, but I have no idea how to solve this. Could someone point me in the right direction please?
#include <stdio.h>
#include <math.h>
#include "./3rdparty/dSFMT-src-2.0/dSFMT.c"
double x1,
double x2,
double x3,
double x4,
double k1;
double k2;
double k3;
int x1ctr[100][5][5][5][5][5][5][5];
int x2ctr[100][5][5][5][5][5][5][5];
int x3ctr[100][5][5][5][5][5][5];
int x4ctr[100][5][5][5][5];
fx1(double x1)
return -(k1 * x1 * x3 - k2 * x2) + k3 * x2;
fx2(double x2)
return (k1 * x1 * x3 - k2 * x2) - k3 * x2;
fx3(double x3)
return -(k1 * x1 * x3 - k2 * x2);
fx4(double x4)
return k3 * x2;
discretize(double v, double xi[], int length)
for (int j = 1; j < length - 1; j++)
if (v < xi[j])
return j - 1;
return length - 2;
main(int argc, char *argv[])
int myid = atoi(argv[1]);
double dt = 0.01,
halfdt = dt / 2.0;
int tps = (int) (10.0 / dt),
int block = tps / 100,
double halfF1,
double x1i[] = { 0.0, 3.0, 6.0, 9.0, 12.0, 15.0 };
double x2i[] = { 0.0, 3.0, 6.0, 9.0, 12.0, 15.0 };
double x3i[] = { 0.0, 3.0, 6.0, 9.0, 12.0, 15.0 };
double x4i[] = { 0.0, 3.0, 6.0, 9.0, 12.0, 15.0 };
int x1pre,
x1init = 2;
int x2pre,
x2init = 0;
int x3pre,
x3init = 4;
int x4pre,
x4init = 0;
double k1i[] =
{ 0.0, 0.2, 0.4, 0.6000000000000001, 0.8, 1.0 };
double k2i[] =
{ 0.0, 0.2, 0.4, 0.6000000000000001, 0.8, 1.0 };
double k3i[] =
{ 0.0, 0.2, 0.4, 0.6000000000000001, 0.8, 1.0 };
int k1bin;
int k2bin;
int k3bin;
int sampleNo = 1000;
dsfmt_t dsfmt;
int seed = 7018 + myid;
dsfmt_init_gen_rand(&dsfmt, seed);
for (int i = 0; i < sampleNo; i++) {
k1 = 0.0 + dsfmt_genrand_close_open(&dsfmt) * 1.0;
k2 = 0.0 + dsfmt_genrand_close_open(&dsfmt) * 1.0;
k3 = 0.0 + dsfmt_genrand_close_open(&dsfmt) * 1.0;
x1 = x1i[x1init] +
dsfmt_genrand_close_open(&dsfmt) * (x1i[x1init + 1] -
x2 = x2i[x2init] +
dsfmt_genrand_close_open(&dsfmt) * (x2i[x2init + 1] -
x3 = x3i[x3init] +
dsfmt_genrand_close_open(&dsfmt) * (x3i[x3init + 1] -
x4 = x4i[x4init] +
dsfmt_genrand_close_open(&dsfmt) * (x4i[x4init + 1] -
x1preN = x1;
x2preN = x2;
x3preN = x3;
x4preN = x4;
k1bin = discretize(k1, k1i, 6);
k2bin = discretize(k2, k2i, 6);
k3bin = discretize(k3, k3i, 6);
for (int t = 1; t <= tps; t++) {
// x1
halfF1 = halfdt * fx1(x1);
halfF2 = halfdt * fx1(x1 + halfF1);
F3 = dt * fx1(x1 + halfF2);
F4 = dt * fx1(x1 + F3);
x1p = x1 + (2 * halfF1 + 4 * halfF2 + 2 * F3 + F4) / 6.0;
// x2
halfF1 = halfdt * fx2(x2);
halfF2 = halfdt * fx2(x2 + halfF1);
F3 = dt * fx2(x2 + halfF2);
F4 = dt * fx2(x2 + F3);
x2p = x2 + (2 * halfF1 + 4 * halfF2 + 2 * F3 + F4) / 6.0;
// x3
halfF1 = halfdt * fx3(x3);
halfF2 = halfdt * fx3(x3 + halfF1);
F3 = dt * fx3(x3 + halfF2);
F4 = dt * fx3(x3 + F3);
x3p = x3 + (2 * halfF1 + 4 * halfF2 + 2 * F3 + F4) / 6.0;
// x4
halfF1 = halfdt * fx4(x4);
halfF2 = halfdt * fx4(x4 + halfF1);
F3 = dt * fx4(x4 + halfF2);
F4 = dt * fx4(x4 + F3);
x4p = x4 + (2 * halfF1 + 4 * halfF2 + 2 * F3 + F4) / 6.0;
if (t % block == 0) {
tb = t / block - 1;
x1pre = discretize(x1preN, x1i, 6);
x2pre = discretize(x2preN, x2i, 6);
x3pre = discretize(x3preN, x3i, 6);
x4pre = discretize(x4preN, x4i, 6);
x1post = discretize(x1, x1i, 6);
x2post = discretize(x2, x2i, 6);
x3post = discretize(x3, x3i, 6);
x4post = discretize(x4, x4i, 6);
x1preN = x1;
x2preN = x2;
x3preN = x3;
x4preN = x4;
x1 = x1p;
x2 = x2p;
x3 = x3p;
x4 = x4p;
// output
FILE *out;
char buffer[256];
snprintf(buffer, sizeof(buffer), "dummy.txt");
int idx = 0;
for (tb = 0; tb < 100; tb++) {
snprintf(buffer, sizeof(buffer),
"./models/toy/batct/toyCTx1T%d_%d.txt", tb, myid);
out = fopen(buffer, "w");
idx = 0;
for (int ki0 = 0; ki0 < 5; ki0++)
for (int ki1 = 0; ki1 < 5; ki1++)
for (int ki2 = 0; ki2 < 5; ki2++)
for (int vi0 = 0; vi0 < 5; vi0++)
for (int vi1 = 0; vi1 < 5; vi1++)
for (int vi2 = 0; vi2 < 5; vi2++)
for (int vi = 0; vi < 5; vi++) {
int ctrtmp =
if (ctrtmp > 0) {
fprintf(out, "%d %d\n", idx,
snprintf(buffer, sizeof(buffer),
"./models/toy/batct/toyCTx2T%d_%d.txt", tb, myid);
out = fopen(buffer, "w");
idx = 0;
for (int ki0 = 0; ki0 < 5; ki0++)
for (int ki1 = 0; ki1 < 5; ki1++)
for (int ki2 = 0; ki2 < 5; ki2++)
for (int vi0 = 0; vi0 < 5; vi0++)
for (int vi1 = 0; vi1 < 5; vi1++)
for (int vi2 = 0; vi2 < 5; vi2++)
for (int vi = 0; vi < 5; vi++) {
int ctrtmp =
if (ctrtmp > 0) {
fprintf(out, "%d %d\n", idx,
snprintf(buffer, sizeof(buffer),
"./models/toy/batct/toyCTx3T%d_%d.txt", tb, myid);
out = fopen(buffer, "w");
idx = 0;
for (int ki0 = 0; ki0 < 5; ki0++)
for (int ki1 = 0; ki1 < 5; ki1++)
for (int vi0 = 0; vi0 < 5; vi0++)
for (int vi1 = 0; vi1 < 5; vi1++)
for (int vi2 = 0; vi2 < 5; vi2++)
for (int vi = 0; vi < 5; vi++) {
int ctrtmp =
if (ctrtmp > 0) {
fprintf(out, "%d %d\n", idx, ctrtmp);
snprintf(buffer, sizeof(buffer),
"./models/toy/batct/toyCTx4T%d_%d.txt", tb, myid);
out = fopen(buffer, "w");
idx = 0;
for (int ki0 = 0; ki0 < 5; ki0++)
for (int vi0 = 0; vi0 < 5; vi0++)
for (int vi1 = 0; vi1 < 5; vi1++)
for (int vi = 0; vi < 5; vi++) {
int ctrtmp =
if (ctrtmp > 0) {
fprintf(out, "%d %d\n", idx, ctrtmp);
return 0;
Perhaps there are others, but I see only two things that can generate a crash.
you get the myid integer from this instruction
int myid = atoi(argv[1]);
But if you call the program without passing the id parameter? argv[1] is NULL. Crash!
Suggestion: define a default id and check argc; something like
int myid = (argc > 1 ? atoi(argv[1]) : defId);
you fopen() the output files but you don't check the success of the opening; so, when you write in the files, like in
fprintf(out, "%d %d\n", idx,
in case of failure, in opening the file, out is NULL. Crash!
Suggestion: check the opening of the output files (out != NULL).
p.s.: sorry for my bad English.
I am using this as my reference to implement my version of Bicubic interpolation for resizing the images. Here is the function that I have so far with some changes.
IplImage * bicubic(IplImage *img, int newWidth, int newHeight)
IplImage *img2 ;
img2 = createImage(newWidth,newHeight);
uchar * data = (uchar*)img->imageData;
uchar * Data = (uchar*)img2->imageData;
//int a,b,c,index;
uchar Cc;
uchar C[5];
uchar d0,d2,d3,a0,a1,a2,a3;
int i,j,k,jj;
int x,y;
float dx,dy;
float tx,ty;
tx = (float)img->width /newWidth ;
ty = (float)img->height / newHeight;
printf("New Width = %d, New Height = %d WidthStep = %d", newWidth, newHeight,img->widthStep);
for(i = 0; i< newHeight; i++)
for(j = 0; j< newWidth; j++)
x = (int)(tx * j);
y = (int)(ty * i);
dx = tx * j - x;
dy = ty * i - y;
for(k = 0;k < 3;k++)
for(jj = 0;jj <= 3 ;jj++)
int z = (y - 1 + jj);
//if(z > -1){
a0 = data[z * img->widthStep + (x)*img->nChannels +k];//===>Throws of runtime error
d0 = data[z * img->widthStep + (x-1)*img->nChannels +k] - a0 ;
d2 = data[z * img->widthStep + (x+1)*img->nChannels +k] - a0 ;
d3 = data[z * img->widthStep + (x+2)*img->nChannels +k] - a0 ;
a1 = -1.0/3 * d0 + d2 -1.0/6*d3;
a2 = 1.0/2 * d0 + 1.0/2*d2;
a3 = -1.0/6 * d0 - 1.0/2*d2 + 1.0/6*d3;
C[jj] = a0 + a1*dx + a2*dx*dx + a3*dx*dx*dx;
d0 = C[0]-C[1];
d2 = C[2]-C[1];
d3 = C[3]-C[1];
a0 = C[1];
a1 = -1.0/3*d0 + d2 -1.0/6*d3;
a2 = 1.0/2*d0 + 1.0/2*d2;
a3 = -1.0/6*d0 - 1.0/2*d2 + 1.0/6*d3;
Cc = a0 + a1*dy + a2*dy*dy + a3*dy*dy*dy;
Data[i*img2->widthStep +j*img2->nChannels +k ] = Cc;
return img2;
The problem that I am facing is that when I call this bicubic function, it throws off an invalid access runtime error at the line where I find out the value of a0. I am using VS 2012 debugger and it tells me that the value of z is calculated as -1. This causes the index to access the invalid part of memory of data array.
My question is, why is this happening? Am I missing something in OpenCV's image library that can help in getting right indices so that I dont run into this error? Or am I making some mistake in accessing the correct indices?
for(i = 0; i< newHeight; i++)
for(j = 0; j< newWidth; j++)
x = (int)(tx * j);
y = (int)(ty * i);
dx = tx * j - x;
dy = ty * i - y;
for(k = 0;k < 3;k++)
for(jj = 0;jj <= 3 ;jj++)
int z = (y - 1 + jj);
//if(z > -1){
a0 = data[z * img->widthStep + (x)*img->nChannels +k];//===>Throws of runtime error
d0 = data[z * img->widthStep + (x-1)*img->nChannels +k] - a0 ;
On the first iteration, i and j are 0. as are k and jj
This means that:
y = (int)(ty * i); //y = ty * 0 (== 0)
int z = (y - 1 + jj); //z = 0 - 1 + 0 (==-1)
And so in the line:
a0 = data[z * img->widthStep + (x)*img->nChannels +k];//===>Throws of runtime error
the index is:
(-1) * img->widthStep + (x)*img->nChannels +k
simplifies to:
(-1) * img->widthStep + 0 + 0
which is:
This is of course out of bounds, leading to the crash.
I have written a global version of Particle Swarm Optimization algorithm in C++.
I tried to write it exactly as same as my MATLAB PSO code that have written before, but this code generates different and so worst answers.
The MATLAB code is:
clear all;
numofdims = 30;
numofparticles = 50;
c1 = 2;
c2 = 2;
numofiterations = 1000;
V = zeros(50, 30);
initialpop = V;
Vmin = zeros(30, 1);
Vmax = Vmin;
Xmax = ones(30, 1) * 100;
Xmin = -Xmax;
pbestfits = zeros(50, 1);
worsts = zeros(50, 1);
bests = zeros(50, 1);
meanfits = zeros(50, 1);
pbests = zeros(50, 30);
initialpop = Xmin + (Xmax - Xmin) .* rand(numofparticles, numofdims);
X = initialpop;
fitnesses = testfunc1(X);
[minfit, minfitidx] = min(fitnesses);
gbestfit = minfit;
gbest = X(minfitidx, :);
for i = 1:numofdims
Vmax(i) = 0.2 * (Xmax(i) - Xmin(i));
Vmin(i) = -Vmax(i);
for t = 1:1000
w = 0.9 - 0.7 * (t / numofiterations);
for i = 1:numofparticles
if(fitnesses(i) < pbestfits(i))
pbestfits(i) = fitnesses(i);
pbests(i, :) = X(i, :);
for i = 1:numofparticles
for j = 1:numofdims
V(i, j) = min(max((w * V(i, j) + rand * c1 * (pbests(i, j) - X(i, j))...
+ rand * c2 * (gbest(j) - X(i, j))), Vmin(j)), Vmax(j));
X(i, j) = min(max((X(i, j) + V(i, j)), Xmin(j)), Xmax(j));
fitnesses = testfunc1(X);
[minfit, minfitidx] = min(fitnesses);
if(minfit < gbestfit)
gbestfit = minfit;
gbest = X(minfitidx, :);
worsts(t) = max(fitnesses);
bests(t) = gbestfit;
meanfits(t) = mean(fitnesses);
In which, testfunc1 is:
function [out] = testfunc1(R)
out = sum(R .^ 2, 2);
The C++ code is:
#include <cstring>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <ctime>
#define rand_01 ((float)rand() / (float)RAND_MAX)
const int numofdims = 30;
const int numofparticles = 50;
using namespace std;
void fitnessfunc(float X[numofparticles][numofdims], float fitnesses[numofparticles])
memset(fitnesses, 0, sizeof (float) * numofparticles);
for(int i = 0; i < numofparticles; i++)
for(int j = 0; j < numofdims; j++)
fitnesses[i] += (pow(X[i][j], 2));
float mean(float inputval[], int vallength)
int addvalue = 0;
for(int i = 0; i < vallength; i++)
addvalue += inputval[i];
return (float)(addvalue / vallength);
void PSO(int numofiterations, float c1, float c2,
float Xmin[numofdims], float Xmax[numofdims], float initialpop[numofparticles][numofdims],
float worsts[], float meanfits[], float bests[], float *gbestfit, float gbest[numofdims])
float V[numofparticles][numofdims] = {0};
float X[numofparticles][numofdims];
float Vmax[numofdims];
float Vmin[numofdims];
float pbests[numofparticles][numofdims];
float pbestfits[numofparticles];
float fitnesses[numofparticles];
float w;
float minfit;
int minfitidx;
memcpy(X, initialpop, sizeof(float) * numofparticles * numofdims);
fitnessfunc(X, fitnesses);
minfit = *min_element(fitnesses, fitnesses + numofparticles);
minfitidx = min_element(fitnesses, fitnesses + numofparticles) - fitnesses;
*gbestfit = minfit;
memcpy(gbest, X[minfitidx], sizeof(float) * numofdims);
for(int i = 0; i < numofdims; i++)
Vmax[i] = 0.2 * (Xmax[i] - Xmin[i]);
Vmin[i] = -Vmax[i];
for(int t = 0; t < 1000; t++)
w = 0.9 - 0.7 * (float) (t / numofiterations);
for(int i = 0; i < numofparticles; i++)
if(fitnesses[i] < pbestfits[i])
pbestfits[i] = fitnesses[i];
memcpy(pbests[i], X[i], sizeof(float) * numofdims);
for(int i = 0; i < numofparticles; i++)
for(int j = 0; j < numofdims; j++)
V[i][j] = min(max((w * V[i][j] + rand_01 * c1 * (pbests[i][j] - X[i][j])
+ rand_01 * c2 * (gbest[j] - X[i][j])), Vmin[j]), Vmax[j]);
X[i][j] = min(max((X[i][j] + V[i][j]), Xmin[j]), Xmax[j]);
fitnessfunc(X, fitnesses);
minfit = *min_element(fitnesses, fitnesses + numofparticles);
minfitidx = min_element(fitnesses, fitnesses + numofparticles) - fitnesses;
if(minfit < *gbestfit)
*gbestfit = minfit;
memcpy(gbest, X[minfitidx], sizeof(float) * numofdims);
worsts[t] = *max_element(fitnesses, fitnesses + numofparticles);
bests[t] = *gbestfit;
meanfits[t] = mean(fitnesses, numofparticles);
int main()
time_t t;
srand((unsigned) time(&t));
float xmin[30], xmax[30];
float initpop[50][30];
float worsts[1000], bests[1000];
float meanfits[1000];
float gbestfit;
float gbest[30];
for(int i = 0; i < 30; i++)
xmax[i] = 100;
xmin[i] = -100;
for(int i = 0; i < 50; i++)
for(int j = 0; j < 30; j++)
initpop[i][j] = rand() % (100 + 100 + 1) - 100;
PSO(1000, 2, 2, xmin, xmax, initpop, worsts, meanfits, bests, &gbestfit, gbest);
cout<<"fitness: "<<gbestfit<<endl;
return 0;
I have debugged two codes many times but can not find the difference which makes answers different.
It is making me crazy!
May you help me please?
Please consider that, the function mean is just used for reporting some information and is not used in the optimization procedure.
You've got integer division in the following line
w = 0.9 - 0.7 * (float) (t / numofiterations);
w will be 0.2 for every iteration, change it to
w = 0.9 - 0.7 * t / numofiterations;
The first multiplication will automatically promote t to a double the division should then promote numof iterations to a double.
The parenthesis means it will be done first and therefore not be promoted as wo integers is involved in the division.
This could be a mistake in function mean:
return (float)(addvalue / vallength);
This is integer division, so the result is truncated down, then cast to float. It is unlikely this is what you want.