I have such a code:
QVector<Point> legalMoves = field.getLegalMoves();
QVector<Cell> costs;
costs.reserve(legalMoves.size());
for (QVector<Point>::iterator i = legalMoves.begin(); i < legalMoves.end(); ++i)
costs.push_back(checkCost(field, *i, player, -100, 100));
Cell cost;
if (player) {
QVector<Point>::iterator m1 = legalMoves.begin();
QVector<Cell>::iterator m2 = costs.begin();
QVector<Cell>::iterator j = costs.begin() + 1;
for (QVector<Point>::iterator i = legalMoves.begin() + 1; i < legalMoves.end(); ++i, ++j)
if (j->status > m2->status) {
m1 = i;
m2 = j;
}
cost=*m2;
}
else {
QVector<Point>::iterator m1 = legalMoves.begin();
QVector<Cell>::iterator m2 = costs.begin();
QVector<Cell>::iterator j = costs.begin() + 1;
for (QVector<Point>::iterator i = legalMoves.begin() + 1; i < legalMoves.end(); ++i, ++j)
if (j->status < m2->status) {
m1 = i;
m2 = j;
}
cost=*m2;
}
QVector<Point> moves;
QVector<Cell>::iterator j = costs.begin();
for (QVector<Point>::iterator i = legalMoves.begin(); i < legalMoves.end(); ++i, ++j)
if (j->status == cost.status)
moves.push_back(*i);
short index = qrand()%moves.size();
return moves[index];
}
When i'm debugging it, compiler just skips this parts inside loops:
if (j->status < m2->status) {
m1 = i;
m2 = j;
}
which means the functions returns the very first Point(or any other Point with same Cell value instead of min/max). Why does that happen and how is it possible to fix it?
Related
I've been implementing NN recently based on http://neuralnetworksanddeeplearning.com/. I've made whole algorithm for backprop and SGD almost the same way as author of this book. The problem is that while he gets accuracy around 90 % after one epoch i get 30% after 5 epochs even though i have the same hiperparameters. Do you have any idea what might be the cause ?
Here s my respository.
https://github.com/PiPower/Deep-Neural-Network
Here is part with algorithm for backprop and SGD implemented in Network.cpp:
void Network::Train(MatrixD_Array& TrainingData, MatrixD_Array& TrainingLabels, int BatchSize,int epochs, double LearningRate)
{
assert(TrainingData.size() == TrainingLabels.size() && CostFunc != nullptr && CostFuncDer != nullptr && LearningRate > 0);
std::vector<long unsigned int > indexes;
for (int i = 0; i < TrainingData.size(); i++) indexes.push_back(i);
std::random_device rd;
std::mt19937 g(rd());
std::vector<Matrix<double>> NablaWeights;
std::vector<Matrix<double>> NablaBiases;
NablaWeights.resize(Layers.size());
NablaBiases.resize(Layers.size());
for (int i = 0; i < Layers.size(); i++)
{
NablaWeights[i] = Matrix<double>(Layers[i].GetInDim(), Layers[i].GetOutDim());
NablaBiases[i] = Matrix<double>(1, Layers[i].GetOutDim());
}
//---- Epoch iterating
for (int i = 0; i < epochs; i++)
{
cout << "Epoch number: " << i << endl;
shuffle(indexes.begin(), indexes.end(), g);
// Batch iterating
for (int batch = 0; batch < TrainingData.size(); batch = batch + BatchSize)
{
for (int i = 0; i < Layers.size(); i++)
{
NablaWeights[i].Clear();
NablaBiases[i].Clear();
}
int i = 0;
while( i < BatchSize && (i+batch)< TrainingData.size())
{
std::vector<Matrix<double>> ActivationOutput;
std::vector<Matrix<double>> Z_Output;
ActivationOutput.resize(Layers.size() + 1);
Z_Output.resize(Layers.size());
ActivationOutput[0] = TrainingData[indexes[i + batch]];
int index = 0;
// Pushing values through
for (auto layer : Layers)
{
Z_Output[index] = layer.Mul(ActivationOutput[index]);
ActivationOutput[index + 1] = layer.ApplyActivation(Z_Output[index]);
index++;
}
// ---- Calculating Nabla that will be later devided by batch size element wise
auto DeltaNabla = BackPropagation(ActivationOutput, Z_Output, TrainingLabels[indexes[i + batch]]);
for (int i = 0; i < Layers.size(); i++)
{
NablaWeights[i] = NablaWeights[i] + DeltaNabla.first[i];
NablaBiases[i] = NablaBiases[i] + DeltaNabla.second[i];
}
i++;
}
for (int g = 0; g < Layers.size(); g++)
{
Layers[g].Weights = Layers[g].Weights - NablaWeights[g] * LearningRate;
Layers[g].Biases = Layers[g].Biases - NablaBiases[g] * LearningRate;
}
// std::transform(NablaWeights.begin(), NablaWeights.end(), NablaWeights.begin(),[BatchSize, LearningRate](Matrix<double>& Weight) {return Weight * (LearningRate / BatchSize);});
//std::transform(NablaBiases.begin(), NablaBiases.end(), NablaBiases.begin(), [BatchSize, LearningRate](Matrix<double>& Bias) {return Bias * (LearningRate / BatchSize); });
}
}
}
std::pair<MatrixD_Array, MatrixD_Array> Network::BackPropagation( MatrixD_Array& ActivationOutput, MatrixD_Array& Z_Output,Matrix<double>& label)
{
MatrixD_Array NablaWeight;
MatrixD_Array NablaBias;
NablaWeight.resize(Layers.size());
NablaBias.resize(Layers.size());
auto zs = Layers[Layers.size() - 1].ActivationPrime(Z_Output[Z_Output.size() - 1]);
Matrix<double> Delta_L = Hadamard(CostFuncDer(ActivationOutput[ActivationOutput.size() - 1],label), zs);
NablaWeight[Layers.size() - 1] = Delta_L * ActivationOutput[ActivationOutput.size() - 2].Transpose();
NablaBias[Layers.size() - 1] = Delta_L;
for (int j = 2; j <= Layers.size() ; j++)
{
auto sp = Layers[Layers.size() - j].ActivationPrime(Z_Output[Layers.size() -j]);
Delta_L = Hadamard(Layers[Layers.size() - j+1 ].Weights.Transpose() * Delta_L, sp);
NablaWeight[Layers.size() - j] = Delta_L * ActivationOutput[ActivationOutput.size() -j-1].Transpose();
NablaBias[Layers.size() - j] = Delta_L;
}
return make_pair(NablaWeight, NablaBias);
}
It turned out that mnist loader didnt work correctly.
I have used openm to parallelize my c++ code as below:
int shell_num = 50, grparallel[shell_num],grbot[shell_num];
double p_x,p_y,grp[shell_num];
for (int f = 0; f < shell_num; f++)
{
grp[f] = 0;
grparallel[f] = 0;
grbot[f] = 0;
}
//some code...
#pragma omp parallel for reduction(+ : grp,grparallel,grbot)
for(int i = 0; i < N; i++){ //some code
for(int j = 0; j < N; j++){
if (j==i) continue;
double delta_x = x[i]-x[j],
delta_y = y[i]-y[j],
e_dot_e = e_x[i] * e_x[j] + e_y[i] * e_y[j],
e_cross_e = e_x[i] * e_y[j] - e_y[i] * e_x[j];
if (j > i)
{
double fasele = sqrt(dist(x[i],y[i],x[j],y[j],L));
for (int h = 0; h < shell_num; h++) //determine periodic distance between i and j is in which shel
{
if( L * h / 100 < fasele && fasele < L * (h + 1) / 100 )
{grp[h]+= e_dot_e;
double pdotr = abs(periodic(delta_x,L) * p_x + periodic(delta_y,L) * p_y)/fasele;
if (pdotr > 0.9659)
{
grparallel[h]+= 1;}else if(pdotr < 0.2588)
{
grbot[h]+= 1;
}
break;
}
}
}
}
}
When I run the code in terminal, there is an error:
‘grp’ has invalid type for ‘reduction’
The same error occurs for grparallel and grbot.
How can I remove the error?
I have a code that does Singular Value Decomposition (SVD) for square matrices. Code does the job however, it is quite slow and when matrix size increases it gets unbearable. As I am not familiar with parallel programming hence, I am asking advise from experts before I start digging deeper and eventually realize the action I want to achieve is not even possible.
Thank you in advance.
void SVD::decompose() {
bool flag;
int i, its, j, jj, k, l, nm;
double anorm, c, f, g, h, s, scale, x, y, z;
Row rv1(n);
g = scale = anorm = 0.0; //Householder reduction to bidiagonal form.
for (i = 0; i < n; i++) {
l = i + 2;
rv1[i] = scale*g;
g = s = scale = 0.0;
if (i < m) {
for (k = i; k < m; k++) scale += abs(u[k][i]);
if (scale != 0.0) {
for (k = i; k < m; k++) {
u[k][i] /= scale;
s += u[k][i] * u[k][i];
}
f = u[i][i];
g = -SIGN(sqrt(s), f);
h = f*g - s;
u[i][i] = f - g;
for (j = l - 1; j < n; j++) {
for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
f = s / h;
for (k = i; k < m; k++) u[k][j] += f*u[k][i];
}
for (k = i; k < m; k++) u[k][i] *= scale;
}
}
w[i] = scale *g;
g = s = scale = 0.0;
if (i + 1 <= m && i + 1 != n) {
for (k = l - 1; k < n; k++) scale += abs(u[i][k]);
if (scale != 0.0) {
for (k = l - 1; k < n; k++) {
u[i][k] /= scale;
s += u[i][k] * u[i][k];
}
f = u[i][l - 1];
g = -SIGN(sqrt(s), f);
h = f*g - s;
u[i][l - 1] = f - g;
for (k = l - 1; k < n; k++) rv1[k] = u[i][k] / h;
for (j = l - 1; j < m; j++) {
for (s = 0.0, k = l - 1; k < n; k++) s += u[j][k] * u[i][k];
for (k = l - 1; k < n; k++) u[j][k] += s*rv1[k];
}
for (k = l - 1; k < n; k++) u[i][k] *= scale;
}
}
anorm = MAX(anorm, (abs(w[i]) + abs(rv1[i])));
}
for (i = n - 1; i >= 0; i--) { //Accumulation of right-hand tranformations.
if (i < n - 1) {
if (g != 0.0) {
for (j = l; j < n; j++) // Double division to avoid possible underflow.
v[j][i] = (u[i][j] / u[i][l]) / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
for (k = l; k < n; k++) v[k][j] += s*v[k][i];
}
}
for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
}
v[i][i] = 1.0;
g = rv1[i];
l = i;
}
for (i = MIN(m, n) - 1; i >= 0; i--) { //Accumulation of left-hand transformations.
l = i + 1;
g = w[i];
for (j = l; j < n; j++) u[i][j] = 0.0;
if (g != 0.0) {
g = 1.0 / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
f = (s / u[i][i])*g;
for (k = i; k < m; k++) u[k][j] += f*u[k][i];
}
for (j = i; j < m; j++) u[j][i] *= g;
}
else for (j = i; j < m; j++) u[j][i] = 0.0;
++u[i][i];
}
for (k = n - 1; k >= 0; k--) { //Diagonalization of the bidiagonal form: Loop over
for (its = 0; its < 30; its++) { //singular values, and over allowed iterations.
flag = true;
for (l = k; l >= 0; l--) { //Test ofr splitting.
nm = l - 1;
if (l == 0 || abs(rv1[l]) <= eps*anorm) {
flag = false;
break;
}
if (abs(w[nm]) <= eps*anorm) break;
}
if (flag) {
c = 0.0; //Cancellatin of rv[l], if l>0.
s = 1.0;
for (i = l; i < k + 1; i++) {
f = s*rv1[i];
rv1[i] = c*rv1[i];
if (abs(f) <= eps*anorm) break;
g = w[i];
h = pythag(f, g);
w[i] = h;
h = 1.0 / h;
c = g*h;
s = -f*h;
for (j = 0; j < m; j++) {
y = u[j][nm];
z = u[j][i];
u[j][nm] = y*c + z*s;
u[j][i] = z*c - y*s;
}
}
}
z = w[k];
if (l == k) { //Convergence.
if (z < 0.0) { //Singular value is made nonnegative.
w[k] = -z;
for (j = 0; j < n; j++) v[j][k] = -v[j][k];
}
break;
}
x = w[l]; //Shift from bottom 2-by-2 minor.
nm = k - 1;
y = w[nm];
g = rv1[nm];
h = rv1[k];
f = ((y - z)*(y + z) + (g - h)*(g + h)) / (2.0*h*y);
g = pythag(f, 1.0);
f = ((x - z)*(x + z) + h*((y / (f + SIGN(g, f))) - h)) / x;
c = s = 1.0; //Next QR transformation:
for (j = l; j <= nm; j++) {
i = j + 1;
g = rv1[i];
y = w[i];
h = s*g;
g = c*g;
z = pythag(f, h);
rv1[j] = z;
c = f / z;
s = h / z;
f = x*c + g*s;
g = g*c - x*s;
h = y*s;
y *= c;
for (jj = 0; jj < n; jj++) {
x = v[jj][j];
z = v[jj][i];
v[jj][j] = x*c + z*s;
v[jj][i] = z*c - x*s;
}
z = pythag(f, h);
w[j] = z; //Rotation can be arbitrary if z = 0.
if (z) {
z = 1.0 / z;
c = f*z;
s = h*z;
}
f = c*g + s*y;
x = c*y - s*g;
for (jj = 0; jj < m; jj++) {
y = u[jj][j];
z = u[jj][i];
u[jj][j] = y*c + z*s;
u[jj][i] = z*c - y*s;
}
}
rv1[l] = 0.0;
rv1[k] = f;
w[k] = x;
}
}
}
Parts of your code can certainly be parallelized. How much you gain, that is an other question.
The easy way would be to use a common math library.
The fun way would be to maybe use OpenMP to do it yourself.
But befor you even think about OpenMP, consider to rearange your indices. You tend to loop over the first index alot, like in for (k = i; k < m; k++) u[k][i] *= scale;. This has a very bad cache hit rate in c++ for u[k][i] is basicly u[k*second_index_size+i]. If you swap the indices you get for (k = i; k < m; k++) u[i][k] *= scale; which makes perfect use of the cache.
You should see quite a speedup by implementing this.
Now for the OpenMP part.
Find out where the hot regions in your code are. Maybe use Visual Studio to do so. And then you could use OpenMP to parallelize certain for loops, like
#pragma omp parallel for
for (k = i; k < m; k++) u[i][k] *= scale;
What you will gain depends on where the hot regions are and how big your matrices are. Benchmarks will have to show.
I must implement in C++ using diblok The Seeded Region Growing algorithm due to Adams and Bischof which can be found here http://bit.ly/1nIxphj.
It is the fig.2 pseudocode.
After I choose the seeded points using the mouse , it throws this message : Unhandled exception at 0x00416ca0 in diblook.exe: 0xC0000005: Access violation reading location 0x3d2f6e68.
This is the code of the function:
void CDibView::OnLButtonDblClk(UINT nFlags, CPoint point)
{ BEGIN_SOURCE_PROCESSING;
int** labels = new int* [dwHeight];
for(int k = 0;k < dwHeight; k++)
labels[k] = new int[dwWidth];
int noOfRegions = 2;
double meanRegion[2];
double noOfPointsInRegion[2];
for(int i = 0; i < dwHeight ; i++)
for(int j = 0; j < dwWidth ; j++)
{
labels[i][j] = -1;
}
if(noOfPoints < 6)
{
CPoint p = GetScrollPosition() + point;
pos[noOfPoints].x = p.x;
pos[noOfPoints].y = p.y;
int regionLabel = 0;
if(noOfPoints <= noOfPoints / 2)
labels[p.x][p.y] = regionLabel;
else
labels[p.x][p.y] = regionLabel + 1;
noOfPoints++;
}
else
{
// Calculate the mean of each region
for(int i = 0; i < noOfRegions; i++)
{
for(int j = 0 ; j < noOfPoints; j++)
{
if(labels[pos[j].x][pos[j].y] == i)
{
meanRegion[i] += lpSrc[pos[j].x * w + pos[j].y];
}
}
meanRegion[i] /= 3;
noOfPointsInRegion[i] = 3;
}
for(int seedPoint = 0; seedPoint < noOfPoints; seedPoint++)
{
// define list
node *start, *temp;
start = (node *) malloc (sizeof(node));
temp = start;
temp -> next = NULL;
for(int i = -1; i <= 1; i++)
for(int j = -1; j<= 1; j++)
{
if(i == 0 && j == 0) continue;
int gamma = lpSrc[(pos[seedPoint].x + i) * + pos[seedPoint].y + j] - lpSrc[pos[seedPoint].x * w + pos[seedPoint].y];
push(start, pos[seedPoint].x + i, pos[seedPoint].y + j, gamma);
}
sort(start);
if(start != NULL)
{
node *y = start;
pop(start);
int sameNeighbour = 1;
int neighValue = -1;
for(int k = -1; k <= 1; k++)
for(int l = -1; l <= 1;l++)
{
if(k ==0 && l==0) continue;
if(labels[y -> x + k][y -> y + l] != -1)
{
neighValue = labels[y -> x + k][y -> y + l];
break;
}
}
for(int k = -1; k <= 1; k++)
for(int l = -1; l <= 1;l++)
{
if(k == 0 && l==0) continue;
if(labels[y -> x + k][y -> y = 1] != -1 && labels[y -> x + k][y -> y + l] != neighValue)
sameNeighbour = 0;
}
if(sameNeighbour == 1)
{
labels[y -> x][y -> y] = neighValue;
meanRegion[neighValue] = meanRegion[neighValue] * noOfPointsInRegion[neighValue] / noOfPointsInRegion[neighValue] + 1;
noOfPointsInRegion[neighValue]++;
for(int k = -1; k <= 1; k++)
for(int l = -1; l <= 1;l++)
{
if(k == 0 && l == 0) continue;
if(labels[y -> x + k][y -> y + l] == -1 && find(start, y->x + k, y->y + l) == 0)
{
int gammak = meanRegion[neighValue] - lpSrc[(y->x +k) * w + (y->y + l)];
push(start, y->x + k, y->y + l, gammak);
sort(start);
}
}
}
else
{
labels[y->x][y->y] = -1;
}
}
}
int noOfRegionOne = 0;
int noOfRegionTwo = 0;
int noOfBoundary = 0;
for(int i = 0; i< dwHeight; i++)
for(int j = 0;j<dwWidth; j++)
{
if(labels[i][j] == -1)
noOfBoundary++;
else if(labels[i][j] == 0)
noOfRegionOne++;
else if(labels[i][j] == 1)
noOfRegionTwo++;
}
CString info;
info.Format("Boundary %d, One %d, Two %d", noOfBoundary, noOfRegionOne, noOfRegionTwo);
AfxMessageBox(info);
noOfPoints = 0;
}
CScrollView::OnLButtonDblClk(nFlags, point);
END_SOURCE_PROCESSING;
}
After a choose to break the running, this is what is shown http://postimg.org/image/j2sh9k0a1/
Can anybody tell what is wrong and why it doesn't work?
Thanks.
Your screenshot shows that your node (Y is a terrible name, incidentally) has garbage values in it. Offhand, I suspect that 'sort' is overwriting your node values, resulting in garbage. I would create a static copy of your current node to prevent it from changing during processing:
Change
node *y = start;
pop(start);
to
node y = *start;
pop(start);
I make qt quick application and I need to make a declarative widget with video (I do not want to use qt mobility). Like, it's not a problem. But I do not know how to do blur libVLC.
Does anyone know how to do blur video with libVLC?
Maybe you could just use expoblur over the QImage and re-encode the blurred video.
http://www.qtcentre.org/archive/index.php/t-26534.html
static QImage blurred(const QImage& image, const QRect& rect, int radius, bool alphaOnly = false)
{
int tab[] = { 14, 10, 8, 6, 5, 5, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 };
int alpha = (radius < 1) ? 16 : (radius > 17) ? 1 : tab[radius-1];
QImage result = image.convertToFormat(QImage::Format_ARGB32_Premultiplied);
int r1 = rect.top();
int r2 = rect.bottom();
int c1 = rect.left();
int c2 = rect.right();
int bpl = result.bytesPerLine();
int rgba[4];
unsigned char* p;
int i1 = 0;
int i2 = 3;
if (alphaOnly)
i1 = i2 = (QSysInfo::ByteOrder == QSysInfo::BigEndian ? 0 : 3);
for (int col = c1; col <= c2; col++) {
p = result.scanLine(r1) + col * 4;
for (int i = i1; i <= i2; i++)
rgba[i] = p[i] << 4;
p += bpl;
for (int j = r1; j < r2; j++, p += bpl)
for (int i = i1; i <= i2; i++)
p[i] = (rgba[i] += ((p[i] << 4) - rgba[i]) * alpha / 16) >> 4;
}
for (int row = r1; row <= r2; row++) {
p = result.scanLine(row) + c1 * 4;
for (int i = i1; i <= i2; i++)
rgba[i] = p[i] << 4;
p += 4;
for (int j = c1; j < c2; j++, p += 4)
for (int i = i1; i <= i2; i++)
p[i] = (rgba[i] += ((p[i] << 4) - rgba[i]) * alpha / 16) >> 4;
}
for (int col = c1; col <= c2; col++) {
p = result.scanLine(r2) + col * 4;
for (int i = i1; i <= i2; i++)
rgba[i] = p[i] << 4;
p -= bpl;
for (int j = r1; j < r2; j++, p -= bpl)
for (int i = i1; i <= i2; i++)
p[i] = (rgba[i] += ((p[i] << 4) - rgba[i]) * alpha / 16) >> 4;
}
for (int row = r1; row <= r2; row++) {
p = result.scanLine(row) + c2 * 4;
for (int i = i1; i <= i2; i++)
rgba[i] = p[i] << 4;
p -= 4;
for (int j = c1; j < c2; j++, p -= 4)
for (int i = i1; i <= i2; i++)
p[i] = (rgba[i] += ((p[i] << 4) - rgba[i]) * alpha / 16) >> 4;
}
return result;
}