Issue with a DCT implementation - c++

I have to implement a DCT algorithm in C++, here is my present code :
// dct: computes the discrete cosinus tranform of a 8x8 block
template<typename Tin=uchar,typename Tout=float>
inline cv::Mat_<Tout> dct(const cv::Mat_<Tin>& oBlock) {
int indexNumber;
float pi = 3.14159265359;
float fcoscos, fxy, cos1, cos2, forCos1, forCos2;
cv::Mat_<Tout> resultBloc(8, 8);
for (int u = 0; u < oBlock.rows; u++){
for (int v = 0; v < oBlock.cols; v++){
float cu=0, cv=0, Result=0;
// calcul c(u)
if (u == 0){
cu = (float)sqrt((float)1 / (float)oBlock.rows);
}
else {
cu = (float)sqrt((float)2 / (float)oBlock.rows);
}
// calcul c(v)
if (v == 0){
cv = (float)sqrt((float)1 / (float)oBlock.cols);
}
else {
cv = (float)sqrt((float)2 / (float)oBlock.cols);
}
float sums = 0;
for (int x = 0; x < oBlock.rows; x++){
for (int y = 0; y < oBlock.cols; y++){
indexNumber = x * oBlock.rows + y;
fxy = (int)oBlock.data[indexNumber];
forCos1 = (pi*((2 * x) + 1)*u) / (2 * oBlock.rows);
forCos2 = (pi*((2 * y) + 1)*v) / (2 * oBlock.cols);
cos1 = cos(forCos1);
cos2 = cos(forCos2);
fcoscos = fxy * cos1 * cos2;
sums += fcoscos;
}
}
// calcul total
Result = sums*cu*cv;
indexNumber = u * oBlock.rows + v;
resultBloc.data[indexNumber] = Result;
}
}
return resultBloc;
}
I compared the result with the cv DCT algorithm as follow :
cv::Mat_<float> tempImage(8,8);
for (int i = 0; i < vecImageCut[0].cols*vecImageCut[0].rows; i++){
tempImage.data[i] = (int)vecImageCut[0].data[i];
}
cv::Mat_<float> dctCV;
cv::dct(tempImage, dctCV);
for (int i = 0; i < blocksAfterDCT[0].cols*blocksAfterDCT[0].rows; i++){
std::cerr << "Difference DCT for pixel " << i << " : " << dctCV.data[i] - blocksAfterDCT[0].data[i] << std::endl;
}
The results between my DCT and the cv DCT are very different so i assume my DCT algorithm is wrong but i searched for hours and i can't find my mistake, can anyone tell me where i did something wrong ?

Your index calculations are wrong. In indexNumber = x * oBlock.rows + y;, since x is counting rows it needs to be multiplied by the number of columns:
indexNumber = x * oBlock.cols + y;
The same for indexNumber = u * oBlock.rows + v;
indexNumber = u * oBlock.cols + v;

Related

Issue with convolution kernels that add up to zero

I'm making an image editing program in c++ using sfml and tried to add image filters using:
int clamp(int value, int min, int max)
{
if (value < min)
return min;
if (value > max)
return max;
return value;
}
void MyImage::applyKernel(std::vector<std::vector<int>> kernel)
{
int index(0), tempx(0), tempy(0);
int wr(0), wg(0), wb(0), wa(0), sum(0);
auto newPixels = new sf::Uint8[this->size_y * this->size_x * 4];
// Calculate the sum of the kernel
for (int i = 0; i < kernel.size(); i++) {
for (int j = 0; j < kernel[i].size(); j++) {
sum += kernel[i][j];
}
}
for (int y = 0; y < this->size_y; y++) {
for (int x = 0; x < this->size_x; x++) {
/*
Calculate weighted sum from kernel
*/
wr = wg = wb = wa = 0;
for (int i = 0; i < kernel.size(); i++) {
for (int j = 0; j < kernel[i].size(); j++) {
/*
Calculates the coordinates of the kernel relative to the pixel we are changing
*/
tempx = x + (j - floor(kernel[i].size() / 2));
tempy = y + (i - floor(kernel.size() / 2));
//std::cout << "kernel=(" << j << ", " << i << "), pixel=(" << x << ", " << y << ") tempPos=(" << tempx << ", " << tempy << ")\n";
/*
This code below should have the effect of mirroring the image in the case the kernel coordinate is out of bounds (along the edge of the image)
*/
tempx = (tempx < 0) ? -1 * tempx : tempx;
tempy = (tempy < 0) ? -1 * tempy : tempy;
tempx = (tempx > this->size_x) ? x - (j - floor(kernel[i].size() / 2)) : tempx;
tempy = (tempy > this->size_y) ? y - (i - floor(kernel.size() / 2)) : tempy;
if (tempx >= 0 && tempx < this->size_x && tempy >= 0 && tempy < this->size_y) {
index = (((tempy * this->size_x) - tempy) + (tempx)) * 4;
wr += kernel[i][j] * this->pixels[index];
wg += kernel[i][j] * this->pixels[index + 1];
wb += kernel[i][j] * this->pixels[index + 2];
wa += kernel[i][j] * this->pixels[index + 3];
}
}
}
if (sum) {
wr /= sum;
wg /= sum;
wb /= sum;
wa /= sum;
}
index = (((y * this->size_x) - y) + (x)) * 4;
newPixels[index] = clamp(wr, 0, 255); // Red
newPixels[index + 1] = clamp(wg, 0, 255); // Green
newPixels[index + 2] = clamp(wb, 0, 255); // Blue
newPixels[index + 3] = clamp(wa, 0, 255); // Alpha
}
}
this->pixels = newPixels;
// Copies the data from our sf::Uint8 array to the image object to be displayed => Removes the overhead of calling setPixel(x,y,color) for every pixel {As a side note setPixel() should always be avoided}|
this->im->create(this->size_x, this->size_y, this->pixels);
}
I was trying to use [-1,-1,-1], [-1,8,-1]. [-1,-1,-1] for edge detection but just ended up with a white image except for some pixels near the bottom. I've tried different images and kernels out but any that add to 0 don't work. For example if I take the edge detection kernel above and change the 8 to a 9, it gives an expected result. Is there something wrong with my idea of how convolution kernels work or is it just a bug in my code?
Thank you.

Using Gaussian Filter in C++ AMP returning wrong colours

I'm trying to use a Gaussian filter on a generated Mandelbrot set, and I have a solution that will work sequentially. However, I do want to have a solution that works using GPU processing.
I'm using C++ AMP to use GPU processing. An issue with storing the individual color channels of the pixels is that in AMP you can't use unsigned 8 bit integers ( uint8_t ), therefore I've had to store the channels in unsigned 32 bit integers ( uint32_t ). All of the syntax that I've implemented into the AMP code is the same as the sequential one, however the colours of the one generated in AMP has the wrong colours output, however the shape of the Mandelbrot can still be seen.
So, I believe its the way that I'm recollecting the channels together for the pixel that's giving the wrong colour. If anymore details are needed, I can provide them.
Sequential Output
Sequential Code:
void applyFilter(){
int kernelCentreX, kernelCentreY;// center index of kernel
int kernelRadius = 5 / 2;
uint8_t r, g, b;
float kernelTotal = 0.0;
float redTotal = 0.0, blueTotal = 0.0, greenTotal = 0.0;
float sum;
the_clock::time_point start = the_clock::now();
for (int y = 0; y < HEIGHT; y++) { //loop through image height
for (int x = 0; x < WIDTH; x++) {//loop through image width
float redTotal = 0.0, blueTotal = 0.0, greenTotal = 0.0;
float kernelTotal = 0.0;
for (int v = 0; v < 5; v++) { //loop through for kernel height
for (int u = 0; u < 5; u++) { //loop through for kernel width
// Current position
int cX = x + u - kernelRadius;
int cY = y + v - kernelRadius;
//Make sure we stay in boundries
if (cX < 0 || cX > WIDTH - 1 || cY < 0 || cY > HEIGHT - 1)
{
continue;
}
//Get colour channels of current pixel
r = (image[cY][cX] >> 16);
g = (image[cY][cX] >> 8);
b = (image[cY][cX]);
//Get colour channels of current pixel
/*r = image[cY][cX] >> 16;
g = image[cY][cX] >> 8;
b = image[cY][cX];*/
//Calculate Totals
redTotal += r *kernel[v][u];
greenTotal += g *kernel[v][u];
blueTotal += b *kernel[v][u];
kernelTotal += kernel[v][u];
}
}
//Calculate new pixel values
r = (redTotal / kernelTotal);
g = (greenTotal / kernelTotal);
b = (blueTotal / kernelTotal);
image[y][x] = (r << 16 | g << 8 | b);
}
}
the_clock::time_point end = the_clock::now();
auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Time taken to apply kernel(sequential) " << time_taken << "ms" << endl;
}
Parallel Output
Parallel Code:
void applyFilterAMP() {
the_clock::time_point start = the_clock::now();
uint32_t *pImage = &(image[0][0]);
uint32_t *pFilteredImage = &(filteredImage[0][0]);
float *pKernel = &(kernel[0][0]);
uint32_t pi[HEIGHT/3][WIDTH/3][3];
uint32_t* pPi = &(pi[0][0][0]);
array_view<float, 2>k(5, 5, pKernel);
array_view<uint32_t, 2> m(HEIGHT, WIDTH, pImage);
array_view<uint32_t, 2> fi(HEIGHT, WIDTH, pFilteredImage);
//array_view<uint32_t, 3> piColor(HEIGHT/3, WIDTH/3, 3, pPi);
fi.discard_data();
static const int TileSize = 16;
parallel_for_each(fi.extent, [=](concurrency::index<2> idx) restrict(amp) {
//parallel_for_each(fi.extent.tile<TileSize, TileSize>(), [=](tiled_index<TileSize, TileSize> tidx) restrict(amp) {
int kernelCentreX, kernelCentreY;// center index of kernel
int kernelRadius = 5/2;
int y = idx[0];
int x = idx[1];
//int y = tidx.global[0];
//int x = tidx.global[1];
float kernelTotal = 0.0;
float redTotal = 0.0, blueTotal = 0.0, greenTotal = 0.0;
float sum=0.0;
uint32_t r, g, b, newPixel;
for (int v = 0; v < 5; v++) { //loop through for kernel height
for (int u = 0; u < 5; u++) { //loop through for kernel width
// Current position
int cX = x + u - kernelRadius;
int cY = y + v - kernelRadius;
//int cX = tidx.global[1] + u - kernelRadius;
//int cY = tidx.global[0] + v - kernelRadius;
//Make sure we stay in boundries
if ((cX < 0 || cX > WIDTH - 1 || cY < 0 || cY > HEIGHT - 1))
{
continue;
}
//Get colour channels of pixel
r = m[cY][cX] >> 16;
g = m[cY][cX] >> 8;
b = m[cY][cX];
//Calculate Totals
redTotal += r *k[v][u];
greenTotal += g *k[v][u];
blueTotal += b *k[v][u];
kernelTotal += k[v][u];
}
}
//Calculate new pixel values
r = (redTotal / kernelTotal);
g = (greenTotal / kernelTotal);
b = (blueTotal / kernelTotal);
fi[y][x] = (r << 16 | g << 8 | b);
});
fi.synchronize();
//m.synchronize();
the_clock::time_point end = the_clock::now();
auto time_taken = duration_cast<milliseconds>(end - start).count();
cout << "Time taken to apply kernel(parallel) " << time_taken << "ms" << endl;
errno_t err = memcpy_s(image, sizeof(uint32_t)*(HEIGHT * WIDTH), filteredImage, sizeof(uint32_t)*(HEIGHT * WIDTH));
if (err)
{
printf("Error executing memcpy_s.\n");
}
/*for (int j = 0; j < HEIGHT; j++) {
for (int i = 0; i < WIDTH; i++) {
image[j][i] = filteredImage[j][i];
}
}*/
}

My Neural Network isn't learning the right answers [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 5 years ago.
Improve this question
Firstly, I'm a complete amateur so I may mix up some terminology.
I've been working on a Neural Network to play Connect 4 / Four In A Row.
The current design of the network model is 170 input values, 417 hidden neurons and 1 output neuron. The network is fully connected, i.e. every input is connected to every hidden neuron and every hidden neuron is connected to the output node.
Every connection has an independent weight, and every hidden node, and the single output node, have an additional bias node with a weight.
The input representation of 170 values for the game-state of Connect 4 is:
42 pairs of values (84 input variables) which denote whether a space is occupied by player 1, player 2 or is vacant.
0,0 means it's free
1,0 means it's player 1's position
0,1 means it's player 2's position
1,1 is not possible
Another 42 pairs of values (84 input variables) which denote whether adding a piece here will give player 1 or player 2 a "Connect 4"/"Four In a Row". The combination of values means the same as above.
2 final input variables to denote who's turn it is:
1,0 player 1's turn
0,1 player 2's turn
1,1 and 0,0 are not possible
I measured the average Mean Square Error of 100 games over 10,000 total games of various configurations to arrive at:
417 hidden neurons
Alpha and Beta learning rate of 0.1 at the start and dropping to 0.01 linearly across the total number of epochs
A lambda value of 0.5
90 out of 100 moves are random at the start and drop down to 10 out of every 100 after the first 50% of epochs. So at the midway point 10 out of 100 moves are random
The first 50% of epochs start with a random move
Sigmoid Activation Function used in every node
This image shows the results of the various configurations plotted with a logarithmic scale. This is how I determined which configuration to use.
I calculate this Mean Square Error by comparing the output of a board in a win-state against either -1 for a player 2 win and 1 for a player 1 win. I add these up every 100 games and divide the total by 100 to get 1000 values to plot in the above graph. I.e. the code snippet is:
if(board.InARowConnected(4) == Board<7,6,4>::Player1)
{
totalLoss += NN->BackPropagateFinal({1},previousNN,alpha,beta,lambda);
winState = true;
}
else if(board.InARowConnected(4) == Board<7,6,4>::Player2)
{
totalLoss += NN->BackPropagateFinal({-1},previousNN,alpha,beta,lambda);
winState = true;
}
else if(!board.IsThereAvailableMove())
{
totalLoss += NN->BackPropagateFinal({0},previousNN,alpha,beta,lambda);
winState = true;
}
...
if(gameNumber % 100 == 0 && gameNumber != 0)
{
totalLoss = totalLoss / gamesToOutput;
matchFile << std::fixed << std::setprecision(51) << totalLoss << std::endl;
totalLoss = 0.0;
}
The way I'm training the network is by having it play against itself over and over again. It's a feed-forward network and I'm using TD-Lambda to train it for every move (every move that wasn't randomly chosen).
The Board State that is given to the Neural Network is done through this:
template<std::size_t BoardWidth, std::size_t BoardHeight, std::size_t InARow>
void create_board_state(std::array<double,BoardWidth*BoardHeight*4+2>& gameState, const Board<BoardWidth,BoardHeight,InARow>& board,
const typename Board<BoardWidth,BoardHeight,InARow>::Player player)
{
using BoardType = Board<BoardWidth,BoardHeight,InARow>;
auto bb = board.GetBoard();
std::size_t stateIndex = 0;
for(std::size_t boardIndex = 0; boardIndex < BoardWidth*BoardHeight; ++boardIndex, stateIndex += 2)
{
if(bb[boardIndex] == BoardType::Free)
{
gameState[stateIndex] = 0;
gameState[stateIndex+1] = 0;
}
else if(bb[boardIndex] == BoardType::Player1)
{
gameState[stateIndex] = 1;
gameState[stateIndex+1] = 0;
}
else
{
gameState[stateIndex] = 0;
gameState[stateIndex+1] = 1;
}
}
for(std::size_t x = 0; x < BoardWidth; ++x)
{
for(std::size_t y = 0; y < BoardHeight; ++y)
{
auto testBoard1 = board;
auto testBoard2 = board;
testBoard1.SetBoardChecker(x,y,Board<BoardWidth,BoardHeight,InARow>::Player1);
testBoard2.SetBoardChecker(x,y,Board<BoardWidth,BoardHeight,InARow>::Player2);
// player 1's set
if(testBoard1.InARowConnected(4) == Board<7,6,4>::Player1)
gameState[stateIndex] = 1;
else
gameState[stateIndex] = 0;
// player 2's set
if(testBoard2.InARowConnected(4) == Board<7,6,4>::Player2)
gameState[stateIndex+1] = 1;
else
gameState[stateIndex+1] = 0;
stateIndex += 2;
}
}
if(player == Board<BoardWidth,BoardHeight,InARow>::Player1)
{
gameState[stateIndex] = 1;
gameState[stateIndex+1] = 0;
}
else
{
gameState[stateIndex] = 0;
gameState[stateIndex+1] = 1;
}
}
It's templated to make changing things later easier. I don't believe there's anything wrong in the above.
My Sigmoid activation function:
inline double sigmoid(const double x)
{
// return 1.0 / (1.0 + std::exp(-x));
return x / (1.0 + std::abs(x));
}
My Neuron Class
template<std::size_t NumInputs>
class Neuron
{
public:
Neuron()
{
for(auto& i : m_inputValues)
i = 9;
for(auto& e : m_eligibilityTraces)
e = 9;
for(auto& w : m_weights)
w = 9;
m_biasWeight = 9;
m_biasEligibilityTrace = 9;
m_outputValue = 9;
}
void SetInputValue(const std::size_t index, const double value)
{
m_inputValues[index] = value;
}
void SetWeight(const std::size_t index, const double weight)
{
if(std::isnan(weight))
throw std::runtime_error("Shit! this is a nan bread");
m_weights[index] = weight;
}
void SetBiasWeight(const double weight)
{
m_biasWeight = weight;
}
double GetInputValue(const std::size_t index) const
{
return m_inputValues[index];
}
double GetWeight(const std::size_t index) const
{
return m_weights[index];
}
double GetBiasWeight() const
{
return m_biasWeight;
}
double CalculateOutput()
{
m_outputValue = 0;
for(std::size_t i = 0; i < NumInputs; ++i)
{
m_outputValue += m_inputValues[i] * m_weights[i];
}
m_outputValue += 1.0 * m_biasWeight;
m_outputValue = sigmoid(m_outputValue);
return m_outputValue;
}
double GetOutput() const
{
return m_outputValue;
}
double GetEligibilityTrace(const std::size_t index) const
{
return m_eligibilityTraces[index];
}
void SetEligibilityTrace(const std::size_t index, const double eligibility)
{
m_eligibilityTraces[index] = eligibility;
}
void SetBiasEligibility(const double eligibility)
{
m_biasEligibilityTrace = eligibility;
}
double GetBiasEligibility() const
{
return m_biasEligibilityTrace;
}
void ResetEligibilityTraces()
{
for(auto& e : m_eligibilityTraces)
e = 0;
m_biasEligibilityTrace = 0;
}
private:
std::array<double,NumInputs> m_inputValues;
std::array<double,NumInputs> m_weights;
std::array<double,NumInputs> m_eligibilityTraces;
double m_biasWeight;
double m_biasEligibilityTrace;
double m_outputValue;
};
My Neural Network class
template
class NeuralNetwork
{
public:
void RandomiseWeights()
{
double inputToHiddenRange = 4.0 * std::sqrt(6.0 / (NumInputs+1+NumOutputs));
RandomGenerator inputToHidden(-inputToHiddenRange,inputToHiddenRange);
double hiddenToOutputRange = 4.0 * std::sqrt(6.0 / (NumHidden+1+1));
RandomGenerator hiddenToOutput(-hiddenToOutputRange,hiddenToOutputRange);
for(auto& hiddenNeuron : m_hiddenNeurons)
{
for(std::size_t i = 0; i < NumInputs; ++i)
hiddenNeuron.SetWeight(i, inputToHidden());
hiddenNeuron.SetBiasWeight(inputToHidden());
}
for(auto& outputNeuron : m_outputNeurons)
{
for(std::size_t h = 0; h < NumHidden; ++h)
outputNeuron.SetWeight(h, hiddenToOutput());
outputNeuron.SetBiasWeight(hiddenToOutput());
}
}
double GetOutput(const std::size_t index) const
{
return m_outputNeurons[index].GetOutput();
}
std::array<double,NumOutputs> GetOutputs()
{
std::array<double, NumOutputs> returnValue;
for(std::size_t o = 0; o < NumOutputs; ++o)
returnValue[o] = m_outputNeurons[o].GetOutput();
return returnValue;
}
void SetInputValue(const std::size_t index, const double value)
{
for(auto& hiddenNeuron : m_hiddenNeurons)
hiddenNeuron.SetInputValue(index, value);
}
std::array<double,NumOutputs> Calculate()
{
for(auto& h : m_hiddenNeurons)
h.CalculateOutput();
for(auto& o : m_outputNeurons)
o.CalculateOutput();
return GetOutputs();
}
std::array<double,NumOutputs> FeedForward(const std::array<double,NumInputs>& inputValues)
{
for(std::size_t h = 0; h < NumHidden; ++h)//auto& hiddenNeuron : m_hiddenNeurons)
{
for(std::size_t i = 0; i < NumInputs; ++i)
m_hiddenNeurons[h].SetInputValue(i,inputValues[i]);
m_hiddenNeurons[h].CalculateOutput();
}
std::array<double, NumOutputs> returnValue;
for(std::size_t h = 0; h < NumHidden; ++h)
{
auto hiddenOutput = m_hiddenNeurons[h].GetOutput();
for(std::size_t o = 0; o < NumOutputs; ++o)
m_outputNeurons[o].SetInputValue(h, hiddenOutput);
}
for(std::size_t o = 0; o < NumOutputs; ++o)
{
returnValue[o] = m_outputNeurons[o].CalculateOutput();
}
return returnValue;
}
double BackPropagateFinal(const std::array<double,NumOutputs>& actualValues, const NeuralNetwork<NumInputs,NumHidden,NumOutputs>* NN, const double alpha, const double beta, const double lambda)
{
for(std::size_t iO = 0; iO < NumOutputs; ++iO)
{
auto y = NN->m_outputNeurons[iO].GetOutput();
auto y1 = actualValues[iO];
for(std::size_t iH = 0; iH < NumHidden; ++iH)
{
auto e = NN->m_outputNeurons[iO].GetEligibilityTrace(iH);
auto h = NN->m_hiddenNeurons[iH].GetOutput();
auto w = NN->m_outputNeurons[iO].GetWeight(iH);
double e1 = lambda * e + (y * (1.0 - y) * h);
double w1 = w + beta * (y1 - y) * e1;
m_outputNeurons[iO].SetEligibilityTrace(iH,e1);
m_outputNeurons[iO].SetWeight(iH,w1);
}
auto e = NN->m_outputNeurons[iO].GetBiasEligibility();
auto h = 1.0;
auto w = NN->m_outputNeurons[iO].GetBiasWeight();
double e1 = lambda * e + (y * (1.0 - y) * h);
double w1 = w + beta * (y1 - y) * e1;
m_outputNeurons[iO].SetBiasEligibility(e1);
m_outputNeurons[iO].SetBiasWeight(w1);
}
for(std::size_t iH = 0; iH < NumHidden; ++iH)
{
auto h = NN->m_hiddenNeurons[iH].GetOutput();
for(std::size_t iI = 0; iI < NumInputs; ++iI)
{
auto e = NN->m_hiddenNeurons[iH].GetEligibilityTrace(iI);
auto x = NN->m_hiddenNeurons[iH].GetInputValue(iI);
auto u = NN->m_hiddenNeurons[iH].GetWeight(iI);
double sumError = 0;
for(std::size_t iO = 0; iO < NumOutputs; ++iO)
{
auto w = NN->m_outputNeurons[iO].GetWeight(iH);
auto y = NN->m_outputNeurons[iO].GetOutput();
auto y1 = actualValues[iO];
auto grad = y1 - y;
double e1 = lambda * e + (y * (1.0 - y) * w * h * (1.0 - h) * x);
sumError += grad * e1;
}
double u1 = u + alpha * sumError;
m_hiddenNeurons[iH].SetEligibilityTrace(iI,sumError);
m_hiddenNeurons[iH].SetWeight(iI,u1);
}
auto e = NN->m_hiddenNeurons[iH].GetBiasEligibility();
auto x = 1.0;
auto u = NN->m_hiddenNeurons[iH].GetBiasWeight();
double sumError = 0;
for(std::size_t iO = 0; iO < NumOutputs; ++iO)
{
auto w = NN->m_outputNeurons[iO].GetWeight(iH);
auto y = NN->m_outputNeurons[iO].GetOutput();
auto y1 = actualValues[iO];
auto grad = y1 - y;
double e1 = lambda * e + (y * (1.0 - y) * w * h * (1.0 - h) * x);
sumError += grad * e1;
}
double u1 = u + alpha * sumError;
m_hiddenNeurons[iH].SetBiasEligibility(sumError);
m_hiddenNeurons[iH].SetBiasWeight(u1);
}
double retVal = 0;
for(std::size_t o = 0; o < NumOutputs; ++o)
{
retVal += 0.5 * alpha * std::pow((NN->GetOutput(o) - GetOutput(0)),2);
}
return retVal / NumOutputs;
}
double BackPropagate(const NeuralNetwork<NumInputs,NumHidden,NumOutputs>* NN, const double alpha, const double beta, const double lambda)
{
for(std::size_t iO = 0; iO < NumOutputs; ++iO)
{
auto y = NN->m_outputNeurons[iO].GetOutput();
auto y1 = m_outputNeurons[iO].GetOutput();
for(std::size_t iH = 0; iH < NumHidden; ++iH)
{
auto e = NN->m_outputNeurons[iO].GetEligibilityTrace(iH);
auto h = NN->m_hiddenNeurons[iH].GetOutput();
auto w = NN->m_outputNeurons[iO].GetWeight(iH);
double e1 = lambda * e + (y * (1.0 - y) * h);
double w1 = w + beta * (y1 - y) * e1;
m_outputNeurons[iO].SetEligibilityTrace(iH,e1);
m_outputNeurons[iO].SetWeight(iH,w1);
}
auto e = NN->m_outputNeurons[iO].GetBiasEligibility();
auto h = 1.0;
auto w = NN->m_outputNeurons[iO].GetBiasWeight();
double e1 = lambda * e + (y * (1.0 - y) * h);
double w1 = w + beta * (y1 - y) * e1;
m_outputNeurons[iO].SetBiasEligibility(e1);
m_outputNeurons[iO].SetBiasWeight(w1);
}
for(std::size_t iH = 0; iH < NumHidden; ++iH)
{
auto h = NN->m_hiddenNeurons[iH].GetOutput();
for(std::size_t iI = 0; iI < NumInputs; ++iI)
{
auto e = NN->m_hiddenNeurons[iH].GetEligibilityTrace(iI);
auto x = NN->m_hiddenNeurons[iH].GetInputValue(iI);
auto u = NN->m_hiddenNeurons[iH].GetWeight(iI);
double sumError = 0;
for(std::size_t iO = 0; iO < NumOutputs; ++iO)
{
auto w = NN->m_outputNeurons[iO].GetWeight(iH);
auto y = NN->m_outputNeurons[iO].GetOutput();
auto y1 = m_outputNeurons[iO].GetOutput();
auto grad = y1 - y;
double e1 = lambda * e + (y * (1.0 - y) * w * h * (1.0 - h) * x);
sumError += grad * e1;
}
double u1 = u + alpha * sumError;
m_hiddenNeurons[iH].SetEligibilityTrace(iI,sumError);
m_hiddenNeurons[iH].SetWeight(iI,u1);
}
auto e = NN->m_hiddenNeurons[iH].GetBiasEligibility();
auto x = 1.0;
auto u = NN->m_hiddenNeurons[iH].GetBiasWeight();
double sumError = 0;
for(std::size_t iO = 0; iO < NumOutputs; ++iO)
{
auto w = NN->m_outputNeurons[iO].GetWeight(iH);
auto y = NN->m_outputNeurons[iO].GetOutput();
auto y1 = m_outputNeurons[iO].GetOutput();
auto grad = y1 - y;
double e1 = lambda * e + (y * (1.0 - y) * w * h * (1.0 - h) * x);
sumError += grad * e1;
}
double u1 = u + alpha * sumError;
m_hiddenNeurons[iH].SetBiasEligibility(sumError);
m_hiddenNeurons[iH].SetBiasWeight(u1);
}
double retVal = 0;
for(std::size_t o = 0; o < NumOutputs; ++o)
{
retVal += 0.5 * alpha * std::pow((NN->GetOutput(o) - GetOutput(0)),2);
}
return retVal / NumOutputs;
}
std::array<double,NumInputs*NumHidden+NumHidden+NumHidden*NumOutputs+NumOutputs> GetNetworkWeights() const
{
std::array<double,NumInputs*NumHidden+NumHidden+NumHidden*NumOutputs+NumOutputs> returnVal;
std::size_t weightPos = 0;
for(std::size_t h = 0; h < NumHidden; ++h)
{
for(std::size_t i = 0; i < NumInputs; ++i)
returnVal[weightPos++] = m_hiddenNeurons[h].GetWeight(i);
returnVal[weightPos++] = m_hiddenNeurons[h].GetBiasWeight();
}
for(std::size_t o = 0; o < NumOutputs; ++o)
{
for(std::size_t h = 0; h < NumHidden; ++h)
returnVal[weightPos++] = m_outputNeurons[o].GetWeight(h);
returnVal[weightPos++] = m_outputNeurons[o].GetBiasWeight();
}
return returnVal;
}
static constexpr std::size_t NumWeights = NumInputs*NumHidden+NumHidden+NumHidden*NumOutputs+NumOutputs;
void SetNetworkWeights(const std::array<double,NumInputs*NumHidden+NumHidden+NumHidden*NumOutputs+NumOutputs>& weights)
{
std::size_t weightPos = 0;
for(std::size_t h = 0; h < NumHidden; ++h)
{
for(std::size_t i = 0; i < NumInputs; ++i)
m_hiddenNeurons[h].SetWeight(i, weights[weightPos++]);
m_hiddenNeurons[h].SetBiasWeight(weights[weightPos++]);
}
for(std::size_t o = 0; o < NumOutputs; ++o)
{
for(std::size_t h = 0; h < NumHidden; ++h)
m_outputNeurons[o].SetWeight(h, weights[weightPos++]);
m_outputNeurons[o].SetBiasWeight(weights[weightPos++]);
}
}
void ResetEligibilityTraces()
{
for(auto& h : m_hiddenNeurons)
h.ResetEligibilityTraces();
for(auto& o : m_outputNeurons)
o.ResetEligibilityTraces();
}
private:
std::array<Neuron<NumInputs>,NumHidden> m_hiddenNeurons;
std::array<Neuron<NumHidden>,NumOutputs> m_outputNeurons;
};
I believe one of the places I may have an issue is the BackPropagate and BackPropagateFinal methods in the Neural Network class.
Here's my main function that is training the network:
int main()
{
std::ofstream matchFile("match.txt");
RandomGenerator randomPlayerStart(0,1);
RandomGenerator randomMove(0,100);
Board<7,6,4> board;
auto NN = new NeuralNetwork<7*6*4+2,417,1>();
auto previousNN = new NeuralNetwork<7*6*4+2,417,1>();
NN->RandomiseWeights();
const int numGames = 3000000;
double alpha = 0.1;
double beta = 0.1;
double lambda = 0.5;
double learningRateFloor = 0.01;
double decayRateAlpha = (alpha - learningRateFloor) / numGames;
double decayRateBeta = (beta - learningRateFloor) / numGames;
double randomChance = 90; // out of 100
double randomChangeFloor = 10;
double percentToReduceRandomOver = 0.5;
double randomChangeDecay = (randomChance-randomChangeFloor) / (numGames*percentToReduceRandomOver);
double percentOfGamesToRandomiseStart = 0.5;
int numGamesWonP1 = 0;
int numGamesWonP2 = 0;
int gamesToOutput = 100;
matchFile << "Num Games: " << numGames << "\t\ta,b,l: " << alpha << ", " << beta << ", " << lambda << std::endl;
Board<7,6,4>::Player playerStart = randomPlayerStart() > 0.5 ? Board<7,6,4>::Player1 : Board<7,6,4>::Player2;
double totalLoss = 0.0;
for(int gameNumber = 0; gameNumber < numGames; ++gameNumber)
{
bool winState = false;
Board<7,6,4>::Player playerWhoTurnItIs = playerStart;
playerStart = playerStart == Board<7,6,4>::Player1 ? Board<7,6,4>::Player2 : Board<7,6,4>::Player1;
board.ClearBoard();
int turnNumber = 0;
while(!winState)
{
Board<7,6,4>::Player playerWhoTurnItIsNot = playerWhoTurnItIs == Board<7,6,4>::Player1 ? Board<7,6,4>::Player2 : Board<7,6,4>::Player1;
bool wasRandomMove = false;
std::size_t selectedMove;
bool moveFound = false;
if(board.IsThereAvailableMove())
{
std::vector<std::size_t> availableMoves;
if((gameNumber <= numGames * percentOfGamesToRandomiseStart && turnNumber == 0) || randomMove() > 100.0-randomChance)
wasRandomMove = true;
std::size_t bestMove = 8;
double bestWorstResponse = playerWhoTurnItIs == Board<7,6,4>::Player1 ? std::numeric_limits<double>::min() : std::numeric_limits<double>::max();
for(std::size_t m = 0; m < 7; ++m)
{
Board<7,6,4> testBoard = board; // make a copy of the current board to run our tests
if(testBoard.AvailableMoveInColumn(m))
{
if(wasRandomMove)
{
availableMoves.push_back(m);
}
testBoard.AddChecker(m, playerWhoTurnItIs);
double worstResponse = playerWhoTurnItIs == Board<7,6,4>::Player1 ? std::numeric_limits<double>::max() : std::numeric_limits<double>::min();
std::size_t worstMove = 8;
for(std::size_t m2 = 0; m2 < 7; ++m2)
{
Board<7,6,4> testBoard2 = testBoard;
if(testBoard2.AvailableMoveInColumn(m2))
{
testBoard2.AddChecker(m,playerWhoTurnItIsNot);
StateType state;
create_board_state(state, testBoard2, playerWhoTurnItIs);
auto outputs = NN->FeedForward(state);
if(playerWhoTurnItIs == Board<7,6,4>::Player1 && (outputs[0] < worstResponse || worstMove == 8))
{
worstResponse = outputs[0];
worstMove = m2;
}
else if(playerWhoTurnItIs == Board<7,6,4>::Player2 && (outputs[0] > worstResponse || worstMove == 8))
{
worstResponse = outputs[0];
worstMove = m2;
}
}
}
if(playerWhoTurnItIs == Board<7,6,4>::Player1 && (worstResponse > bestWorstResponse || bestMove == 8))
{
bestWorstResponse = worstResponse;
bestMove = m;
}
else if(playerWhoTurnItIs == Board<7,6,4>::Player2 && (worstResponse < bestWorstResponse || bestMove == 8))
{
bestWorstResponse = worstResponse;
bestMove = m;
}
}
}
if(bestMove == 8)
{
std::cerr << "wasn't able to determine the best move to make" << std::endl;
return 0;
}
if(gameNumber <= numGames * percentOfGamesToRandomiseStart && turnNumber == 0)
{
std::size_t rSelection = int(randomMove()) % (availableMoves.size());
selectedMove = availableMoves[rSelection];
moveFound = true;
}
else if(wasRandomMove)
{
std::remove(availableMoves.begin(),availableMoves.end(),bestMove);
std::size_t rSelection = int(randomMove()) % (availableMoves.size());
selectedMove = availableMoves[rSelection];
moveFound = true;
}
else
{
selectedMove = bestMove;
moveFound = true;
}
}
StateType prevState;
create_board_state(prevState,board,playerWhoTurnItIs);
NN->FeedForward(prevState);
*previousNN = *NN;
// now that we have the move, add it to the board
StateType state;
board.AddChecker(selectedMove,playerWhoTurnItIs);
create_board_state(state,board,playerWhoTurnItIsNot);
auto outputs = NN->FeedForward(state);
if(board.InARowConnected(4) == Board<7,6,4>::Player1)
{
totalLoss += NN->BackPropagateFinal({1},previousNN,alpha,beta,lambda);
winState = true;
++numGamesWonP1;
}
else if(board.InARowConnected(4) == Board<7,6,4>::Player2)
{
totalLoss += NN->BackPropagateFinal({-1},previousNN,alpha,beta,lambda);
winState = true;
++numGamesWonP2;
}
else if(!board.IsThereAvailableMove())
{
totalLoss += NN->BackPropagateFinal({0},previousNN,alpha,beta,lambda);
winState = true;
}
else if(turnNumber > 0 && !wasRandomMove)
{
NN->BackPropagate(previousNN,alpha,beta,lambda);
}
if(!wasRandomMove)
{
outputs = NN->FeedForward(state);
}
++turnNumber;
playerWhoTurnItIs = playerWhoTurnItIsNot;
}
alpha -= decayRateAlpha;
beta -= decayRateBeta;
NN->ResetEligibilityTraces();
if(gameNumber > 0 && randomChance > randomChangeFloor && gameNumber <= numGames * percentToReduceRandomOver)
{
randomChance -= randomChangeDecay;
if(randomChance < randomChangeFloor)
randomChance = randomChangeFloor;
}
if(gameNumber % gamesToOutput == 0 && gameNumber != 0)
{
totalLoss = totalLoss / gamesToOutput;
matchFile << std::fixed << std::setprecision(51) << totalLoss << std::endl;
totalLoss = 0.0;
}
}
matchFile << std::endl << "Games won: " << numGamesWonP1 << " . " << numGamesWonP2 << std::endl;
auto weights = NN->GetNetworkWeights();
matchFile << std::endl;
matchFile << std::endl;
for(const auto& w : weights)
matchFile << std::fixed << std::setprecision(51) << w << ", \n";
matchFile << std::endl;
return 0;
}
One place I think I may have an issue is the minimax that's choosing the best move to make.
There's a few additional pieces that I don't think are too pertinent to the issues I'm having.
The Problems
It doesn't seem to matter whether I train 1000 games or 3000000 games, either Player 1 or Player 2 will win the vast majority of games. To the point of like 90 out of 100 games won by one player. If I output the actual individual game moves and outputs I can see that the games won by the other player are almost always the result of a lucky random move.
At the same time, I notice that the prediction outputs sort of "favour" a player. I.e. the outputs seem to be on the negative side of 0, so Player 1 is always making the best moves it can for example, but they all seem to be predicted toward Player 2 winning.
Sometimes it's Player 1 who wins majority, other times it's Player 2. I'm assuming that this is due to the random weights initialising
slight toward one player.
The first game or so doesn't favour one player over the other, but it very quickly starts to "lean" one way.
I've tried training now over 3000000 games, that took 3 days, but the network still doesn't seem to be able to make good decisions. I've tested the network by having it play other "bots" on riddles.io Connect 4 comp.
It fails to recognise that it needs to block the opponents 4 in a row
It, even after 3000000 games, doesn't play the centre column as the first move, which we know is the only starting move you can make that will guarantee a win.
Any help and direction would be greatly appreciated. Specifically, is my implementation of TD-Lambda back-propagation correct?

Why does adding a potential exception result in slower performance even if never thrown?

I copied the code of Moro's Inverse Cumulative function of Normal distribution, where I added an extra error handling to deal with the input that is out of range (0,1). When I tested the code with all valid inputs, the speed is slowed down about 25% (6ns vs 8ns). I'm quite curious that why even no execution the throw exception still slow down the performance?
Code of Inverse function:
double Inverse(double u)
{
double a[4] = {
2.50662823884,
-18.61500062529,
41.39119773534,
-25.44106049637 };
double b[4] = {
-8.47351093090,
23.08336743743,
-21.06224101826,
3.13082909833 };
double c[9] = {
0.3374754822726147,
0.9761690190917186,
0.1607979714918209,
0.0276438810333863,
0.0038405729373609,
0.0003951896511919,
0.0000321767881768,
0.0000002888167364,
0.0000003960315187 };
// Assert( 0 < u && u < 1 );
if (u >= 1 || u <= 0){
throw std::invalid_argument("Input out of range.");
}
/*return the inverse of cumulative normal distribution fonction*/
double x, r;
x = u - 0.5;
if (fabs(x)<0.42) {
r = x*x;
r = x*(((a[3] * r + a[2])*r + a[1])*r + a[0]) / ((((b[3] * r + b[2])*r + b[1])*r + b[0])*r + 1.0);
return(r);
}
r = u;
if (x>0.0) r = 1 - u;
r = log(-log(r));
r = c[0] + r*(c[1] + r*(c[2] + r*(c[3] + r*(c[4] + r*(c[5] + r*(c[6] + r*(c[7] + r*c[8])))))));
if (x<0.0) r = -r;
return(r);
}
And the test code is
int m = 1000000, n = 1000;
double z, p, dz = 1.0 / double(m) / double(n);
clock_t t1, t2;
t1 = clock();
z = 1e-9;
p = 0.0;
for (int i = 0; i < m; i++)
{
for (int j = 0; j < n; j++)
{
z += dz * 0.999;
p = Inverse(z);
}
}
t2 = clock();
cout << z << '\t' << p << endl;
cout << "Inverse" << '\t' << float(t2 - t1) / CLOCKS_PER_SEC << endl;
Even if the exception is not executed, the computer still has to check the value of the variable 'u' in the condition, which takes time.

What to do with negative rho values in hough transform?

Here is my code for creating the hough accumulator for lines in image :
void hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc) {
for (size_t r = 0; r < img_a_edges.rows; r++) {
for (size_t c = 0; c < img_a_edges.cols; c++) {
int theta = static_cast<int> (std::atan2(r, c) * 180 / M_PI);
int rho = static_cast<int> ((c * cos(theta)) + (r * sin(theta)));
if (theta < -90) theta = -90;
if (theta > 89) theta = 89;
++hough_acc[abs(rho)][theta];
}
}
cv::Mat img_mat(hough_acc.size(), hough_acc[0].size(), CV_8U);
std::cout << hough_acc.size() << " " << hough_acc[0].size() << std::endl;
for (size_t i = 0; i < hough_acc.size(); i++) {
for (size_t j = 0; j < hough_acc[0].size(); j++) {
img_mat.at<int> (i,j) = hough_acc[i][j];
}
}
imwrite("../output/ps1-­2-­b-­1.png", img_mat);
}
theta varies from -90 to 89. I am getting negative rho values. Right now I am just replacing the negative who with a positive one but am not getting a correct answer. What do I do to the negative rho? Please explain the answer.
theta = arctan (y / x)
rho = x * cos(theta) + y * sin(theta)
Edited code :
bool hough_lines_acc(cv::Mat img_a_edges, std::vector<std::vector<int> > &hough_acc,\
std::vector<double> thetas, std::vector<double> rhos, int rho_resolution, int theta_resolution) {
int img_w = img_a_edges.cols;
int img_h = img_a_edges.rows;
int max_votes = 0;
int min_votes = INT_MAX;
for (size_t r = 0; r < img_h; r++) {
for (size_t c = 0; c < img_w; c++) {
if(img_a_edges.at<int>(r, c) == 255) {
for (size_t i = 0; i < thetas.size(); i++) {
thetas[i] = (thetas[i] * M_PI / 180);
double rho = ( (c * cos(thetas[i])) + (r * sin(thetas[i])) );
int buff = ++hough_acc[static_cast<int>(abs(rho))][static_cast<int>(i)];
if (buff > max_votes) {
max_votes = buff;
}
if (buff < min_votes) {
min_votes = buff;
}
}
}
}
}
double div = static_cast<double>(max_votes) / 255;
int threshold = 10;
int possible_edge = round(static_cast<double>(max_votes) / div) - threshold;
props({
{"max votes", max_votes},
{"min votes", min_votes},
{"scale", div}
});
// needed for scaling intensity for contrast
// not sure if I am doing it correctly
for (size_t r = 0; r < hough_acc.size(); r++) {
for (size_t c = 0; c < hough_acc[0].size(); c++) {
double val = hough_acc[r][c] / div;
if (val < 0) {
val = 0;
}
hough_acc[r][c] = static_cast<int>(val);
}
}
cv::Mat img_mat = cv::Mat(hough_acc.size(), hough_acc[0].size(), CV_8UC1, cv::Scalar(0));
for (size_t i = 0; i < hough_acc.size(); i++) {
for (size_t j = 0; j < hough_acc[0].size(); j++) {
img_mat.at<uint8_t> (i,j) = static_cast<uint8_t>(hough_acc[i][j]);
}
}
imwrite("../output/ps1-­2-­b-­1.png", img_mat);
return true;
}
Still not correct output. What is the error here?
atan2 of two positive numbers... should not be giving you negative angles, it should only be giving you a range of 0-90
also for the hough transform, I think you want everything relative to one point (ie 0,0 in this case). I think for that you would actually want to make theta=90-atan2(r,c)
Admittedly though, I am a bit confused as I thought you had to encode line direction, rather than just "edge pt". ie I thought at each edge point you had to provide a discrete array of guessed line trajectories and calculate rho and theta for each one and throw all of those into your accumulator. As is... I am not sure what you are calculating.