I have a question about MLP backpropagation - C++

I'm studying MLP backpropagation while reading this, and there are some things I'm curious about, so I'm posting my questions.
In the code below, W, V, c, and b are the parameters updated by the train function, but I don't understand why they are initialized with random values, or what they mean.
I also don't know why the training set handed to the train function is built with resize and make_pair on a vector.
To summarize the questions:
1. What do the parameters W, V, c, and b mean, and why is rand() called in the main function?
2. Why are pair, resize, and make_pair used for the trainSet vector, which holds the data for training the neural network?
#include <iostream>
#include <vector>
#include <math.h>
#include <time.h>

using namespace std;

#define Train_Set_Size 20
#define PI 3.141592653589793238463
#define N 5
#define epsilon 0.05
#define epoch 50000

double c[N] = {};
double W[N] = {};
double V[N] = {};
double b = 0;

double sigmoid(double x) {
    return (1.0f / (1.0f + std::exp(-x)));
}

double f_theta(double x) {
    double result = b;
    for (int i = 0; i < N; i++) {
        result += V[i] * sigmoid(c[i] + W[i] * x);
    }
    return result;
}

void train(double x, double y) {
    for (int i = 0; i < N; i++) {
        W[i] = W[i] - epsilon * 2 * (f_theta(x) - y) * V[i] * x *
               (1 - sigmoid(c[i] + W[i] * x)) * sigmoid(c[i] + W[i] * x);
    }
    for (int i = 0; i < N; i++) {
        V[i] = V[i] - epsilon * 2 * (f_theta(x) - y) * sigmoid(c[i] + W[i] * x);
    }
    b = b - epsilon * 2 * (f_theta(x) - y);
    for (int i = 0; i < N; i++) {
        c[i] = c[i] - epsilon * 2 * (f_theta(x) - y) * V[i] *
               (1 - sigmoid(c[i] + W[i] * x)) * sigmoid(c[i] + W[i] * x);
    }
}

int main() {
    srand(time(NULL));
    for (int i = 0; i < N; i++) {
        W[i] = 2 * rand() / RAND_MAX - 1;
        V[i] = 2 * rand() / RAND_MAX - 1;
        c[i] = 2 * rand() / RAND_MAX - 1;
    }

    vector<pair<double, double>> trainSet;
    trainSet.resize(Train_Set_Size);
    for (int i = 0; i < Train_Set_Size; i++) {
        trainSet[i] = make_pair(i * 2 * PI / Train_Set_Size, sin(i * 2 * PI / Train_Set_Size));
    }

    for (int j = 0; j < epoch; j++) {
        for (int i = 0; i < Train_Set_Size; i++) {
            train(trainSet[i].first, trainSet[i].second);
        }
        std::cout << j << "\r";
    }

    //Plot the results
    vector<float> x;
    vector<float> y1, y2;
    for (int i = 0; i < 1000; i++) {
        x.push_back(i * 2 * PI / 1000);
        y1.push_back(sin(i * 2 * PI / 1000));
        y2.push_back(f_theta(i * 2 * PI / 1000));
    }

    FILE *gp = _popen("gnuplot", "w");
    fprintf(gp, "set terminal wxt size 600,400 \n");
    fprintf(gp, "set grid \n");
    fprintf(gp, "set title '%s' \n", "f(x) = sin (x)");
    fprintf(gp, "set style line 1 lt 3 pt 7 ps 0.1 lc rgb 'green' lw 1 \n");
    fprintf(gp, "set style line 2 lt 3 pt 7 ps 0.1 lc rgb 'red' lw 1 \n");
    fprintf(gp, "plot '-' w p ls 1, '-' w p ls 2 \n");

    //Exact f(x) = sin(x) -> Green Graph
    for (int k = 0; k < x.size(); k++) {
        fprintf(gp, "%f %f \n", x[k], y1[k]);
    }
    fprintf(gp, "e\n");

    //Neural Network Approximate f(x) = sin(x) -> Red Graph
    for (int k = 0; k < x.size(); k++) {
        fprintf(gp, "%f %f \n", x[k], y2[k]);
    }
    fprintf(gp, "e\n");

    fflush(gp);
    system("pause");
    _pclose(gp);
    return 0;
}
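For orientation on both questions, here is a minimal self-contained sketch (my addition, not from the original tutorial) of what those two pieces of main() are doing: rand() starts each weight at a different random value in roughly [-1, 1] so the hidden units don't all learn the same function, and pair/resize/make_pair merely build a vector of (input, target) samples.

#include <cstdlib>   // rand, srand, RAND_MAX
#include <ctime>     // time
#include <cmath>     // sin
#include <utility>   // pair, make_pair
#include <vector>
#include <iostream>

int main() {
    std::srand((unsigned)std::time(nullptr));

    // Random initialization in [-1, 1]. Note the 2.0: it forces
    // floating-point division. With plain int arithmetic
    // (2 * rand() / RAND_MAX - 1) the division truncates, so only
    // a few distinct values can come out.
    const int N = 5;
    double W[N];
    for (int i = 0; i < N; i++) {
        W[i] = 2.0 * std::rand() / RAND_MAX - 1.0;
    }

    // The training set is a list of (input, target) samples: pair keeps the
    // two values together, and resize + make_pair (or push_back, as here)
    // fills the vector with points of y = sin(x).
    const int Train_Set_Size = 20;
    const double PI = 3.141592653589793238463;
    std::vector<std::pair<double, double>> trainSet;
    for (int i = 0; i < Train_Set_Size; i++) {
        double x = i * 2 * PI / Train_Set_Size;
        trainSet.push_back(std::make_pair(x, std::sin(x)));
    }

    std::cout << trainSet[1].first << " -> " << trainSet[1].second << "\n";
    return 0;
}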

Related

Unsharp masking OpenCV C++

I am new to OpenCV and I am performing unsharp masking using the formula Image + (K * (Image - low pass filter)). However, the resultant image may have values < 0 or > 255, so I need to write a loop to scale that back into range.
I tried to write one, but seemingly it's incorrect.
Here are the errors:
a) (k * (float)(src.at<uchar>(y, x) - res.at<uchar>(y, x))) can be negative, so it is incorrect to do (uchar)(k * (float)(src.at<uchar>(y, x) - res.at<uchar>(y, x))).
b) src.at<uchar>(y, x) + (k * (float)(src.at<uchar>(y, x) - res.at<uchar>(y, x))) can be greater than 255 and can be smaller than 0.
Can someone help me fix this? Thanks in advance.
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <stdlib.h>
#include <math.h>

using namespace std;
using namespace cv;

//the pixel lying outside the image i.e. (x - j, y - k) are reflected back into the image
int reflect(int M, int x)
{
    if (x < 0)
    {
        return -x - 1;
    }
    if (x >= M)
    {
        return 2 * M - x - 1;
    }
    return x;
}

int circular(int M, int x)
{
    if (x < 0)
        return x + M;
    if (x >= M)
        return x - M;
    return x;
}

void noBorderProcessing(Mat src, Mat res, float Kernel[][3])
{
    float sum;
    for (int y = 1; y < src.rows - 1; y++) {
        for (int x = 1; x < src.cols - 1; x++) {
            sum = 0.0;
            for (int k = -1; k <= 1; k++) {
                for (int j = -1; j <= 1; j++) {
                    sum = sum + Kernel[j + 1][k + 1] * src.at<uchar>(y - j, x - k);
                }
            }
            res.at<uchar>(y, x) = sum;
        }
    }
}

void refletedIndexing(Mat src, Mat res, float Kernel[][3])
{
    float sum, x1, y1;
    for (int y = 0; y < src.rows; y++) {
        for (int x = 0; x < src.cols; x++) {
            sum = 0.0;
            for (int k = -1; k <= 1; k++) {
                for (int j = -1; j <= 1; j++) {
                    x1 = reflect(src.cols, x - j);
                    y1 = reflect(src.rows, y - k);
                    sum = sum + Kernel[j + 1][k + 1] * src.at<uchar>(y1, x1);
                }
            }
            res.at<uchar>(y, x) = sum;
        }
    }
}

//coordinates that exceed the bounds of the image wrap around to the opposite side
void circularIndexing(Mat src, Mat res, float Kernel[][3])
{
    float sum, x1, y1;
    for (int y = 0; y < src.rows; y++) {
        for (int x = 0; x < src.cols; x++) {
            sum = 0.0;
            for (int k = -1; k <= 1; k++) {
                for (int j = -1; j <= 1; j++) {
                    x1 = circular(src.cols, x - j);
                    y1 = circular(src.rows, y - k);
                    sum = sum + Kernel[j + 1][k + 1] * src.at<uchar>(y1, x1);
                }
            }
            res.at<uchar>(y, x) = sum;
        }
    }
}

int main()
{
    Mat src, res, dst;

    /// Load an image
    src = cv::imread("Images4DZ/Gray_Win.bmp", cv::IMREAD_ANYDEPTH);

    //low pass filtering
    float Kernel[3][3] = {
        {1 / 9.0, 1 / 9.0, 1 / 9.0},
        {1 / 9.0, 1 / 9.0, 1 / 9.0},
        {1 / 9.0, 1 / 9.0, 1 / 9.0}
    };
    res = src.clone();
    for (int y = 0; y < src.rows; y++)
        for (int x = 0; x < src.cols; x++)
            res.at<uchar>(y, x) = 0.0;
    circularIndexing(src, res, Kernel);

    //Unsharpen Masking
    dst = cv::Mat::zeros(res.rows, res.cols, CV_8UC1);
    float k = 0.5;
    for (int y = 0; y < res.rows; y++) {
        for (int x = 0; x < res.cols; x++) {
            dst.at<uchar>(y, x) = src.at<uchar>(y, x) + (uchar)(k * (float)(src.at<uchar>(y, x) - res.at<uchar>(y, x)));
        }
    }

    imshow("Source Image", src);
    imshow("Low Pass Filter", res);
    imshow("Unsharpen Masking", dst);
    waitKey();
    return 0;
}
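One common approach (a sketch on my part, not code from the original post) is to keep the arithmetic in float and convert the result with cv::saturate_cast<uchar>, which clamps out-of-range values to [0, 255] rather than letting a raw (uchar) cast misbehave on negative or too-large values:

#include <opencv2/core.hpp>

// Sketch: unsharp masking with saturating arithmetic.
// src and res are 8-bit single-channel images of the same size.
cv::Mat unsharpMask(const cv::Mat& src, const cv::Mat& res, float k)
{
    cv::Mat dst = cv::Mat::zeros(src.rows, src.cols, CV_8UC1);
    for (int y = 0; y < src.rows; y++) {
        for (int x = 0; x < src.cols; x++) {
            float sharpened = src.at<uchar>(y, x)
                            + k * (float)(src.at<uchar>(y, x) - res.at<uchar>(y, x));
            // saturate_cast clamps to [0, 255] before the uchar conversion.
            dst.at<uchar>(y, x) = cv::saturate_cast<uchar>(sharpened);
        }
    }
    return dst;
}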

Partial Derivative of Real Data Using Fourier Transform (FFTW) in 2D Not Giving Correct Results

I am trying to calculate the partial derivative of the function Sin(x)Sin(y) along the x-direction using Fourier Transform (FFTW library in C++). I take my function from real to Fourier space (fftw_plan_dft_r2c_2d), multiply the result by -i * kappa array (wavenumber/spatial frequency), and then transform the result back into real space (fftw_plan_dft_c2r_2d). Finally, I divide the result by the size of my 2D array Nx*Ny (I need to do this based on the documentation). However, the resulting function is scaled up by some value and the amplitude is not 1.
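For reference (my restatement, not from the original post), the identity this procedure relies on is that differentiation in x becomes multiplication by $i k_x$ in Fourier space:

$$\frac{\partial f}{\partial x}(x,y) \;\longleftrightarrow\; i\,k_x\,\hat{f}(k_x,k_y),$$

and the final division by $N_x N_y$ is needed because FFTW computes unnormalized transforms.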
#define Nx 360
#define Ny 360
#define REAL 0
#define IMAG 1
#define Pi 3.14

#include <stdio.h>
#include <math.h>
#include <fftw3.h>
#include <iostream>
#include <fstream>
#include <iomanip>

int main() {
    int i, j;
    int Nyh = Ny / 2 + 1;
    double k1;
    double k2;
    double *signal;
    double *result2;
    double *kappa1;
    double *kappa2;
    double df;
    fftw_complex *result1;
    fftw_complex *dfhat;

    signal = (double*)fftw_malloc(sizeof(double) * Nx * Ny);
    result2 = (double*)fftw_malloc(sizeof(double) * Nx * Ny);
    kappa1 = (double*)fftw_malloc(sizeof(double) * Nx * Nyh);
    result1 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * Nx * Nyh);
    dfhat = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * Nx * Nyh);

    for (int i = 0; i < Nx; i++) {
        if (i < Nx/2 + 1)
            k1 = 2 * Pi * (-Nx + i) / Nx;
        else
            k1 = 2 * Pi * i / Nx;
        for (int j = 0; j < Nyh; j++) {
            kappa1[j + Nyh * i] = k1;
        }
    }

    fftw_plan plan1 = fftw_plan_dft_r2c_2d(Nx, Ny, signal, result1, FFTW_ESTIMATE);
    fftw_plan plan2 = fftw_plan_dft_c2r_2d(Nx, Ny, dfhat, result2, FFTW_ESTIMATE);

    for (int i = 0; i < Nx; i++) {
        for (int j = 0; j < Ny; j++) {
            double x = (double)i / 180 * (double)Pi;
            double y = (double)j / 180 * (double)Pi;
            signal[j + Ny * i] = sin(x) * sin(y);
        }
    }

    fftw_execute(plan1);

    for (int i = 0; i < Nx; i++) {
        for (int j = 0; j < Nyh; j++) {
            dfhat[j + Nyh * i][REAL] = 0;
            dfhat[j + Nyh * i][IMAG] = 0;
            dfhat[j + Nyh * i][IMAG] = -result1[j + Nyh * i][REAL] * kappa1[j + Nyh * i];
            dfhat[j + Nyh * i][REAL] = result1[j + Nyh * i][IMAG] * kappa1[j + Nyh * i];
        }
    }

    fftw_execute(plan2);

    for (int i = 0; i < Nx; i++) {
        for (int j = 0; j < Ny; j++) {
            result2[j + Ny * i] /= (Nx * Ny);
        }
    }

    for (int j = 0; j < Ny; j++) {
        for (int i = 0; i < Nx; i++) {
            std::cout << std::setw(15) << result2[j + Ny * i];
        }
        std::cout << std::endl;
    }

    fftw_destroy_plan(plan1);
    fftw_destroy_plan(plan2);
    fftw_free(result1);
    fftw_free(dfhat);
    return 0;
}
This is how the result looks
I have defined my kappa array to have Nx * (Ny/2 + 1) elements and have split the array at the center (as all references suggest), but it does not work. I would appreciate any help!
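For comparison (my sketch, not from the original post), the usual convention for FFTW's row-major r2c output is that the full-length first dimension carries the non-negative frequencies 0..Nx/2 in its first half and the negative frequencies in its second half, while the halved second dimension stores only the non-negative frequencies 0..Ny/2:

#include <vector>

// Sketch: fill an Nx x (Ny/2 + 1) array with the signed x-frequency of each
// row, following FFTW's standard r2c layout. Multiply by 2*Pi/L for physical
// wavenumbers when the domain length L is not 2*Pi.
std::vector<double> makeKappa1(int Nx, int Ny)
{
    int Nyh = Ny / 2 + 1;
    std::vector<double> kappa1(Nx * Nyh);
    for (int i = 0; i < Nx; i++) {
        double k1 = (i <= Nx / 2) ? (double)i : (double)(i - Nx);
        for (int j = 0; j < Nyh; j++) {
            kappa1[j + Nyh * i] = k1; // x-wavenumber is constant along each row
        }
    }
    return kappa1;
}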

C++ neural network implemented from scratch cannot get above 50% on MNIST

So I have implemented a fully connected neural network with one hidden layer in C++, using Eigen for matrix multiplication. It uses minibatch gradient descent.
However, my model cannot get above 50% accuracy on MNIST. I have tried learning rates between 0.0001 and 10. The model does overfit on training sizes < 100 (with ~90% accuracy, which is still pretty bad), albeit extremely slowly.
What might be causing this low accuracy and extremely slow learning? My main concern is that the backpropagation is incorrect. Furthermore, I would prefer not to add any other optimization techniques (learning rate schedules, regularization, etc.).
Feed forward and backprop code:
z1 = (w1 * mbX).colwise() + b1;
a1 = sigmoid(z1);
z2 = (w2 * a1).colwise() + b2;
a2 = sigmoid(z2);
MatrixXd err = ((double) epsilon)/((double) minibatch_size) * ((a2 - mbY).array() * sigmoid_derivative(z2).array()).matrix();
b2 = b2 - err * ones;
w2 = w2 - (err * a1.transpose());
err = ((w2.transpose() * err).array() * sigmoid_derivative(z1).array()).matrix();
b1 = b1 - err * ones;
w1 = w1 - (err * mbX.transpose());
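For reference, the standard minibatch backprop update that these lines correspond to (my notation, with $m$ the minibatch size, $\epsilon$ the learning rate, $\odot$ the elementwise product, and $\mathbf{1}$ a vector of ones) is:

$$\delta_2 = \frac{\epsilon}{m}\,(a_2 - y)\odot \sigma'(z_2),\qquad b_2 \leftarrow b_2 - \delta_2\mathbf{1},\qquad W_2 \leftarrow W_2 - \delta_2 a_1^{\top},$$
$$\delta_1 = \big(W_2^{\top}\delta_2\big)\odot \sigma'(z_1),\qquad b_1 \leftarrow b_1 - \delta_1\mathbf{1},\qquad W_1 \leftarrow W_1 - \delta_1 X^{\top},$$

where, in the textbook formulation, $\delta_1$ is computed with $W_2$ as it was before its update.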
Full program code:
#include <iostream>
#include <fstream>
#include <math.h>
#include <cstdlib>
#include <algorithm> // for std::random_shuffle
#include <Eigen/Dense>
#include <vector>
#include <string>

using namespace Eigen;

#define N 30
#define epsilon 0.7
#define epoch 1000

//sizes
const int minibatch_size = 10;
const int training_size = 10000;
const int val_size = 10;
unsigned int num, magic, rows, cols;

//images
unsigned int image[training_size][28][28];
unsigned int val_image[val_size][28][28];

//labels
unsigned int label[training_size];
unsigned int val_label[val_size];

//inputs
MatrixXd X(784, training_size);
MatrixXd Y = MatrixXd::Zero(10, training_size);

//minibatch
MatrixXd mbX(784, minibatch_size);
MatrixXd mbY = MatrixXd::Zero(10, minibatch_size);

//validation
MatrixXd Xv(784, val_size);
MatrixXd Yv = MatrixXd::Zero(10, val_size);

//Image processing courtesy of https://stackoverflow.com/users/11146076/%e5%bc%a0%e4%ba%91%e9%93%ad
unsigned int in(std::ifstream& icin, unsigned int size) {
    unsigned int ans = 0;
    for (int i = 0; i < size; i++) {
        unsigned char x;
        icin.read((char*)&x, 1);
        unsigned int temp = x;
        ans <<= 8;
        ans += temp;
    }
    return ans;
}

void input(std::string ipath, std::string lpath, std::string ipath2, std::string lpath2) {
    std::ifstream icin;
    //training data
    icin.open(ipath, std::ios::binary);
    magic = in(icin, 4), num = in(icin, 4), rows = in(icin, 4), cols = in(icin, 4);
    for (int i = 0; i < training_size; i++) {
        int val = 0;
        for (int x = 0; x < rows; x++) {
            for (int y = 0; y < cols; y++) {
                image[i][x][y] = in(icin, 1);
                X(val, i) = image[i][x][y]/255;
                val++;
            }
        }
    }
    icin.close();
    //training labels
    icin.open(lpath, std::ios::binary);
    magic = in(icin, 4), num = in(icin, 4);
    for (int i = 0; i < training_size; i++) {
        label[i] = in(icin, 1);
        Y(label[i], i) = 1;
    }
    icin.close();
    //validation data
    icin.open(ipath2, std::ios::binary);
    magic = in(icin, 4), num = in(icin, 4), rows = in(icin, 4), cols = in(icin, 4);
    for (int i = 0; i < val_size; i++) {
        int val = 0;
        for (int x = 0; x < rows; x++) {
            for (int y = 0; y < cols; y++) {
                val_image[i][x][y] = in(icin, 1);
                Xv(val, i) = val_image[i][x][y]/255;
                val++;
            }
        }
    }
    icin.close();
    //validation labels
    icin.open(lpath2, std::ios::binary);
    magic = in(icin, 4), num = in(icin, 4);
    for (int i = 0; i < val_size; i++) {
        val_label[i] = in(icin, 1);
        Yv(val_label[i], i) = 1;
    }
    icin.close();
}

//Neural Network calculations
MatrixXd sigmoid(MatrixXd m) {
    m *= -1;
    return (1/(1 + m.array().exp())).matrix();
}

MatrixXd sigmoid_derivative(MatrixXd m) {
    return (sigmoid(m).array() * (1 - sigmoid(m).array())).matrix();
}

//Initialize weights and biases
//hidden layer
VectorXd b1 = MatrixXd::Zero(N, 1);
MatrixXd w1 = MatrixXd::Random(N, 784);
//output
VectorXd b2 = MatrixXd::Zero(10, 1);
MatrixXd w2 = MatrixXd::Random(10, N);

//Initialize intermediate values
MatrixXd z1, z2, a1, a2, z1v, z2v, a1v, a2v;
MatrixXd ones = MatrixXd::Constant(minibatch_size, 1, 1);

int main() {
    input("C:\\Users\\Aaron\\Documents\\Test\\train-images-idx3-ubyte\\train-images.idx3-ubyte", "C:\\Users\\Aaron\\Documents\\Test\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte", "C:\\Users\\Aaron\\Documents\\Test\\t10k-images-idx3-ubyte\\t10k-images.idx3-ubyte", "C:\\Users\\Aaron\\Documents\\Test\\t10k-labels-idx1-ubyte\\t10k-labels.idx1-ubyte");
    std::cout << "Finished Image Processing" << std::endl;
    //std::cout << w1 << std::endl;
    std::vector<double> val_ac;
    std::vector<double> c;
    std::vector<int> order;
    for (int i = 0; i < training_size; i++) {
        order.push_back(i);
    }
    for (int i = 0; i < epoch; i++) {
        //feed forward
        std::random_shuffle(order.begin(), order.end());
        for (int j = 0; j < training_size/minibatch_size; j++) {
            for (int k = 0; k < minibatch_size; k++) {
                int index = order[j * minibatch_size + k];
                mbX.col(k) = X.col(index);
                mbY.col(k) = Y.col(index);
            }
            z1 = (w1 * mbX).colwise() + b1;
            a1 = sigmoid(z1);
            z2 = (w2 * a1).colwise() + b2;
            a2 = sigmoid(z2);
            MatrixXd err = ((double) epsilon)/((double) minibatch_size) * ((a2 - mbY).array() * sigmoid_derivative(z2).array()).matrix();
            //std::cout << err << std::endl;
            b2 = b2 - err * ones;
            w2 = w2 - (err * a1.transpose());
            err = ((w2.transpose() * err).array() * sigmoid_derivative(z1).array()).matrix();
            //std::cout << err << std::endl;
            b1 = b1 - err * ones;
            w1 = w1 - (err * mbX.transpose());
        }
        //validation
        z1 = (w1 * X).colwise() + b1;
        a1 = sigmoid(z1);
        z2 = (w2 * a1).colwise() + b2;
        a2 = sigmoid(z2);
        double cost = 1/((double) training_size) * ((a2 - Y).array() * (a2 - Y).array()).matrix().sum();
        c.push_back(cost);
        int correct = 0;
        for (int i = 0; i < training_size; i++) {
            double maxP = -1;
            int na;
            for (int j = 0; j < 10; j++) {
                if (a2(j, i) > maxP) {
                    maxP = a2(j, i);
                    na = j;
                }
            }
            if (na == label[i]) correct++;
        }
        val_ac.push_back(((double) correct) / ((double) training_size));
        std::cout << "Finished Epoch " << i + 1 << std::endl;
        std::cout << "Cost: " << cost << std::endl;
        std::cout << "Accuracy: " << ((double) correct) / ((double) training_size) << std::endl;
    }

    //plot accuracy
    FILE *gp = _popen("gnuplot", "w");
    fprintf(gp, "set terminal wxt size 600,400 \n");
    fprintf(gp, "set grid \n");
    fprintf(gp, "set title '%s' \n", "NN");
    fprintf(gp, "plot '-' w line, '-' w lines \n");
    for (int i = 0; i < epoch; i++) {
        fprintf(gp, "%f %f \n", i + 1.0, c[i]);
    }
    fprintf(gp, "e\n");
    //validation accuracy
    for (int i = 0; i < epoch; i++) {
        fprintf(gp, "%f %f \n", i + 1.0, val_ac[i]);
    }
    fprintf(gp, "e\n");
    fflush(gp);
    system("pause");
    _pclose(gp);
    return 0;
}
UPD
Here is a graph of the accuracy on the training dataset (green) and the loss (purple)
https://i.stack.imgur.com/Ya2yR.png
Here is a graph of the loss for the training data and validation data:
https://imgur.com/a/4gmFCrk
The loss of the validation data is increasing past a certain point, which shows signs of overfitting. However, the accuracy still remains abysmal even on the training data.
unsigned int val_image[val_size][28][28];
Xv(val, i) = val_image[i][x][y]/255;
Can you try again with Xv(val, i) = val_image[i][x][y] / 255.0;
There too:
X(val, i) = image[i][x][y]/255;
With the code as written, Xv is 0 very often, and 1 when the image has value 255. With a floating-point division, you'll get values between 0.0 and 1.0.
You'll need to check your code for other places where you may be dividing integers.
N.b.: In C++, 240/255 is 0.
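A minimal, self-contained illustration of that pitfall (my addition, not part of the original answer):

#include <iostream>

int main() {
    unsigned int pixel = 240;

    double wrong = pixel / 255;   // integer division happens first: 240/255 == 0
    double right = pixel / 255.0; // floating-point division: ~0.941

    std::cout << wrong << " vs " << right << "\n"; // prints "0 vs 0.941176"
    return 0;
}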

Issue with convolution kernels that add up to zero

I'm making an image editing program in C++ using SFML, and I tried to add image filters using:
int clamp(int value, int min, int max)
{
    if (value < min)
        return min;
    if (value > max)
        return max;
    return value;
}

void MyImage::applyKernel(std::vector<std::vector<int>> kernel)
{
    int index(0), tempx(0), tempy(0);
    int wr(0), wg(0), wb(0), wa(0), sum(0);
    auto newPixels = new sf::Uint8[this->size_y * this->size_x * 4];

    // Calculate the sum of the kernel
    for (int i = 0; i < kernel.size(); i++) {
        for (int j = 0; j < kernel[i].size(); j++) {
            sum += kernel[i][j];
        }
    }

    for (int y = 0; y < this->size_y; y++) {
        for (int x = 0; x < this->size_x; x++) {
            /*
                Calculate weighted sum from kernel
            */
            wr = wg = wb = wa = 0;
            for (int i = 0; i < kernel.size(); i++) {
                for (int j = 0; j < kernel[i].size(); j++) {
                    /*
                        Calculates the coordinates of the kernel relative to the pixel we are changing
                    */
                    tempx = x + (j - floor(kernel[i].size() / 2));
                    tempy = y + (i - floor(kernel.size() / 2));
                    //std::cout << "kernel=(" << j << ", " << i << "), pixel=(" << x << ", " << y << ") tempPos=(" << tempx << ", " << tempy << ")\n";
                    /*
                        This code below should have the effect of mirroring the image in the case the kernel coordinate is out of bounds (along the edge of the image)
                    */
                    tempx = (tempx < 0) ? -1 * tempx : tempx;
                    tempy = (tempy < 0) ? -1 * tempy : tempy;
                    tempx = (tempx > this->size_x) ? x - (j - floor(kernel[i].size() / 2)) : tempx;
                    tempy = (tempy > this->size_y) ? y - (i - floor(kernel.size() / 2)) : tempy;

                    if (tempx >= 0 && tempx < this->size_x && tempy >= 0 && tempy < this->size_y) {
                        index = (((tempy * this->size_x) - tempy) + (tempx)) * 4;
                        wr += kernel[i][j] * this->pixels[index];
                        wg += kernel[i][j] * this->pixels[index + 1];
                        wb += kernel[i][j] * this->pixels[index + 2];
                        wa += kernel[i][j] * this->pixels[index + 3];
                    }
                }
            }

            if (sum) {
                wr /= sum;
                wg /= sum;
                wb /= sum;
                wa /= sum;
            }

            index = (((y * this->size_x) - y) + (x)) * 4;
            newPixels[index] = clamp(wr, 0, 255);     // Red
            newPixels[index + 1] = clamp(wg, 0, 255); // Green
            newPixels[index + 2] = clamp(wb, 0, 255); // Blue
            newPixels[index + 3] = clamp(wa, 0, 255); // Alpha
        }
    }

    this->pixels = newPixels;
    // Copies the data from our sf::Uint8 array to the image object to be displayed => Removes the overhead of calling setPixel(x,y,color) for every pixel {As a side note setPixel() should always be avoided}
    this->im->create(this->size_x, this->size_y, this->pixels);
}
I was trying to use [-1,-1,-1], [-1,8,-1], [-1,-1,-1] for edge detection, but I just ended up with a white image except for some pixels near the bottom. I've tried different images and kernels, but any kernel that adds up to 0 doesn't work. For example, if I take the edge detection kernel above and change the 8 to a 9, it gives the expected result. Is there something wrong with my idea of how convolution kernels work, or is it just a bug in my code?
Thank you.
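One detail worth double-checking (my observation, offered as a sketch rather than a confirmed diagnosis): the pixel index above is computed as (((y * this->size_x) - y) + x) * 4, which simplifies to (y * (size_x - 1) + x) * 4, whereas a row-major RGBA buffer of width size_x is conventionally indexed like this:

// Sketch: conventional row-major indexing into an RGBA byte buffer.
// Each pixel occupies 4 consecutive sf::Uint8 values: R, G, B, A.
inline int pixelIndex(int x, int y, int size_x)
{
    return (y * size_x + x) * 4; // not (y * size_x - y + x) * 4
}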

Confusion testing fftw3 - poisson equation 2d test

I am having trouble explaining/understanding the following phenomenon:
To test fftw3 I am using the 2D Poisson test case:
laplacian(f(x,y)) = -g(x,y) with periodic boundary conditions.
After applying the Fourier transform to the equation we obtain: F(kx,ky) = G(kx,ky) / (kx² + ky²) (1)
If I take g(x,y) = sin(x) + sin(y), (x,y) in [0, 2π], I immediately have f(x,y) = g(x,y), which is what I am trying to obtain with the FFT:
I compute G from g with a forward Fourier transform.
From this I can compute the Fourier transform of f with (1).
Finally, I compute f with the backward Fourier transform (without forgetting to normalize by 1/(nx*ny)).
In practice, the results are pretty bad.
(For instance, the amplitude for N = 256 is twice the amplitude obtained with N = 512.)
Even worse, if I try g(x,y) = sin(x)*sin(y), the curve does not even have the same form as the solution.
(Note that I must change the equation; I divide the Laplacian by two in this case: (1) becomes F(kx,ky) = 2*G(kx,ky)/(kx² + ky²).)
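As a one-step check of (1) (my addition): substituting Fourier modes into the Poisson equation turns the Laplacian into multiplication by -(kx² + ky²), so

$$-(k_x^2 + k_y^2)\,F(k_x,k_y) = -G(k_x,k_y) \;\Longrightarrow\; F(k_x,k_y) = \frac{G(k_x,k_y)}{k_x^2 + k_y^2}, \qquad (k_x,k_y) \ne (0,0).$$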
Here is the code:
/*
 * fftw test -- double precision
 */
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <fftw3.h>

using namespace std;

int main()
{
    int N = 128;
    int i, j;
    double pi = 3.14159265359;
    double *X, *Y;
    X = (double*) malloc(N*sizeof(double));
    Y = (double*) malloc(N*sizeof(double));
    fftw_complex *out1, *in2, *out2, *in1;
    fftw_plan p1, p2;
    double L = 2.*pi;
    double dx = L/(N - 1);

    in1  = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));
    out2 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));
    out1 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));
    in2  = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));

    p1 = fftw_plan_dft_2d(N, N, in1, out1, FFTW_FORWARD,  FFTW_MEASURE);
    p2 = fftw_plan_dft_2d(N, N, in2, out2, FFTW_BACKWARD, FFTW_MEASURE);

    for (i = 0; i < N; i++) {
        X[i] = -pi + i*dx;
        for (j = 0; j < N; j++) {
            Y[j] = -pi + j*dx;
            in1[i*N + j][0] = sin(X[i]) + sin(Y[j]); // row major ordering
            //in1[i*N + j][0] = sin(X[i]) * sin(Y[j]); // 2nd test case
            in1[i*N + j][1] = 0;
        }
    }

    fftw_execute(p1); // FFT forward

    for (i = 0; i < N; i++) { // f = g / ( kx² + ky² )
        for (j = 0; j < N; j++) {
            in2[i*N + j][0] = out1[i*N + j][0] / (i*i + j*j + 1e-16);
            in2[i*N + j][1] = out1[i*N + j][1] / (i*i + j*j + 1e-16);
            //in2[i*N + j][0] = 2*out1[i*N + j][0] / (i*i + j*j + 1e-16); // 2nd test case
            //in2[i*N + j][1] = 2*out1[i*N + j][1] / (i*i + j*j + 1e-16);
        }
    }

    fftw_execute(p2); // FFT backward

    // checking the results computed
    double erl1 = 0.;
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            erl1 += fabs(in1[i*N + j][0] - out2[i*N + j][0]/N/N)*dx*dx;
            cout << i << " " << j << " " << sin(X[i])+sin(Y[j]) << " " << out2[i*N+j][0]/N/N << " " << endl; // > output
        }
    }
    cout << erl1 << endl; // L1 error

    fftw_destroy_plan(p1);
    fftw_destroy_plan(p2);
    fftw_free(out1);
    fftw_free(out2);
    fftw_free(in1);
    fftw_free(in2);
    return 0;
}
I can't find any (more) mistakes in my code (I installed the fftw3 library last week) and I don't see a problem with the maths either, but I don't think it's the FFT's fault. Hence my predicament. I am all out of ideas and all out of Google as well.
Any help solving this puzzle would be greatly appreciated.
Note:
compiling: g++ test.cpp -lfftw3 -lm
executing: ./a.out > output
And I use gnuplot in order to plot the curves:
(in gnuplot) splot "output" u 1:2:4 (for the computed solution)
Here are some little points to be modified:
You need to account for all small frequencies, including the negative ones! Index i corresponds to the frequency 2 PI i/N but also to the frequency 2 PI (i-N)/N. In Fourier space, the end of the array matters as much as the beginning! In our case, we keep the smallest frequency: it's 2 PI i/N for the first half of the array, and 2 PI (i-N)/N on the second half (written out in the sketch after these points).
Of course, as Paul said, N - 1 should be N in double dx = L/(N - 1); => double dx = L/N;. N - 1 does not correspond to a continuous periodic signal, and it would be hard to use it as a test case.
Scaling... I did it empirically.
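In formulas, that frequency folding reads (my restatement of the first point, keeping the answer's convention for a length-N transform):

$$k_i = \begin{cases} \dfrac{2\pi i}{N}, & 2i < N,\\[4pt] \dfrac{2\pi\,(i-N)}{N}, & 2i \ge N, \end{cases}$$

which is what the fact variable in the code below computes, up to the squaring and the constant 2π/N scale.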
The result I obtain is closer to the expected one, for both cases. Here is the code:
/*
 * fftw test -- double precision
 */
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <fftw3.h>

using namespace std;

int main()
{
    int N = 128;
    int i, j;
    double pi = 3.14159265359;
    double *X, *Y;
    X = (double*) malloc(N*sizeof(double));
    Y = (double*) malloc(N*sizeof(double));
    fftw_complex *out1, *in2, *out2, *in1;
    fftw_plan p1, p2;
    double L = 2.*pi;
    double dx = L/N;

    in1  = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));
    out2 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));
    out1 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));
    in2  = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*(N*N));

    p1 = fftw_plan_dft_2d(N, N, in1, out1, FFTW_FORWARD,  FFTW_MEASURE);
    p2 = fftw_plan_dft_2d(N, N, in2, out2, FFTW_BACKWARD, FFTW_MEASURE);

    for (i = 0; i < N; i++) {
        X[i] = -pi + i*dx;
        for (j = 0; j < N; j++) {
            Y[j] = -pi + j*dx;
            in1[i*N + j][0] = sin(X[i]) + sin(Y[j]); // row major ordering
            // in1[i*N + j][0] = sin(X[i]) * sin(Y[j]); // 2nd test case
            in1[i*N + j][1] = 0;
        }
    }

    fftw_execute(p1); // FFT forward

    for (i = 0; i < N; i++) { // f = g / ( kx² + ky² )
        for (j = 0; j < N; j++) {
            double fact = 0;
            in2[i*N + j][0] = 0;
            in2[i*N + j][1] = 0;
            // fold each index to its smallest corresponding frequency
            if (2*i < N) {
                fact = ((double)i*i);
            } else {
                fact = ((double)(N-i)*(N-i));
            }
            if (2*j < N) {
                fact += ((double)j*j);
            } else {
                fact += ((double)(N-j)*(N-j));
            }
            if (fact != 0) {
                in2[i*N + j][0] = out1[i*N + j][0]/fact;
                in2[i*N + j][1] = out1[i*N + j][1]/fact;
            } else {
                in2[i*N + j][0] = 0;
                in2[i*N + j][1] = 0;
            }
            //in2[i*N + j][0] = out1[i*N + j][0];
            //in2[i*N + j][1] = out1[i*N + j][1];
            //in2[i*N + j][0] = out1[i*N + j][0]*(1.0/(i*i+1e-16)+1.0/(j*j+1e-16)+1.0/((N-i)*(N-i)+1e-16)+1.0/((N-j)*(N-j)+1e-16))*N*N;
            //in2[i*N + j][1] = out1[i*N + j][1]*(1.0/(i*i+1e-16)+1.0/(j*j+1e-16)+1.0/((N-i)*(N-i)+1e-16)+1.0/((N-j)*(N-j)+1e-16))*N*N;
            //in2[i*N + j][0] = 2*out1[i*N + j][0]/ (i*i+j*j+1e-16); // 2nd test case
            //in2[i*N + j][1] = 2*out1[i*N + j][1]/ (i*i+j*j+1e-16);
        }
    }

    fftw_execute(p2); // FFT backward

    // checking the results computed
    double erl1 = 0.;
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            erl1 += fabs(in1[i*N + j][0] - out2[i*N + j][0]/(N*N))*dx*dx;
            cout << i << " " << j << " " << sin(X[i])+sin(Y[j]) << " " << out2[i*N+j][0]/(N*N) << " " << endl; // > output
            // cout << i << " " << j << " " << sin(X[i])*sin(Y[j]) << " " << out2[i*N+j][0]/(N*N) << " " << endl; // 2nd test case
        }
    }
    cout << erl1 << endl; // L1 error

    fftw_destroy_plan(p1);
    fftw_destroy_plan(p2);
    fftw_free(out1);
    fftw_free(out2);
    fftw_free(in1);
    fftw_free(in2);
    return 0;
}
This code is far from perfect; it is neither optimized nor beautiful, but it gives almost what is expected.
Bye,