I am using a simple 'leapfrog algorithm I am aiming to simulate the orbits of the earth a Jupiter around the sun. I am unable to get them to orbit despite being fairly sure the maths is correct. It appears that gravity is acting too weekly and the planet merely floats away from the sun, interestingly if I adjust the Newtonian acceleration due to gravity term by multiplying it by rad2 I find that the system does indeed produce fairly stable orbits but at much much too large radii.
program physim
Implicit none
integer :: i,j,n,day ! Integer variables
doubleprecision :: G , r(1:3,1:10) , a(1:3, 1:10) , v(1:3, 1:10) , m(1:3), dt, Au, dr(1:3),
rad2(1:3), t, tcount, tend, tout
! constants
day = 86400
tout = 10*day
tend = 20*day
Au = 15e11
n = 3
G = 6.67e-11
!n = 2
dt = 100
!sun
r(1,1) = 0.
r(2,1) = 0.
r(3,1) = 0.
v(1,1) = 0.
v(2,1) = 0.
v(3,1) = 0.
m(1) = 1.9898e30
!earth
r(1,2) = Au
r(2,2) = 0.
r(3,2) = 0.
v(1,2) = 0.
v(2,2) = 30000
v(3,2) = 0.
m(2) = 6e24
!jupiter
r(1,3) = 5.2*Au
r(2,3) = 0.
r(3,3) = 0.
v(1,3) = 0.
v(2,3) = 13070
v(3,3) = 0.
m(3) = 2e27
do
a = 0
tcount = 0
do i = 1, n
do j = 1, n
!calculating acceleration
if (i==j)cycle
dr(1:3) = r(1:3, j) - r(1:3, i)
rad2 = dr(1)**2 + dr(2)**2 + dr(3)**2
a(1:3, i) = a(1:3, i) + G*m(j)*dr(1:3)/(rad2*sqrt(rad2))
end do
end do
do i = 1, n
r(1:3 ,i) = r(1:3, i) + v(1:3, i)*dt
v(1:3, i) = v(1:3, i) + a(1:3, i)*dt
end do
t = t + dt
tcount = t + dt
if(tcount>tout) then
!write(6,*) a(1,2)
!write(6,*) rad2
write(6,*) a(1,1) , a(2,1), a(3, 2)
end if
end do
end program
Your most fundamental problem was that 1 A.U. = 1.5e11 m, not 15e11. Then you were doing stuff like resetting tcount every trip through the loop. Set it before the start of the main loop and then only reset when you print out a line of output. It should be updated as tcount=tcount+dt and then you probably want to print out r(1,2) , r(2,2), r(1,3) , r(2,3) so you can plot the positions of jupiter and earth. Also you should maybe go for more time so you can see a few full orbits of earth, and finally put a test at the bottom of the loop so it will exit when t>tend. Making these changes I got output that looked like this:
The code snippet below is running slower than expected. The authors of this paper http://www.cvlibs.net/publications/Geiger2010ACCV.pdf compute support_points of a 900x700 image in 118 ms. I have implemented their algorithm below in Halide.
In my algorithm, the nested for loops over length and width iterate over xi and yi, which are points in output_x and output_y (defined previously but not shown below). Over each iteration of the nested for loops, a vector top_k is computed and pushed_back into support_points.
Computing this pipeline even for left_buffer.width() == 20 and left_buffer.height() == 20 takes 500 ms. Thus this implementation is several orders of magnitude slower:
...
int k = 4; // # of support points
vector<pair<Expr, Expr>> support_points(k * left_buffer.width() * left_buffer.height());
// Calculate support pixel for each
Func support("support");
support(x, y) = Tuple(i32(0), i32(0), f32(0));
for (int yi = 0; yi < left_buffer.height(); yi++) {
for (int xi = 0; xi < left_buffer.width() - 2; xi++) {
bool left = xi < left_buffer.width() / 4;
bool center = (xi >= left_buffer.width() / 4 && xi < left_buffer.width() * 3 / 4);
bool right = xi >= left_buffer.width() * 3 / 4;
vector <pair<Expr, Expr>> scan_range;
pair <Expr, Expr> scan_height(0, (Expr) left_buffer.height());
pair <Expr, Expr> scan_width;
int which_pred = 0;
if (left) {
scan_width = make_pair((Expr) 0, (Expr) left_buffer.width() / 2);
which_pred = 0;
}
else if (center) {
scan_width = make_pair((Expr) xi - left_buffer.width() / 4, (Expr) left_buffer.width() / 2);
which_pred = 1;
}
else if (right) {
scan_width = make_pair((Expr) left_buffer.width() / 2, (Expr) left_buffer.width() / 2);
which_pred = 2;
}
else {
cout<<"Error"<<endl;
}
scan_range = {scan_width, scan_height};
// cout<<"xi "<<xi<<endl;
// cout<<"yi "<<yi<<endl;
// cout<<"scan_width= "<<scan_width.first<<" "<<scan_width.second<<endl;
// cout<<"scan_height= "<<scan_height.first<<" "<<scan_height.second<<endl;
RDom scanner(scan_range);
Expr predicate[3] = {scanner.x != xi && scanner.y != yi, scanner.x != 0 && scanner.y != 0, scanner.x != xi && scanner.y != yi};
scanner.where(predicate[which_pred]);
std::vector<Expr> top_k(k * 3);
for (int i = 0; i < k; i++) { // say we want top 4 support points.
top_k[3*i] = 10000.0f;
top_k[3*i+1] = 0;
top_k[3*i+2] = 0;
}
Func argmin("argmin");
argmin() = Tuple(top_k);
Expr next_val = abs(output_x(xi, yi) - output_x(scanner.x, scanner.y)) + abs(output_y(xi, yi) - output_y(scanner.x, scanner.y));
Expr next_x = scanner.x;
Expr next_y = scanner.y;
top_k = Tuple(argmin()).as_vector();
// Insert a single element into a sorted list without actually branching
top_k.push_back(next_val);
top_k.push_back(next_x);
top_k.push_back(next_y);
for (int i = k; i > 0; i--) {
Expr prev_val = top_k[(i-1)*3];
Expr prev_x = top_k[(i-1)*3 + 1];
Expr prev_y = top_k[(i-1)*3 + 2];
Expr should_swap = top_k[i*3] < prev_val;
top_k[(i-1)*3] = select(should_swap, top_k[i*3], prev_val);
top_k[(i-1)*3 + 1] = select(should_swap, top_k[i*3 + 1], prev_x);
top_k[(i-1)*3 + 2] = select(should_swap, top_k[i*3 + 2], prev_y);
top_k[i*3] = select(should_swap, prev_val, top_k[i*3]);
top_k[i*3 + 1] = select(should_swap, prev_x, top_k[i*3 + 1]);
top_k[i*3 + 2] = select(should_swap, prev_y, top_k[i*3 + 2]);
}
// Discard the k+1th element
top_k.pop_back(); top_k.pop_back(); top_k.pop_back();
bool cond = xi == 10 && yi == 10;
cout << xi << " "<< yi << " " << cond << endl;
Expr e = argmin()[0];
e = print_when(cond, e, "<- argmin() val");
argmin() = Tuple(top_k);
argmin.compute_root();
// argmin.trace_stores();
argmin.compile_to_lowered_stmt("argmin.html", {}, HTML);
Realization real = argmin.realize();
for (int i = 0; i < k; i++) {
pair<Expr, Expr> c(top_k[3*i+1], top_k[3*i+2]);
support_points.push_back(c);
}
}
}
double t2 = current_time();
cout<<(t2-t1)/100<<" ms"<<endl;
cout<<"executed"<<endl;
}
How can I improve efficiency?
It looks like you may be getting a bit confused between the stages of your program. With Halide, your C++ code that works with Exprs, Funcs, etc. is not actually evaluating anything, it is constructing a Halide program, which you can then compile and run. That means that the C++ for loops, std::vectors, etc. that you're using are all happening at program construction time (essentially compile time) of the Halide program. You might think of it like C++ templates, which evaluate at compile time, vs. the C++ code they construct, which evaluate at the run time of your program: the C++ code you're writing here is equivalent to template code with respect to the Halide program that you are building.
This gets a bit more confusing with the ability to JIT-compile and evaluate a Halide program inside of the same C++ program that builds it (realize).
As it is, I suspect the above program doesn't actually compute the results you expect it to. After the double for loop, what are you hoping to do with support_points? What you have built there is a big array of expressions (pieces of code), not concrete values. And you are JIT-compiling and running a new piece of Halide code each time around those loops (i.e., for every pixel).
I think you may have an easier time understanding what you are building if you stick to ahead-of-time compilation (compile_to_file or generators) for now. That makes the two stages—Halide code generation time, and the runtime of that code inside a separate program—very distinct.
I want to plot Det curve and roc curve Why polyxpoly does not work?
I plotted a DET curve based on the following steps: First, I changed the threshold and count the number of false rejections and false acceptances. Second, I use plot MATLAB function to draw FAR and FRR.
function [TPR,FPR] = DETCurve(G,I)
#load('G.dat');
#load('I.dat');
#load data from the column 4 fscore
i0=find(Fscore(:,end)==0);
i1=find(Fscore(:,end)==1);
D0=Fscore(i0,end-1);
D1=Fscore(i1,end-1);
% Creates a matrix
TPR = zeros(1, 1000);
FPR = zeros(1, 1000);
#number of positive responses and negative responses in ground truth
P = length(i1);
N = length(i0);
index = 0;
% Assume the threshold as 0.01
for threshold = 0:0.001:1
TP = 0;
FP = 0;
%Provides the D1 count
for i = 1:length(i1)
if (D1(i) >= threshold) TP = TP + 1;
end
end
% Provides the D0count
for i1 = length(i0)
if(D0(i1) >= threshold)
FP = FP + 1;
end
end
index = index + 1;
% Calculating true positive rate
TPR(index) = TP/P;
% Calculating false positive rate
FPR(index) = FP/N;
end
% Calculating false negative rate(FNR) using TPR+FNR=1
FNR = (1-TPR);
x = 0:0.1:1;
y = x;
#[x(i),y(i)] = polyxpoly(x,y,FPR,FNR);
fprintf('EER(X): %d n', x(i));
fprintf('EER(Y): %d n', y(i));
plot(FPR,FNR,'LineWidth',2, 'color','g');
hold on;
plot(x,y,x,1-y, 'color','r');
plot (x(i),y(i),'X','MarkerSize',10, 'LineWidth', 2,'Color','b');
hold off;
title('DET CURVE');
xlabel('False Positive Rate (FPR) ');
ylabel('False Neagtive Rate (FNR) ');
end
I did use the findcontours() method to extract contour from the image, but I have no idea how to calculate the curvature from a set of contour points. Can somebody help me? Thank you very much!
While the theory behind Gombat's answer is correct, there are some errors in the code as well as in the formulae (the denominator t+n-x should be t+n-t). I have made several changes:
use symmetric derivatives to get more precise locations of curvature maxima
allow to use a step size for derivative calculation (can be used to reduce noise from noisy contours)
works with closed contours
Fixes:
* return infinity as curvature if denominator is 0 (not 0)
* added square calculation in denominator
* correct checking for 0 divisor
std::vector<double> getCurvature(std::vector<cv::Point> const& vecContourPoints, int step)
{
std::vector< double > vecCurvature( vecContourPoints.size() );
if (vecContourPoints.size() < step)
return vecCurvature;
auto frontToBack = vecContourPoints.front() - vecContourPoints.back();
std::cout << CONTENT_OF(frontToBack) << std::endl;
bool isClosed = ((int)std::max(std::abs(frontToBack.x), std::abs(frontToBack.y))) <= 1;
cv::Point2f pplus, pminus;
cv::Point2f f1stDerivative, f2ndDerivative;
for (int i = 0; i < vecContourPoints.size(); i++ )
{
const cv::Point2f& pos = vecContourPoints[i];
int maxStep = step;
if (!isClosed)
{
maxStep = std::min(std::min(step, i), (int)vecContourPoints.size()-1-i);
if (maxStep == 0)
{
vecCurvature[i] = std::numeric_limits<double>::infinity();
continue;
}
}
int iminus = i-maxStep;
int iplus = i+maxStep;
pminus = vecContourPoints[iminus < 0 ? iminus + vecContourPoints.size() : iminus];
pplus = vecContourPoints[iplus > vecContourPoints.size() ? iplus - vecContourPoints.size() : iplus];
f1stDerivative.x = (pplus.x - pminus.x) / (iplus-iminus);
f1stDerivative.y = (pplus.y - pminus.y) / (iplus-iminus);
f2ndDerivative.x = (pplus.x - 2*pos.x + pminus.x) / ((iplus-iminus)/2*(iplus-iminus)/2);
f2ndDerivative.y = (pplus.y - 2*pos.y + pminus.y) / ((iplus-iminus)/2*(iplus-iminus)/2);
double curvature2D;
double divisor = f1stDerivative.x*f1stDerivative.x + f1stDerivative.y*f1stDerivative.y;
if ( std::abs(divisor) > 10e-8 )
{
curvature2D = std::abs(f2ndDerivative.y*f1stDerivative.x - f2ndDerivative.x*f1stDerivative.y) /
pow(divisor, 3.0/2.0 ) ;
}
else
{
curvature2D = std::numeric_limits<double>::infinity();
}
vecCurvature[i] = curvature2D;
}
return vecCurvature;
}
For me curvature is:
where t is the position inside the contour and x(t) resp. y(t) return the related x resp. y value. See here.
So, according to my definition of curvature, one can implement it this way:
std::vector< float > vecCurvature( vecContourPoints.size() );
cv::Point2f posOld, posOlder;
cv::Point2f f1stDerivative, f2ndDerivative;
for (size_t i = 0; i < vecContourPoints.size(); i++ )
{
const cv::Point2f& pos = vecContourPoints[i];
if ( i == 0 ){ posOld = posOlder = pos; }
f1stDerivative.x = pos.x - posOld.x;
f1stDerivative.y = pos.y - posOld.y;
f2ndDerivative.x = - pos.x + 2.0f * posOld.x - posOlder.x;
f2ndDerivative.y = - pos.y + 2.0f * posOld.y - posOlder.y;
float curvature2D = 0.0f;
if ( std::abs(f2ndDerivative.x) > 10e-4 && std::abs(f2ndDerivative.y) > 10e-4 )
{
curvature2D = sqrt( std::abs(
pow( f2ndDerivative.y*f1stDerivative.x - f2ndDerivative.x*f1stDerivative.y, 2.0f ) /
pow( f2ndDerivative.x + f2ndDerivative.y, 3.0 ) ) );
}
vecCurvature[i] = curvature2D;
posOlder = posOld;
posOld = pos;
}
It works on non-closed pointlists as well. For closed contours, you may would like to change the boundary behavior (for the first iterations).
UPDATE:
Explanation for the derivatives:
A derivative for a continuous 1 dimensional function f(t) is:
But we are in a discrete space and have two discrete functions f_x(t) and f_y(t) where the smallest step for t is one.
The second derivative is the derivative of the first derivative:
Using the approximation of the first derivative, it yields to:
There are other approximations for the derivatives, if you google it, you will find a lot.
Here's a python implementation mainly based on Philipp's C++ code. For those interested, more details on the derivation can be found in Chapter 10.4.2 of:
Klette & Rosenfeld, 2004: Digital Geometry
def getCurvature(contour,stride=1):
curvature=[]
assert stride<len(contour),"stride must be shorther than length of contour"
for i in range(len(contour)):
before=i-stride+len(contour) if i-stride<0 else i-stride
after=i+stride-len(contour) if i+stride>=len(contour) else i+stride
f1x,f1y=(contour[after]-contour[before])/stride
f2x,f2y=(contour[after]-2*contour[i]+contour[before])/stride**2
denominator=(f1x**2+f1y**2)**3+1e-11
curvature_at_i=np.sqrt(4*(f2y*f1x-f2x*f1y)**2/denominator) if denominator > 1e-12 else -1
curvature.append(curvature_at_i)
return curvature
EDIT:
you can use convexityDefects from openCV, here's a link
a code example to find fingers based in their contour (variable res) source
def calculateFingers(res,drawing): # -> finished bool, cnt: finger count
# convexity defect
hull = cv2.convexHull(res, returnPoints=False)
if len(hull) > 3:
defects = cv2.convexityDefects(res, hull)
if type(defects) != type(None): # avoid crashing. (BUG not found)
cnt = 0
for i in range(defects.shape[0]): # calculate the angle
s, e, f, d = defects[i][0]
start = tuple(res[s][0])
end = tuple(res[e][0])
far = tuple(res[f][0])
a = math.sqrt((end[0] - start[0]) ** 2 + (end[1] - start[1]) ** 2)
b = math.sqrt((far[0] - start[0]) ** 2 + (far[1] - start[1]) ** 2)
c = math.sqrt((end[0] - far[0]) ** 2 + (end[1] - far[1]) ** 2)
angle = math.acos((b ** 2 + c ** 2 - a ** 2) / (2 * b * c)) # cosine theorem
if angle <= math.pi / 2: # angle less than 90 degree, treat as fingers
cnt += 1
cv2.circle(drawing, far, 8, [211, 84, 0], -1)
return True, cnt
return False, 0
in my case, i used about the same function to estimate the bending of board while extracting the contour
OLD COMMENT:
i am currently working in about the same, great information in this post, i'll come back with a solution when i'll have it ready
from Jonasson's answer, Shouldn't be here a tuple on the right side too?, i believe it won't unpack:
f1x,f1y=(contour[after]-contour[before])/stride
f2x,f2y=(contour[after]-2*contour[i]+contour[before])/stride**2
I am trying to do a project in sound processing and need to put the frequencies into another domain. Now, I have tried to implement an FFT, that didn't go well. I tried to understand the z-transform, that didn't go to well either. I read up and found DFT's a lot more simple to understand, especially the algorithm. So I coded the algorithm using examples but I do not know or think the output is right. (I don't have Matlab on here, and cannot find any resources to test it) and wondered if you guys knew if I was going in the right direction. Here is my code so far:
#include <iostream>
#include <complex>
#include <vector>
using namespace std;
const double PI = 3.141592;
vector< complex<double> > DFT(vector< complex<double> >& theData)
{
// Define the Size of the read in vector
const int S = theData.size();
// Initalise new vector with size of S
vector< complex<double> > out(S, 0);
for(unsigned i=0; (i < S); i++)
{
out[i] = complex<double>(0.0, 0.0);
for(unsigned j=0; (j < S); j++)
{
out[i] += theData[j] * polar<double>(1.0, - 2 * PI * i * j / S);
}
}
return out;
}
int main(int argc, char *argv[]) {
vector< complex<double> > numbers;
numbers.push_back(102023);
numbers.push_back(102023);
numbers.push_back(102023);
numbers.push_back(102023);
vector< complex<double> > testing = DFT(numbers);
for(unsigned i=0; (i < testing.size()); i++)
{
cout << testing[i] << endl;
}
}
The inputs are:
102023 102023
102023 102023
And the result:
(408092, 0)
(-0.0666812, -0.0666812)
(1.30764e-07, -0.133362)
(0.200044, -0.200043)
Any help or advice would be great, I'm not expecting a lot, but, anything would be great. Thank you :)
#Phorce is right here. I don't think there is any reson to reinvent the wheel. However, if you want to do this so that you understand the methodology and to have the joy of coding it yourself I can provide a FORTRAN FFT code that I developed some years ago. Of course this is not C++ and will require a translation; this should not be too difficult and should enable you to learn a lot in doing so...
Below is a Radix 4 based algorithm; this radix-4 FFT recursively partitions a DFT into four quarter-length DFTs of groups of every fourth time sample. The outputs of these shorter FFTs are reused to compute many outputs, thus greatly reducing the total computational cost. The radix-4 decimation-in-frequency FFT groups every fourth output sample into shorter-length DFTs to save computations. The radix-4 FFTs require only 75% as many complex multiplies as the radix-2 FFTs. See here for more information.
!+ FILE: RADIX4.FOR
! ===================================================================
! Discription: Radix 4 is a descreet complex Fourier transform algorithim. It
! is to be supplied with two real arrays, one for real parts of function
! one for imaginary parts: It can also unscramble transformed arrays.
! Usage: calling FASTF(XREAL,XIMAG,ISIZE,ITYPE,IFAULT); we supply the
! following:
!
! XREAL - array containing real parts of transform sequence
! XIMAG - array containing imagianry parts of transformation sequence
! ISIZE - size of transform (ISIZE = 4*2*M)
! ITYPE - +1 forward transform
! -1 reverse transform
! IFAULT - 1 if error
! - 0 otherwise
! ===================================================================
!
! Forward transform computes:
! X(k) = sum_{j=0}^{isize-1} x(j)*exp(-2ijk*pi/isize)
! Backward computes:
! x(j) = (1/isize) sum_{k=0}^{isize-1} X(k)*exp(ijk*pi/isize)
!
! Forward followed by backwards will result in the origonal sequence!
!
! ===================================================================
SUBROUTINE FASTF(XREAL,XIMAG,ISIZE,ITYPE,IFAULT)
REAL*8 XREAL(*),XIMAG(*)
INTEGER MAX2,II,IPOW
PARAMETER (MAX2 = 20)
! Check for valid transform size upto 2**(max2):
IFAULT = 1
IF(ISIZE.LT.4) THEN
print*,'FFT: Error: Data array < 4 - Too small!'
return
ENDIF
II = 4
IPOW = 2
! Prepare mod 2:
1 IF((II-ISIZE).NE.0) THEN
II = II*2
IPOW = IPOW + 1
IF(IPOW.GT.MAX2) THEN
print*,'FFT: Error: FFT1!'
return
ENDIF
GOTO 1
ENDIF
! Check for correct type:
IF(IABS(ITYPE).NE.1) THEN
print*,'FFT: Error: Wrong type of transformation!'
return
ENDIF
! No entry errors - continue:
IFAULT = 0
! call FASTG to preform transformation:
CALL FASTG(XREAL,XIMAG,ISIZE,ITYPE)
! Due to Radix 4 factorisation results are not in the same order
! after transformation as they were when the data was submitted:
! We now call SCRAM, to unscramble the reults:
CALL SCRAM(XREAL,XIMAG,ISIZE,IPOW)
return
END
!-END: RADIX4.FOR
! ===============================================================
! Discription: This is the radix 4 complex descreet fast Fourier
! transform with out unscrabling. Suitable for convolutions or other
! applications that do not require unscrambling. Designed for use
! with FASTF.FOR.
!
SUBROUTINE FASTG(XREAL,XIMAG,N,ITYPE)
INTEGER N,IFACA,IFCAB,LITLA
INTEGER I0,I1,I2,I3
REAL*8 XREAL(*),XIMAG(*),BCOS,BSIN,CW1,CW2,PI
REAL*8 SW1,SW2,SW3,TEMPR,X1,X2,X3,XS0,XS1,XS2,XS3
REAL*8 Y1,Y2,Y3,YS0,YS1,YS2,YS3,Z,ZATAN,ZFLOAT,ZSIN
ZATAN(Z) = ATAN(Z)
ZFLOAT(K) = FLOAT(K) ! Real equivalent of K.
ZSIN(Z) = SIN(Z)
PI = (4.0)*ZATAN(1.0)
IFACA = N/4
! Forward transform:
IF(ITYPE.GT.0) THEN
GOTO 5
ENDIF
! If this is for an inverse transform - conjugate the data:
DO 4, K = 1,N
XIMAG(K) = -XIMAG(K)
4 CONTINUE
5 IFCAB = IFACA*4
! Proform appropriate transformations:
Z = PI/ZFLOAT(IFCAB)
BCOS = -2.0*ZSIN(Z)**2
BSIN = ZSIN(2.0*Z)
CW1 = 1.0
SW1 = 0.0
! This is the main body of radix 4 calculations:
DO 10, LITLA = 1,IFACA
DO 8, I0 = LITLA,N,IFCAB
I1 = I0 + IFACA
I2 = I1 + IFACA
I3 = I2 + IFACA
XS0 = XREAL(I0) + XREAL(I2)
XS1 = XREAL(I0) - XREAL(I2)
YS0 = XIMAG(I0) + XIMAG(I2)
YS1 = XIMAG(I0) - XIMAG(I2)
XS2 = XREAL(I1) + XREAL(I3)
XS3 = XREAL(I1) - XREAL(I3)
YS2 = XIMAG(I1) + XIMAG(I3)
YS3 = XIMAG(I1) - XIMAG(I3)
XREAL(I0) = XS0 + XS2
XIMAG(I0) = YS0 + YS2
X1 = XS1 + YS3
Y1 = YS1 - XS3
X2 = XS0 - XS2
Y2 = YS0 - YS2
X3 = XS1 - YS3
Y3 = YS1 + XS3
IF(LITLA.GT.1) THEN
GOTO 7
ENDIF
XREAL(I2) = X1
XIMAG(I2) = Y1
XREAL(I1) = X2
XIMAG(I1) = Y2
XREAL(I3) = X3
XIMAG(I3) = Y3
GOTO 8
! Now IF required - we multiply by twiddle factors:
7 XREAL(I2) = X1*CW1 + Y1*SW1
XIMAG(I2) = Y1*CW1 - X1*SW1
XREAL(I1) = X2*CW2 + Y2*SW2
XIMAG(I1) = Y2*CW2 - X2*SW2
XREAL(I3) = X3*CW3 + Y3*SW3
XIMAG(I3) = Y3*CW3 - X3*SW3
8 CONTINUE
IF(LITLA.EQ.IFACA) THEN
GOTO 10
ENDIF
! Calculate a new set of twiddle factors:
Z = CW1*BCOS - SW1*BSIN + CW1
SW1 = BCOS*SW1 + BSIN*CW1 + SW1
TEMPR = 1.5 - 0.5*(Z*Z + SW1*SW1)
CW1 = Z*TEMPR
SW1 = SW1*TEMPR
CW2 = CW1*CW1 - SW1*SW1
SW2 = 2.0*CW1*SW1
CW3 = CW1*CW2 - SW1*SW2
SW3 = CW1*SW2 + CW2*SW1
10 CONTINUE
IF(IFACA.LE.1) THEN
GOTO 14
ENDIF
! Set up tranform split for next stage:
IFACA = IFACA/4
IF(IFACA.GT.0) THEN
GOTO 5
ENDIF
! This is the calculation of a radix two-stage:
DO 13, K = 1,N,2
TEMPR = XREAL(K) + XREAL(K + 1)
XREAL(K + 1) = XREAL(K) - XREAL(K + 1)
XREAL(K) = TEMPR
TEMPR = XIMAG(K) + XIMAG(K + 1)
XIMAG(K + 1) = XIMAG(K) - XIMAG(K + 1)
XIMAG(K) = TEMPR
13 CONTINUE
14 IF(ITYPE.GT.0) THEN
GOTO 17
ENDIF
! For the inverse case, cojugate and scale the transform:
Z = 1.0/ZFLOAT(N)
DO 16, K = 1,N
XIMAG(K) = -XIMAG(K)*Z
XREAL(K) = XREAL(K)*Z
16 CONTINUE
17 return
END
! ----------------------------------------------------------
!-END of subroutine FASTG.FOR.
! ----------------------------------------------------------
!+ FILE: SCRAM.FOR
! ==========================================================
! Discription: Subroutine for unscrambiling FFT data:
! ==========================================================
SUBROUTINE SCRAM(XREAL,XIMAG,N,IPOW)
INTEGER L(19),II,J1,J2,J3,J4,J5,J6,J7,J8,J9,J10,J11,J12
INTEGER J13,J14,J15,J16,J17,J18,J19,J20,ITOP,I
REAL*8 XREAL(*),XIMAG(*),TEMPR
EQUIVALENCE (L1,L(1)),(L2,L(2)),(L3,L(3)),(L4,L(4))
EQUIVALENCE (L5,L(5)),(L6,L(6)),(L7,L(7)),(L8,L(8))
EQUIVALENCE (L9,L(9)),(L10,L(10)),(L11,L(11)),(L12,L(12))
EQUIVALENCE (L13,L(13)),(L14,L(14)),(L15,L(15)),(L16,L(16))
EQUIVALENCE (L17,L(17)),(L18,L(18)),(L19,L(19))
II = 1
ITOP = 2**(IPOW - 1)
I = 20 - IPOW
DO 5, K = 1,I
L(K) = II
5 CONTINUE
L0 = II
I = I + 1
DO 6, K = I,19
II = II*2
L(K) = II
6 CONTINUE
II = 0
DO 9, J1 = 1,L1,L0
DO 9, J2 = J1,L2,L1
DO 9, J3 = J2,L3,L2
DO 9, J4 = J3,L4,L3
DO 9, J5 = J4,L5,L4
DO 9, J6 = J5,L6,L5
DO 9, J7 = J6,L7,L6
DO 9, J8 = J7,L8,L7
DO 9, J9 = J8,L9,L8
DO 9, J10 = J9,L10,L9
DO 9, J11 = J10,L11,L10
DO 9, J12 = J11,L12,L11
DO 9, J13 = J12,L13,L12
DO 9, J14 = J13,L14,L13
DO 9, J15 = J14,L15,L14
DO 9, J16 = J15,L16,L15
DO 9, J17 = J16,L17,L16
DO 9, J18 = J17,L18,L17
DO 9, J19 = J18,L19,L18
J20 = J19
DO 9, I = 1,2
II = II +1
IF(II.GE.J20) THEN
GOTO 8
ENDIF
! J20 is the bit reverse of II!
! Pairwise exchange:
TEMPR = XREAL(II)
XREAL(II) = XREAL(J20)
XREAL(J20) = TEMPR
TEMPR = XIMAG(II)
XIMAG(II) = XIMAG(J20)
XIMAG(J20) = TEMPR
8 J20 = J20 + ITOP
9 CONTINUE
return
END
! -------------------------------------------------------------------
!-END:
! -------------------------------------------------------------------
Going through this and understanding it will take time! I wrote this using a CalTech paper I found years ago, I cannot recall the reference I am afraid. Good luck.
I hope this helps.
Your code works.
I would give more digits for PI ( 3.1415926535898 ).
Also, you have to devide the output of the DFT summation by S, the DFT size.
Since the input series in your test is constant, the DFT output should have only one non-zero coefficient.
And indeed all the output coefficients are very small relative to the first one.
But for a large input length, this is not an efficient way of implementing the DFT.
If timing is a concern, look into the Fast Fourrier Transform for faster methods to calculate the DFT.
Your code looks right to me. I'm not sure what you were expecting for output but, given that your input is a constant value, the DFT of a constant is a DC term in bin 0 and zeroes in the remaining bins (or a close equivalent, which you have).
You might try testing you code with a longer sequence containing some type of waveform like a sine wave or a square wave. In general, however, you should consider using something like fftw in production code. Its been wrung out and highly optimized by many people for a long time. FFTs are optimized DFTs for special cases (e.g., lengths that are powers of 2).
Your code looks okey. out[0] should represent the "DC" component of your input waveform. In your case, it is 4 times bigger than the input waveform, because your normalization coefficient is 1.
The other coefficients should represent the amplitude and phase of your input waveform. The coefficients are mirrored, i.e., out[i] == out[N-i]. You can test this with the following code:
double frequency = 1; /* use other values like 2, 3, 4 etc. */
for (int i = 0; i < 16; i++)
numbers.push_back(sin((double)i / 16 * frequency * 2 * PI));
For frequency = 1, this gives:
(6.53592e-07,0)
(6.53592e-07,-8)
(6.53592e-07,1.75661e-07)
(6.53591e-07,2.70728e-07)
(6.5359e-07,3.75466e-07)
(6.5359e-07,4.95006e-07)
(6.53588e-07,6.36767e-07)
(6.53587e-07,8.12183e-07)
(6.53584e-07,1.04006e-06)
(6.53581e-07,1.35364e-06)
(6.53576e-07,1.81691e-06)
(6.53568e-07,2.56792e-06)
(6.53553e-07,3.95615e-06)
(6.53519e-07,7.1238e-06)
(6.53402e-07,1.82855e-05)
(-8.30058e-05,7.99999)
which seems correct to me: negligible DC, amplitude 8 for 1st harmonics, negligible amplitudes for other harmonics.
MoonKnight has already provided a radix-4 Decimation In Frequency Cooley-Tukey scheme in Fortran. I'm below providing a radix-2 Decimation In Frequency Cooley-Tukey scheme in Matlab.
The code is an iterative one and considers the scheme in the following figure:
A recursive approach is also possible.
As you will see, the implementation calculates also the number of performed multiplications and additions and compares it with the theoretical calculations reported in How many FLOPS for FFT?.
The code is obviously much slower than the highly optimized FFTW exploited by Matlab.
Note also that the twiddle factors omegaa^((2^(p - 1) * n)) can be calculated off-line and then restored from a lookup table, but this point is skipped in the code below.
For a Matlab implementation of an iterative radix-2 Decimation In Time Cooley-Tukey scheme, please see Implementing a Fast Fourier Transform for Option Pricing.
% --- Radix-2 Decimation In Frequency - Iterative approach
clear all
close all
clc
N = 32;
x = randn(1, N);
xoriginal = x;
xhat = zeros(1, N);
numStages = log2(N);
omegaa = exp(-1i * 2 * pi / N);
mulCount = 0;
sumCount = 0;
tic
M = N / 2;
for p = 1 : numStages;
for index = 0 : (N / (2^(p - 1))) : (N - 1);
for n = 0 : M - 1;
a = x(n + index + 1) + x(n + index + M + 1);
b = (x(n + index + 1) - x(n + index + M + 1)) .* omegaa^((2^(p - 1) * n));
x(n + 1 + index) = a;
x(n + M + 1 + index) = b;
mulCount = mulCount + 4;
sumCount = sumCount + 6;
end;
end;
M = M / 2;
end
xhat = bitrevorder(x);
timeCooleyTukey = toc;
tic
xhatcheck = fft(xoriginal);
timeFFTW = toc;
rms = 100 * sqrt(sum(sum(abs(xhat - xhatcheck).^2)) / sum(sum(abs(xhat).^2)));
fprintf('Time Cooley-Tukey = %f; \t Time FFTW = %f\n\n', timeCooleyTukey, timeFFTW);
fprintf('Theoretical multiplications count \t = %i; \t Actual multiplications count \t = %i\n', ...
2 * N * log2(N), mulCount);
fprintf('Theoretical additions count \t\t = %i; \t Actual additions count \t\t = %i\n\n', ...
3 * N * log2(N), sumCount);
fprintf('Root mean square with FFTW implementation = %.10e\n', rms);
Your code is correct to obtain the DFT.
The function you are testing is (sin ((double) i / points * frequency * 2) which corresponds to a synoid of amplitude 1, frequency 1 and sampling frequency Fs = number of points taken.
Operating with the obtained data we have:
As you can see, the DFT coefficients are symmetric with respect to the position coefficient N / 2, so only the first N / 2 provide information. The amplitude obtained by means of the module of the real and imaginary part must be divided by N and multiplied by 2 to reconstruct it. The frequencies of the coefficients will be multiples of Fs / N by the coefficient number.
If we introduce two sinusoids, one of frequency 2 and amplitude 1.3 and another of frequency 3 and amplitude 1.7.
for (int i = 0; i < 16; i++)
{
numbers.push_back(1.3 *sin((double)i / 16 * frequency1 * 2 * PI)+ 1.7 *
sin((double)i / 16 * frequency2 * 2 * PI));
}
The obtained data are:
Good luck.