Wrong result when running code in parallel - fortran

The gfortran compiler gives wrong answer, when I run a parallel program using OpenMP. In the same time, ifort gives exact result.
This is the whole compilable code.
!_______________________________________________________________ !
!____________MODULE SECTION_____________________________________ !
MODULE MATRIC
IMPLICIT NONE
INTEGER , PARAMETER :: NG = 40
DOUBLE PRECISION,SAVE :: Z , PA , PB ,CMU
DOUBLE PRECISION , PARAMETER :: PI=2.0D0*ACOS(0.0D0) , &
FPI=4.0D0*PI , SQFPI = SQRT(FPI), DLAM=1.0D0
DOUBLE PRECISION , DIMENSION(450) :: DEL1, DEL2, X, R , SNLO
DOUBLE PRECISION :: XG(60) , WG(60)
END MODULE MATRIC
!_________________________________________________________________________!
! MODULE SECTION
!__________________________________________________________________________!
MODULE POTDATA
IMPLICIT NONE
INTEGER :: IA , IB , ID
DOUBLE PRECISION :: RA , RB , R1s(450)
END MODULE POTDATA
!__________________________________________________________________________!
!______________________________________________________________________!
program check
use matric
use potdata
implicit double precision(a-h,o-z)
pa = 0.72D0 ; pb = 0.19D0
mesh = 441 ; noint= 40 ; z = 2.0d0
CALL GAULEG(-1.d0,1.d0)
NB = MESH/NOINT
I = 1
X(I) = 0.0D+00
DELTAX = 0.0025D+00*40.0D+00/DBLE(NOINT)
DO J=1,NB
IMK = (J-1)*NOINT + 1
DO K=1,NOINT
AK=K
I=I+1
X(I)=X(IMK)+AK*DELTAX
END DO
DELTAX=2.0D+00*DELTAX
END DO
CMU=(9.0D00*PI*PI/(128.0D00*Z))**THIRD
R(1)=0.0D+00 ; SNLO(1) = 0.D00
DO I=2,MESH
R(I)=CMU*X(I)
SNLO(I) = R(I)*dexp(-Z*R(I))
R1S(I) = SNLO(I)/(SQFPI*R(I))
END DO
call EFFPOT(MESH,NOINT)
end program check
subroutine EFFPOT(MESH,NOINT)
USE OMP_LIB
USE MATRIC
USE POTDATA
implicit none
integer, intent(in) :: MESH, NOINT
double precision::anorm(450)
double precision, external :: funct
double precision :: asum, fac, cnorm
!$omp parallel do default(none) private(del1,ia,asum,ib,ra,rb,fac) &
!$omp shared(id,mesh,r,anorm,NOINT,del2,R1s)
do ia = 2,mesh
ra = r(ia)
if(R1s(ia).lt.1.D-7.and.R1s(ia).ge.1.D-8)id = ia
do ib = 2,mesh
rb = r(ib)
call QGAUSS(funct,-1.d0,1.d0,fac)
del1(ib) = rb**2*fac*R1s(ib)**2
end do
CALL NCDF(del1,ASUM,r(2),mesh,NOINT)
anorm(ia) = 2.0d0*pi*asum
del2(ia) = 2.0d0*pi*asum*(ra*R1s(ia))**2
end do
!$omp end parallel do
CALL NCDF(del2,ASUM,r(2),mesh,NOINT)
cnorm = 1.0/dsqrt(4.*pi*ASUM)
write(6,*)'cnorm =',cnorm
return
end
double precision function funct(x)
USE POTDATA , ONLY : RA , RB
USE MATRIC , ONLY : PA , PB , DLAM
implicit none
double precision, intent(in) :: x
double precision :: f1, f2, ramrb
ramrb = dsqrt(ra**2+rb**2-2.d0*ra*rb*x)
f1 = dcosh(pa*ra)+dcosh(pa*rb)
f2 = 1.d0+0.5*dlam*ramrb*dexp(-pb*ramrb)
funct = (f1*f2)**2
return
end
SUBROUTINE QGAUSS(func,aa,bb,ss)
USE OMP_LIB
USE MATRIC , ONLY : XG ,WG , NG
IMPLICIT DOUBLE PRECISION(A-H,O-Z)
external func
xm = 0.5d0*(bb+aa)
xl = 0.5d0*(bb-aa)
ss = 0.d0
do j=1,ng
dx = xl*xg(j)
ss = ss + wg(j)*(func(xm+dx)+func(xm-dx))
end do
ss = xl*ss/2.0
return
END
SUBROUTINE GAULEG(x1,x2)
USE MATRIC , ONLY : XG ,WG ,NG , PI
IMPLICIT DOUBLE PRECISION(A-H,O-Z)
eps = 1.d-14
m = (ng+1)/2
xm = 0.5D0*(x1+x2)
xl = 0.5D0*(x2-x1)
do i=1,m
z = dcos(pi*(dfloat(i)-0.25d0)/(dfloat(ng)+0.5d0))
1 continue
p1 = 1.d0
p2 = 0.d0
do j=1,ng
p3 = p2
p2 = p1
p1 = ((2.d0*dfloat(j)-1.d0)*z*p2 &
- (dfloat(j)-1.d0)*p3)/dfloat(j)
end do
pp = dfloat(ng)*(z*p1-p2)/(z*z-1.d0)
z1 = z
z = z1 - p1/pp
if (dabs(z-z1).gt.eps) go to 1
xg(i) = xm - xl*z
xg(ng+1-i) = xm + xl*z
wg(i) = 2.d0*xl/((1.d0-z*z)*pp*pp)
wg(ng+1-i) = wg(i)
end do
return
end
SUBROUTINE NCDF(F,ASUM,H,KKK,NOINT)
IMPLICIT DOUBLE PRECISION (A-H,O-Z)
DIMENSION F(450)
NBLOCK = (KKK-2)/NOINT + 1
C2HO45 = 2.0D+00*H/45.0D+00
ASUM = 0.0D+00
DO J=1,NBLOCK
ISTAR = NOINT*(J-1)+5
IEND = NOINT*J + 1
IEND = MIN0(KKK,IEND)
DO I=ISTAR,IEND,4
ASUM = ASUM + C2HO45*(7.0D+00*(F(I-4)+F(I)) &
+32.0D+00*(F(I-3)+F(I-1)) + 12.0D+00*F(I-2))
END DO
IF(IEND.EQ.KKK) GO TO 4
C2HO45 = 2.0D+00*C2HO45
4 END DO
RETURN
END
Thanks everybody specially #Vladimir who has taken interest in my problem. Finally i got the right answer by removing ra and rb from the module potdata and defined function as funct(x, ra, rb) and then removing ra and rb from the loop. Because i was writing ra, rb then reading their values in the above code so loop was having flow dependence. Now i get exact result from both compiler (which is 8.7933767516) parallelly, sequentially both. Exact way is this
subroutine EFFPOT(MESH,NOINT)
USE OMP_LIB
USE MATRIC
USE POTDATA
implicit none
integer, intent(in) :: MESH, NOINT
double precision::anorm(450)
double precision, external :: funct
double precision :: asum, fac, cnorm
!$omp parallel do default(none) private(del1,ia,asum,ib,fac) &
!$omp shared(id,mesh,r,anorm,NOINT,del2,R1s)
do ia = 2,mesh
if(R1s(ia).lt.1.D-7.and.R1s(ia).ge.1.D-8)id = ia
do ib = 2,mesh
call QGAUSS(funct,-1.d0,1.d0,fac,r(ia),r(ib))
del1(ib) = r(ib)**2*fac*R1s(ib)**2
end do
CALL NCDF(del1,ASUM,r(2),mesh,NOINT)
anorm(ia) = 2.0d0*pi*asum
del2(ia) = 2.0d0*pi*asum*(r(ia)*R1s(ia))**2
end do
!$omp end parallel do
CALL NCDF(del2,ASUM,r(2),mesh,NOINT)
cnorm = 1.0/dsqrt(4.*pi*ASUM)
write(6,*)'cnorm =',cnorm
return
end
double precision function funct(x,ra,rb)
USE MATRIC , ONLY : PA , PB , DLAM
implicit none
double precision, intent(in) :: x, ra, rb
double precision :: f1, f2, ramrb
ramrb = dsqrt(ra**2+rb**2-2.d0*ra*rb*x)
f1 = dcosh(pa*ra)+dcosh(pa*rb)
f2 = 1.d0+0.5*dlam*ramrb*dexp(-pb*ramrb)
funct = (f1*f2)**2
return
end
SUBROUTINE QGAUSS(func,aa,bb,ss,ra,rb)
USE OMP_LIB
USE MATRIC , ONLY : XG ,WG , NG
IMPLICIT DOUBLE PRECISION(A-H,O-Z)
external func
xm = 0.5d0*(bb+aa)
xl = 0.5d0*(bb-aa)
ss = 0.d0
do j=1,ng
dx = xl*xg(j)
ss = ss + wg(j)*(func(xm+dx,ra,rb)+func(xm-dx,ra,rb))
end do
ss = xl*ss/2.0
return
END

The cause of your problem is that the OpenMP standard does not specify what happens if a private list item is accessed in the region but outside of the construct. See example private.2f (found on page 135 of the OpenMP standard supplement) for a short version of the same problem.
Specifically, the module variables ra and rb are declared private in the OpenMP parallel region inside EFFPOT and also accessed from funct. funct is in the dynamic scope of the parallel region but (lexically) outside of it and therefore it is not specified whether ra and rb referenced by funct are the original module variables or their private copies (most compilers would go for the original variables).
You have already found one of the solutions. The other one would be to declare ra and rb threadprivate since they are only used to pass data from EFFPOT to funct:
MODULE POTDATA
IMPLICIT NONE
INTEGER :: IA , IB , ID
DOUBLE PRECISION :: RA , RB , R1s(450)
!$OMP THREADPRIVATE(RA,RB)
END MODULE POTDATA
You should then also remove ra and rb from the list of the private clause of the parallel region within EFFPOT.
On some platforms, e.g. OS X, using threadprivate with GCC (i.e. gfortran) could be slower than actually passing around the two variables as arguments because of the emulated TLS.
Note that this semantic error is really hard to detect and many OpenMP tools won't actually spot it.

First of all, it is very difficult to say something specific without seeing the actual code. However, I do have some comments on your situation and the conclusions you are drawing.
The fact that your program runs fine both in parallel and sequential execution when compiled with "ifort" doesn't mean that your program is correct. Since compiler bugs leading to programs giving wrong answers are very rare, but on the other hand manual parallel programming is very error-prone, we should assume a problem with the way you parallelized your code. We are probably talking about a race condition.
And no, the problem you are having is not at all unusual. When you have a race condition, it happens often that the sequential execution works everywhere and your parallel execution works in some environments and fails in others. It's even common that your code gives different answers every time you call it (not only depending on the compiler, but on many other factors that can change over time).
What I suggest you should do, is to get a parallel debugger, like for example TotalView that will help you keep track of the various threads and their states. Try to find a simple test environment (as few threads as possible) that fails reliably.

Related

Fortran Subroutines/Functions: Returned Value Changes If Subroutines/Functions Is Called More Often?

I am currently implementing integrals in Fortran as subroutines. The subroutines on their own return the correct values. If i now call the e.g. same subroutine twice after each other, with the same input values, their returned value differs significantly?
The main program only calls the function like this:
program main
use types
use constants
use integrals
use basis
real(dp), dimension(2,3) :: molecule_coords
real(dp), dimension(2) :: z
type(primitive_gaussian), allocatable :: molecule(:,:)
molecule_coords(1,:) = (/0.,0.,0./)
molecule_coords(2,:) = (/0.,0.,1.6/)
molecule = def_molecule(molecule_coords)
z = (/1.0, 1.0/)
call overlap(molecule) ! Correct Value returned
call overlap(molecule) ! Wrong Value returned
end program main
My function for the overlap looks like this:
module integrals
use types
use constants
use basis
use stdlib_specialfunctions_gamma!, only: lig => lower_incomplete_gamma
contains
subroutine overlap(molecule)
implicit none
type(primitive_gaussian), intent(in) :: molecule(:,:)
integer :: nbasis, i, j, k, l
real(dp) :: norm, p, q, coeff, Kab
real(dp), dimension(3) :: Q_xyz
real(dp), dimension(INT(size(molecule,1)),INT(size(molecule,1))) :: S
nbasis = size(molecule,1)
do i = 1, nbasis
do j = 1, nbasis
! Iterate over l and m primitives in basis
do k = 1, size(molecule(i,:))
do l = 1, size(molecule(j,:))
norm = molecule(i, k)%norm() * molecule(j, l)%norm()
! Eq. 63
Q_xyz = (molecule(i, k)%coords - molecule(j, l)%coords)
! Eq. 64, 65
p = (molecule(i, k)%alpha + molecule(j, l)%alpha)
q = (molecule(i, k)%alpha * molecule(j, l)%alpha) / p
! Eq. 66
Kab = exp(-q * dot_product(Q_xyz,Q_xyz))
coeff = molecule(i, k)%coeff * molecule(j, l)%coeff
S(i,j) = S(i,j) + norm * coeff * Kab * (pi / p) ** (1.5)
end do
end do
end do
end do
print *, S
end subroutine overlap
end module integrals
I am a bit lost, why this would be the case, but I am also rather new to Fortran.
Any help is appreciated! Thanks!

Dummy argument not agreeing with actual argument when passing function

I'm trying to implement Newton's method but I'm getting a confusing error message. In my code you'll see I called external with f1 and f2 which I assumes tells the computer to look for the function but it's treating them as variables based on the error message. I've read the stack overflow posts similar to my issue but none of the solutions seem to work. I've tried with and without the external but the issue still persists. Hoping someone could see what I'm missing.
implicit none
contains
subroutine solve(f1,f2,x0,n, EPSILON)
implicit none
real(kind = 2), external:: f1, f2
real (kind = 2), intent(in):: x0, EPSILON
real (kind = 2):: x
integer, intent(in):: n
integer:: iteration
x = x0
do while (abs(f1(x))>EPSILON)
iteration = iteration + 1
print*, iteration, x, f1(x)
x = x - (f1(x)/f2(x))
if (iteration >= n) then
print*, "No Convergence"
stop
end if
end do
print*, iteration, x
end subroutine solve
end module newton
Program Lab10
use newton
implicit none
integer, parameter :: n = 1000 ! maximum iteration
real(kind = 2), parameter :: EPSILON = 1.d-3
real(kind = 2):: x0, x
x0 = 3.0d0
call solve(f(x),fp(x),x0,n, EPSILON)
contains
real (kind = 2) function f(x) ! this is f(x)
implicit none
real (kind = 2), intent(in)::x
f = x**2.0d0-1.0d0
end function f
real (kind = 2) function fp(x) ! This is f'(x)
implicit none
real (kind = 2), intent(in)::x
fp = 2.0d0*x
end function fp
end program Lab10```
You seem to be passing function results to your subroutine and not the functions themselves. Remove (x) when calling solve() and the problem will be resolved. But more importantly, this code is a prime example of how to not program in Fortran. The attribute external is deprecated and you better provide an explicit interface. In addition, what is the meaning of kind = 2. Gfortran does not even comprehend it. Even if it comprehends the kind, it is not portable. Here is a correct portable modern implementation of the code,
module newton
use iso_fortran_env, only: RK => real64
implicit none
abstract interface
pure function f_proc(x) result(result)
import RK
real(RK), intent(in) :: x
real(RK) :: result
end function f_proc
end interface
contains
subroutine solve(f1,f2,x0,n, EPSILON)
procedure(f_proc) :: f1, f2
real(RK), intent(in) :: x0, EPSILON
integer, intent(in) :: n
real(RK) :: x
integer :: iteration
x = x0
do while (abs(f1(x))>EPSILON)
iteration = iteration + 1
print*, iteration, x, f1(x)
x = x - (f1(x)/f2(x))
if (iteration >= n) then
print*, "No Convergence"
stop
end if
end do
print*, iteration, x
end subroutine solve
end module newton
Program Lab10
use newton
integer, parameter :: n = 1000 ! maximum iteration
real(RK), parameter :: EPSILON = 1.e-3_RK
real(RK) :: x0, x
x0 = 3._RK
call solve(f,fp,x0,n, EPSILON)
contains
pure function f(x) result(result) ! this is f(x)
real (RK), intent(in) :: x
real (RK) :: result
result = x**2 - 1._RK
end function f
pure function fp(x) result(result) ! This is f'(x)
real (RK), intent(in) :: x
real (RK) :: result
result = 2 * x
end function fp
end program Lab10
If you expect to pass nonpure functions to the subroutine solve(), then remove the pure attribute. Note the use of real64 to declare 64-bit (double precision) real kind. Also notice how I have used _RK suffix to assign 64-bit precision to real constants. Also, notice I changed the exponents from real to integer as it is multiplication is more efficient than exponentiation computationally. I hope this answer serves more than merely the solution to Lab10.

Evaluation of Fortran's SUM(..) without temporary array?

Readability-wise, I find it preferable to write
momentum = sum( [( calculateMomentum(elements(i)), i=1, size(elements,1) )] )
over
momentum = 0.0d0
do i = 1, size(elements,1)
momentum = momentum + calculateMomentum(elements(i))
end do
because the first version has the form of defining the value of momentum, while the second corresponds to a more lower-level accumulation instruction. The difference becomes more pronounced in real-world code with more arguments and possibly multiple indices.
However, the first version allocates a temporary array. As a human programmer I know, that it could be optimized away, so I was wondering if Fortran offers a syntax, that allows calculating the sum with neither an explicit loop nor a temporary array.
Update
No such construct. It looks like there is no syntax, as what I was asking for. See Vladimir F's answer.
It matters, but less than I thought. I've made my own benchmark (pastebin, embedded) using matrix multiplication using several variants.
C(i,j) = C(i,j) + A(i,k) * B(k,j) was the slowest, probably due to the unnecessary array access in each step.
C(i,j) = sum( [(A(i,k) * B(k,j), k = 1, N)] )` was about 10-20% faster than (1), despite the temporary array.
tmp = tmp + A(i,k) * B(k,j), i.e. using a temporary accumulator variable, was about 20% faster than (2).
C = matmul(A,B) was the fastest by far, compared to (3) by a factor of 25 for 500x500 matrices, growing to 50 for 2000x2000, while the relative speed of the other variants stayed roughly the same.
Bottom line: When the task cannot be expressed in optimized library- or intrinsic functions easily, the sum variant has viable performance, and should only be optimized away, if performance really matters to such a degree in that part of the code.
Whether a temporary array will be allocated or net depends on the optimizations in the compiler. Stack allocation is almost free anyway. Copying the values will probably take longer.
The compiler may optimize unnecessary steps away if it can make sure the result will be the same. However, there is no special syntax for that. Fortran typically tries to stay far from the actual implementation and leaves a lot on the compiler.
For experiment, I've tried this code (which computes the sum of inverse of arr).
program main
use iso_fortran_env, only: dp => real64
implicit none
real(dp) val
real(dp), allocatable :: arr(:)
integer num, loop, i, t1, t2, trate
num = 10**8
arr = [( i, i = 1, num )] !! L1
do loop = 1, 10
call system_clock( t1 )
val = sum( [( testfunc( arr(i) ), i = 1, num )] ) !! L2
call system_clock( t2, trate )
print *, "val = ", val, " in ", (t2 - t1) / real(trate), " (s)"
enddo
contains
function testfunc( x ) result( ret )
real(dp), intent(in) :: x
real(dp) :: ret
ret = 1.0_dp / x
end
end program
Then, on my computer (mac2012), "gfortran-10 -O2 test.f90 && time ./a.out" gives
val = 18.997896413852555 in 1.02999997 (s)
val = 18.997896413852555 in 1.10099995 (s)
val = 18.997896413852555 in 1.17600000 (s)
...
real 0m12.575s
user 0m8.142s
sys 0m4.387s
and "gfortran-10 -O3" gives
val = 18.997896413852555 in 0.875000000 (s)
val = 18.997896413852555 in 0.888000011 (s)
val = 18.997896413852555 in 0.833000004 (s)
...
real 0m9.986s
user 0m5.738s
sys 0m4.210s
In both cases, the htop command shows ~1.5 GB allocated, which may be reasonable if lines L1 and L2 use a temporary array (each ~800 MB with ~0.3 s for allocation).
Because there is no syntax for creating "iterators", I've tried making testfunc() to be elemental (or impure elemental). The only difference here is the lines marked with <--.
program main
use iso_fortran_env, only: dp => real64
implicit none
real(dp) val
real(dp), allocatable :: arr(:)
integer num, loop, i, t1, t2, trate
num = 10**8
arr = [( i, i = 1, num )]
do loop = 1, 10
call system_clock( t1 )
val = sum( testfunc( arr ) ) !<--
call system_clock( t2, trate )
print *, "val = ", val, " in ", (t2 - t1) / real(trate), " (s)"
enddo
contains
impure elemental & !<--
function testfunc( x ) result( ret )
real(dp), intent(in) :: x
real(dp) :: ret
ret = 1.0_dp / x
end
end program
Then, "gfortran-10 -O2" gives
val = 18.997896413852555 in 0.437000006 (s)
val = 18.997896413852555 in 0.453999996 (s)
val = 18.997896413852555 in 0.437999994 (s)
...
real 0m5.946s
user 0m5.069s
sys 0m0.842s
and "gfortran-10 -O3" gives
val = 18.997896413852555 in 0.225999996 (s)
val = 18.997896413852555 in 0.252000004 (s)
val = 18.997896413852555 in 0.246999994 (s)
...
real 0m3.909s
user 0m3.009s
sys 0m0.867s
The htop command show ~800 MB, so it seems only arr is allocated.
For comparison, the following code calculates val with an explicit do-loop (using a scalar version of testfunc())
val = 0
do i = 1, num
val = val + testfunc( arr(i) )
enddo
which gave the same timing with the second code with elemental + testfunc(arr) above (on my mac).
In all the above code, the -Ofast option resulted in a runtime error (Illegal instruction). But this was due to the line L1 (arr = [( i, i = 1, num )]). If I allocate arr beforehand and populate it with an explicit loop, -Ofast also worked without problem (giving almost the same timing with -O3 in this case).
If you make calculateMomentum and elemental function, then it can be used for both scalar values and for arrays
for example with:
elemental function calculateMomentum(obj, v) result(p)
class(body), intent(in) :: obj
real, intent(in) :: v
real :: p
p = obj%mass * v
end function
you can apply the above to an array of v
integer::i
type(body) :: ball
real, allocatable :: v(:), p(:)
real :: tot_p
allocate(v(10))
v = [ (10+i, i=1, 10) ]
p = calculateMomentum(ball, v)
tot_p = sum(p)
having an intermediate array to hold the values is advantageous because it keeps the data close by (probably within the cache-line) and the sum() function would be as quick as it can be.
imagine the type body as follows for example
type body
real :: mass
end type

Using the routine directive on a Fortran90 intrinsic

I have given OpenAcc access to the subroutine called "lang_force", and within that subroutine I call the Fortran90 intrinsic RANDOM_NUMBER().
subroutine lang_force(damp, temp, noise)
!$acc routine(lang_force)
implicit none
double precision, intent(in) :: damp, temp
double precision, dimension(1:3), intent(out) :: noise
double precision :: kb, a1, theta, phi, mag1, pi,r,s
integer :: i,j,k
double precision :: x,y,z
kb = 1.3806E-23
!kb = 1.0
pi=4.D0*DATAN(1.D0)
call random_number(a1)
call random_number(r)
call random_number(s)
mag1 = sqrt(-2.0*log(a1))
theta = r*pi
phi = 2.0*s*pi
x = mag1*cos(phi)*sin(theta)
y = mag1*sin(theta)*sin(phi)
z = mag1*cos(theta)
noise(1) = sqrt(2.0*kb*temp*damp)*x
noise(2) = sqrt(2.0*kb*temp*damp)*y
noise(3) = sqrt(2.0*kb*temp*damp)*z
end subroutine lang_force
When compiled with the latest version of pdf90, it tells me that it needs access to RANDOM_NUMBER(). How do I declare a routine directive to such a fortran90 intrinsic subroutine?
Not all Fortran intrinsics are supported within device code including RANDOM_NUMBER.
RANDOM_NUMBER in particular is not thread safe since all threads would share the same state. Instead, you need to use cuRand for which we ship examples with the compilers under the "2020/examples/CUDA-Libraries/cuRAND/test_rand_oacc_ftn" directory.
While this is in C, I wrote about is in more detail on the NVIDIA user forums:
https://forums.developer.nvidia.com/t/random-numbers-on-device-generation/135748/2

Shooting method in fortran (neutron star oscillation)

I have been writing a script in fortran 90 for solving the radial oscillation problem of a neutron star with the use of shooting method. But for unknown reason, my program never works out. Without the shooting method component, the program runs smoothly as it successfully constructed the star. But once the shooting comes in, everything dies.
PROGRAM ROSCILLATION2
USE eos_parameters
IMPLICIT NONE
INTEGER ::i, j, k, l
INTEGER, PARAMETER :: N_ode = 5
REAL, DIMENSION(N_ode) :: y
REAL(8) :: rho0_cgs, rho0, P0, r0, phi0, pi
REAL(8) :: r, rend, mass, P, phi, delta, xi, eta
REAL(8) :: step, omega, omegastep, tiny, rho_print, Radius, B, a2, s0, lamda, E0, E
EXTERNAL :: fcn
!!!! User input
rho0_cgs = 2.D+15 !central density in cgs unit
step = 1.D-4 ! step size dr
omegastep = 1.D-2 ! step size d(omega)
tiny = 1.D-8 ! small number P(R)/P(0) to define star surface
!!!!!!!!!
open(unit=15, file="data.dat", status="new")
pi = ACOS(-1.D0)
a2 =((((1.6022D-13)**4)*(6.674D-11)*((2.997D8)**-7)*((1.0546D-34)**-3)*(1.D6))**(0.5D0))*a2_MeV !convert to code unit (km^-1)
B = ((1.6022D-13)**4)*(6.674D-11)*((2.997D8)**-7)*((1.0546D-34)**-3)*(1.D6)*B_MeV !convert to code unit (km^-2)
s0 = (1.D0/3.D0) - (1/(6*pi**2))*a2*((1/(16*pi**2)*a2**2 + (pi**-2)*a4*(rho0 - B))**-0.5) !square of the spped of sound at r=0
lamda = -0.5D0*log(1-2*y(1)/r)
E0 = (r0**-2)*s0*exp(lamda + 3*phi0)
rho0 = rho0_cgs*6.67D-18 / 9.D0 !convert rho0 to code unit (km^-2)
!! Calculate central pressure P0
P0 = (1.D0/3.D0)*rho0 - (4.D0/3.D0)*B - (1.D0/(a4*(12.D0)*(pi**2)))*a2**2 - &
&(a2/((3.D0)*a4))*(((1.D0/(16.D0*pi**4))*a2**2+(1.D0/(pi**2))*a4*(rho0-B))**0.5D0)
!! initial value for metric function phi
phi0 = 0.1D0 ! arbitrary (needed to be adjusted later)
r0 = 1.D-30 ! integration starting point
!! Set initial conditions
!!!!!!!!!!!!!!!!!
!!Start integration loop
!!!!!!!!!!!!!!!!!
r = r0
y(1) = 0.D0
y(2) = P0
y(3) = phi0
y(4) = 1/(3*E0)
y(5) = 1
omega = 2*pi*1000/(2.997D5) !omega of 1kHz in code unit
DO l = 1, 1000
omega = omega + omegastep !shooting method part
DO i = 1, 1000000000
rend = r0 + REAL(i)*step
call oderk(r,rend,y,N_ode,fcn)
r = rend
mass = y(1)
P = y(2)
phi = y(3)
xi = y(4)
eta = y(5)
IF (P < tiny*P0) THEN
WRITE(*,*) "Central density (10^14 cgs) = ", rho0_cgs/1.D14
WRITE(*,*) " Mass (solar mass) = ", mass/1.477D0
WRITE(*,*) " Radius (km) = ", r
WRITE(*,*) " Compactness M/R ", mass/r
WRITE(15,*) (omega*2.997D5/(2*pi)), y(5)
GOTO 21
ENDIF
ENDDO
ENDDO
21 CONTINUE
END PROGRAM roscillation2
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
SUBROUTINE fcn(r,y,yprime)
USE eos_parameters
IMPLICIT NONE
REAL(8), DIMENSION(5) :: y, yprime
REAL(8) :: r, m, P, phi, rho, pi, B, a2, xi, eta, W, Q, E, s, lamda, omega
INTEGER :: j
pi = ACOS(-1.D0)
a2 =((((1.6022D-13)**4)*(6.674D-11)*((2.997D8)**-7)*((1.0546D-34)**-3)*(1.D6))**(0.5D0))*a2_MeV !convert to code unit (km^-1)
B = ((1.6022D-13)**4)*(6.674D-11)*((2.997D8)**-7)*((1.0546D-34)**-3)*(1.D6)*B_MeV !convert to code unit (km^-2)
m = y(1)
P = y(2)
phi = y(3)
xi = y(4)
eta = y(5)
rho = 3.D0*P + 4.D0*B +((3.D0)/(4.D0*a4*(pi**2)))*a2**2+(a2/a4)*&
&(((9.D0/((16.D0)*(pi**4)))*a2**2+((3.D0/(pi**2))*a4*(P+B)))**0.5D0)
s = (1.D0/3.D0) - (1/(6*pi**2))*a2*((1/(16*pi**2)*a2**2 + (pi**-2)*a4*(rho - B))**-0.5) !square of speed of sound
W = (r**-2)*(rho + P)*exp(3*lamda + phi)
E = (r**-2)*s*exp(lamda + 3*phi)
Q = (r**-2)*exp(lamda + 3*phi)*(rho + P)*((yprime(3)**2) + 4*(r**-1)*yprime(3)- 8*pi*P*exp(2*lamda))
yprime(1) = 4.D0*pi*rho*r**2
yprime(2) = - (rho + P)*(m + 4.D0*pi*P*r**3)/(r*(r-2.D0*m))
yprime(3) = (m + 4.D0*pi*P*r**3)/(r*(r-2.D0*m))
yprime(4) = y(5)/(3*E)
yprime(5) = -(W*omega**2 + Q)*y(4)
END SUBROUTINE fcn
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!
!! Runge-Kutta method (from Numerical Recipes)
!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
subroutine oderk(ri,re,y,n,derivs)
INTEGER, PARAMETER :: NMAX=16
REAL(8) :: ri, re, step
REAL(8), DIMENSION(NMAX) :: y, dydx, yout
EXTERNAL :: derivs,rk4
call derivs(ri,y,dydx)
step=re-ri
CALL rk4(y,dydx,n,ri,step,yout,derivs)
do i=1,n
y(i)=yout(i)
enddo
return
end subroutine oderk
SUBROUTINE RK4(Y,DYDX,N,X,H,YOUT,DERIVS)
INTEGER, PARAMETER :: NMAX=16
REAL(8) :: H,HH,XH,X,H6
REAL(8), DIMENSION(N) :: Y, DYDX, YOUT
REAL(8), DIMENSION(NMAX) :: YT, DYT, DYM
EXTERNAL :: derivs
HH=H*0.5D0
H6=H/6D0
XH=X+HH
DO I=1,N
YT(I)=Y(I)+HH*DYDX(I)
ENDDO
CALL DERIVS(XH,YT,DYT)
DO I=1,N
YT(I)=Y(I)+HH*DYT(I)
ENDDO
CALL DERIVS(XH,YT,DYM)
DO I=1,N
YT(I)=Y(I)+H*DYM(I)
DYM(I)=DYT(I)+DYM(I)
ENDDO
CALL DERIVS(X+H,YT,DYT)
DO I=1,N
YOUT(I)=Y(I)+H6*(DYDX(I)+DYT(I)+2*DYM(I))
ENDDO
END SUBROUTINE RK4
Any reply would be great i am just really depressed for the long debugging.
Your program is blowing up because of this line:
yprime(5) = -(W*omega**2 + Q)*y(4)
in subroutine fcn. In this subroutine, omega is completely independent of the one declared in your main program. This one is uninitialized and used in an expression, which will either contain random values or zero, if your compiler is nice enough (or told) to initialize variables.
If you want the variable omega from your main program to be the same variable you use in fcn then you need to pass that variable to fcn somehow. Due to the way you've architected this program, passing it would require modifying all of your procedures to pass omega so that it can be provided to all of your calls to DERIVS (which is the dummy argument you are associating with fcn).
An alternative would be to put omega into a module and use that module where you need access to omega, e.g. declare it in eos_parameters instead of declaring it in the scoping units of fcn and your main program.