Program received signal SIGSEGV: Segmentation fault - invalid memory reference? - fortran

I am computing a multiple integral in which there is a parameter M_D that I can modify. Both M_D=2.9d3 and M_D=3.1d3 work fine, but when I change it to M_D=3.0d3 I get this error:
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
#0 0x7F831A103E08
#1 0x7F831A102F90
#2 0x7F83198344AF
#3 0x43587C in __mc_vegas_MOD_vegas
#4 0x400EBE in MAIN__ at MAINqq.f90:?
Segmentation fault (core dumped)
It's very unlikely that a singularity goes out of range during the integration. From the answers I found to this kind of problem, I guess it is not an array index going out of bounds either.
This time I wasn't able to reduce the problem to a smaller example that still demonstrates the issue. It's impractical to post all of the code here, so I post the segments I think are relevant to the error.
FXNqq.f90
module my_fxn
implicit none
private
public :: fxn_1
public :: cos_theta
real(kind(0d0)), parameter :: S=1.690d8
real(kind(0d0)), parameter :: g_s = 0.118d0
real(kind(0d0)), parameter :: M_D = 3.0d3 !!!
real(kind(0d0)), parameter :: m=172d0
real(kind(0d0)), parameter :: Q=2d0
real(kind(0d0)), parameter :: pi=3.14159d0
real(kind(0d0)), external :: CT14pdf
real(kind(0d0)) :: cos_theta
real(kind(0d0)) :: s12
integer :: i
contains
function jacobian( upper, lower) result(jfactor)
implicit none
real(kind(0d0)), dimension(1:6) :: upper, lower
real(kind(0d0)) :: jfactor
jfactor = 1d0
do i = 1, 6
jfactor = jfactor * (upper(i) - lower(i))
end do
end function jacobian
function dot_vec(p,q) result(fourvectordot)
implicit none
real(kind(0d0)) :: fourvectordot
real(kind(0d0)), dimension(0:3) :: p,q
fourvectordot = p(0) * q(0)
do i = 1, 3
fourvectordot = fourvectordot - p(i) * q(i)
end do
end function dot_vec
subroutine commonpart(p3_0, p4_0, eta, k_v,P3_v, p4_v, s13, s14, s23, s24)
implicit none
real(kind(0d0)), intent(in) :: p3_0, p4_0, eta, k_v, p3_v, p4_v
real(kind(0d0)), intent(out):: s13, s14, s23, s24
real(kind(0d0)) :: sin_theta, &
cos_eta, sin_eta, &
cos_ksi, sin_ksi
real(kind(0d0)), dimension(0:3) :: k1, k2, p3, p4, k
sin_theta = sqrt(1-cos_theta**2)
cos_eta = cos(eta)
sin_eta = sqrt(1-cos_eta**2)
cos_ksi = (k_v**2-p3_v**2-p4_v**2)/(2*p3_v*p4_v)
sin_ksi = sqrt(1-cos_ksi**2)
k1 = [sqrt(s12)/2d0,0d0,0d0, sqrt(s12)/2d0]
k2 = [sqrt(s12)/2d0,0d0,0d0, -sqrt(s12)/2d0]
p3 = [p3_0, p3_v*(cos_theta*cos_eta*sin_ksi+sin_theta*cos_ksi), &
p3_v* sin_eta*sin_ksi, p3_v*( cos_theta*cos_ksi-sin_theta*cos_eta*sin_ksi)]
p4 = [p4_0, p4_v*sin_theta, 0d0, p4_v*cos_theta]
do i = 1, 3
k(i) = 0 - p3(i) - p4(i)
end do
k(0) = sqrt(s12) - p3_0-p4_0
s13 = m**2- 2*dot_vec(k1,p3)
s14 = m**2- 2*dot_vec(k1,p4)
s23 = m**2- 2*dot_vec(k2,p3)
s24 = m**2- 2*dot_vec(k2,p3)
end subroutine commonpart
function fxn_1(z, wgt) result(fxn_qq)
implicit none
real(kind(0d0)), dimension(1:6) :: z
real(kind(0d0)) :: wgt
real(kind(0d0)) :: tau_0
real(kind(0d0)) :: sigma, tau, m_plus, m_minus, & ! intermediate var
p3_v, p4_v, k_v, phi
real(kind(0d0)) :: s13,s14,s23, s24, gm
real(kind(0d0)) :: part1_qq,part_qq,fxn_qq
real(kind(0d0)) :: p3_0_max, p4_0_max, eta_max, gm_max, x1_max, x2_max, &
p3_0_min, p4_0_min, eta_min, gm_min, x1_min, x2_min
real(kind(0d0)), dimension(1:6) :: upper, lower
real(kind(0d0)) :: jfactor
wgt = 0
gm_max = M_D
gm_min = 0.1d0
z(1)= (gm_max-gm_min)*z(1) + gm_min
tau_0 = (2*m)**2/S
eta_max = 2*pi
eta_min = 0
z(2) = (eta_max-eta_min)*z(2)+eta_min
x1_max = 1
x1_min = tau_0
z(3) = (x1_max-x1_min)*z(3) + x1_min
x2_max = 1
x2_min = tau_0/z(3)
z(4) = (x2_max-x2_min)*z(4)+x2_min
s12 = z(3)*z(4) * S
if (sqrt(s12) < (2*m+z(1)))then
fxn_qq = 0d0
return
else
end if
p4_0_max = sqrt(s12)/2 - ((m+z(1))**2-m**2)/(2*sqrt(s12))
p4_0_min = m
z(5) = (p4_0_max-p4_0_min)*z(5)+p4_0_min
p4_v = sqrt(z(5)**2-m**2)
sigma = sqrt(s12)-z(5)
tau = sigma**2 - p4_v**2
m_plus = m + z(1)
m_minus = m - z(1)
p3_0_max = 1/(2*tau)*(sigma*(tau+m_plus*m_minus)+p4_v*sqrt((tau-m_plus**2)*(tau-m_minus**2)))
p3_0_min = 1/(2*tau)*(sigma*(tau+m_plus*m_minus)-p4_v-sqrt((tau-m_plus**2)*(tau-m_minus**2)))
z(6) = (p3_0_max-p3_0_min)*z(6)+p3_0_min
p3_v = sqrt(z(6)**2-m**2)
k_v = sqrt((sqrt(s12)-z(5)-z(6))**2-z(1)**2)
gm = z(1)
upper = [gm_max, eta_max, x1_max, x2_max, p4_0_max, p3_0_max]
lower = [gm_min, eta_min, x1_min, x2_min, p4_0_min, p3_0_min]
jfactor = jacobian(upper, lower)
call commonpart(z(6),z(5),z(2), k_v,p3_v, p4_v, s13, s14, s23, s24)
include "juicy.m"
part1_qq = 0d0
do i = 1, 5
part1_qq = part1_qq+CT14Pdf(i, z(3), Q)*CT14Pdf(-i, z(4), Q)*part_qq
end do
phi = 1/(8*(2*pi)**4) * 1/(2*s12)
fxn_qq = jfactor * g_s**4/M_D**5*pi*z(1)**2*phi*part1_qq
end function fxn_1
end module my_fxn
MC_VEGAS.f90
MODULE MC_VEGAS
!*****************************************************************
! This module is a modified f95 version of VEGA_ALPHA.for
! by G.P. LEPAGE SEPT 1976/(REV)AUG 1979.
!*****************************************************************
IMPLICIT NONE
SAVE
INTEGER,PARAMETER :: MAX_SIZE=20 ! The max dimensions of the integrals
INTEGER,PRIVATE :: i_vegas
REAL(KIND(1d0)),DIMENSION(MAX_SIZE),PUBLIC:: XL=(/(0d0,i_vegas=1,MAX_SIZE)/),&
XU=(/(1d0,i_vegas=1,MAX_SIZE)/)
INTEGER,PUBLIC :: NCALL=50000,& ! The number of integrand evaluations per iteration
!+++++++++++++++++++++++++++++++++++++++++++++++++++++
! You can change NCALL to change the precision
!+++++++++++++++++++++++++++++++++++++++++++++++++++++
ITMX=5,& ! The maximum number of iterations
NPRN=5,& ! printed or not
NDEV=6,& ! device number for output
IT=0,& ! number of iterations completed
NDO=1,& ! number of subdivisions on an axis
NDMX=50,& ! determines the maximum number of increments along each axis
MDS=1 ! =0 use importance sampling only
! /=0 use importance sampling and stratified sampling
! increments are concentrated either where the
! integrand is largest in magnitude (MDS=1), or
! where the contribution to the error is largest(MDS=-1)
INTEGER,PUBLIC :: IINIP
REAL(KIND(1d0)),PUBLIC :: ACC=-1d0 ! Algorithm stops when the relative accuracy,
! |SD/AVGI|, is less than ACC; accuracy is not
! checked when ACC<0
REAL(KIND(1d0)),PUBLIC :: MC_SI=0d0,& ! sum(AVGI_i/SD_i^2,i=1,IT)
SWGT=0d0,& ! sum(1/SD_i^2,i=1,IT)
SCHI=0d0,& ! sum(AVGI_i^2/SD_i^2,i=1,IT)
ALPH=1.5d0 ! controls the rate which the grid is modified from
! iteration to iteration; decreasing ALPH slows
! modification of the grid
! (ALPH=0 implies no modification)
REAL(KIND(1d0)),PUBLIC :: DSEED=1234567d0 ! seed of the random number generator
! XI(I,J) is the location of the I-th division on the J-th axis, normalized to lie between 0 and 1.
REAL(KIND(1d0)),DIMENSION(50,MAX_SIZE),PUBLIC::XI=1d0
REAL(KIND(1d0)),PUBLIC :: CALLS,TI,TSI
CONTAINS
SUBROUTINE RANDA(NR,R)
IMPLICIT NONE
INTEGER,INTENT(IN) :: NR
REAL(KIND(1d0)),DIMENSION(NR),INTENT(OUT) :: R
INTEGER :: I
! D2P31M=(2**31) - 1 D2P31 =(2**31)(OR AN ADJUSTED VALUE)
REAL(KIND(1d0))::D2P31M=2147483647.d0,D2P31=2147483711.d0
!FIRST EXECUTABLE STATEMENT
DO I=1,NR
DSEED = DMOD(16807.d0*DSEED,D2P31M)
R(I) = DSEED / D2P31
ENDDO
END SUBROUTINE RANDA
SUBROUTINE VEGAS(NDIM,FXN,AVGI,SD,CHI2A,INIT)
!***************************************************************
! SUBROUTINE PERFORMS NDIM-DIMENSIONAL MONTE CARLO INTEG'N
! - BY G.P. LEPAGE SEPT 1976/(REV)AUG 1979
! - ALGORITHM DESCRIBED IN J COMP PHYS 27,192(1978)
!***************************************************************
! Without INIT or INIT=0, CALL VEGAS
! INIT=1 CALL VEGAS1
! INIT=2 CALL VEGAS2
! INIT=3 CALL VEGAS3
!***************************************************************
IMPLICIT NONE
INTEGER,INTENT(IN) :: NDIM
REAL(KIND(1d0)),EXTERNAL :: FXN
INTEGER,INTENT(IN),OPTIONAL :: INIT
REAL(KIND(1d0)),INTENT(INOUT) :: AVGI,SD,CHI2A
REAL(KIND(1d0)),DIMENSION(50,MAX_SIZE):: D,DI
REAL(KIND(1d0)),DIMENSION(50) :: XIN,R
REAL(KIND(1d0)),DIMENSION(MAX_SIZE) :: DX,X,DT,RAND
INTEGER,DIMENSION(MAX_SIZE) :: IA,KG
INTEGER :: initflag
REAL(KIND(1d0)),PARAMETER :: ONE=1.d0
INTEGER :: I, J, K, NPG, NG, ND, NDM, LABEL = 0
REAL(KIND(1d0)) :: DXG, DV2G, XND, XJAC, RC, XN, DR, XO, TI2, WGT, FB, F2B, F, F2
!***************************
!SAVE AVGI,SD,CHI2A
!SQRT(A)=DSQRT(A)
!ALOG(A)=DLOG(A)
!ABS(A)=DABS(A)
!***************************
IF(PRESENT(INIT))THEN
initflag=INIT
ELSE
initflag=0
ENDIF
! INIT=0 - INITIALIZES CUMULATIVE VARIABLES AND GRID
ini0:IF(initflag.LT.1) THEN
NDO=1
DO J=1,NDIM
XI(1,J)=ONE
ENDDO
ENDIF ini0
! INIT=1 - INITIALIZES CUMULATIVE VARIABLES, BUT NOT GRID
ini1:IF(initflag.LT.2) THEN
IT=0
MC_SI=0.d0
SWGT=MC_SI
SCHI=MC_SI
ENDIF ini1
! INIT=2 - NO INITIALIZATION
ini2:IF(initflag.LE.2)THEN
ND=NDMX
NG=1
IF(MDS.NE.0) THEN
NG=(NCALL/2.d0)**(1.d0/NDIM)
MDS=1
IF((2*NG-NDMX).GE.0) THEN
MDS=-1
NPG=NG/NDMX+1
ND=NG/NPG
NG=NPG*ND
ENDIF
ENDIF
K=NG**NDIM ! K sub volumes
NPG=NCALL/K ! The number of sample points per sub-volume, Ms
IF(NPG.LT.2) NPG=2
CALLS=DBLE(NPG*K) ! The total number of sample points M
DXG=ONE/NG
DV2G=(CALLS*DXG**NDIM)**2/NPG/NPG/(NPG-ONE) ! 1/(Ms-1)
XND=ND ! ~NDMX!
! determines the number of increments along each axis
NDM=ND-1 ! ~NDMX-1
DXG=DXG*XND ! determines the number of increments along each axis per sub-v
XJAC=ONE/CALLS
DO J=1,NDIM
DX(J)=XU(J)-XL(J)
XJAC=XJAC*DX(J) ! XJAC=Volume/M
ENDDO
! REBIN, PRESERVING BIN DENSITY
IF(ND.NE.NDO) THEN
RC=NDO/XND ! XND=ND
outer:DO J=1, NDIM ! Set the new division
K=0
XN=0.d0
DR=XN
I=K
LABEL=0
inner5:DO
IF(LABEL.EQ.0) THEN
inner4:DO
K=K+1
DR=DR+ONE
XO=XN
XN=XI(K,J)
IF(RC.LE.DR) EXIT
ENDDO inner4
ENDIF
I=I+1
DR=DR-RC
XIN(I)=XN-(XN-XO)*DR
IF(I.GE.NDM) THEN
EXIT
ELSEIF(RC.LE.DR) THEN
LABEL=1
ELSE
LABEL=0
ENDIF
ENDDO inner5
inner:DO I=1,NDM
XI(I,J)=XIN(I)
ENDDO inner
XI(ND,J)=ONE
ENDDO outer
NDO=ND
ENDIF
IF(NPRN.GE.0) WRITE(NDEV,200) NDIM,CALLS,IT,ITMX,ACC,NPRN,&
ALPH,MDS,ND,(XL(J),XU(J),J=1,NDIM)
ENDIF ini2
!ENTRY VEGAS3(NDIM,FXN,AVGI,SD,CHI2A) INIT=3 - MAIN INTEGRATION LOOP
mainloop:DO
IT=IT+1
TI=0.d0
TSI=TI
DO J=1,NDIM
KG(J)=1
DO I=1,ND
D(I,J)=TI
DI(I,J)=TI
ENDDO
ENDDO
LABEL=0
level1:DO
level2:DO
ifla:IF(LABEL.EQ.0)THEN
FB=0.d0
F2B=FB
level3:DO K=1,NPG
CALL RANDA(NDIM,RAND)
WGT=XJAC
DO J=1,NDIM
XN=(KG(J)-RAND(J))*DXG+ONE
IA(J)=XN
IF(IA(J).LE.1) THEN
XO=XI(IA(J),J)
RC=(XN-IA(J))*XO
ELSE
XO=XI(IA(J),J)-XI(IA(J)-1,J)
RC=XI(IA(J)-1,J)+(XN-IA(J))*XO
ENDIF
X(J)=XL(J)+RC*DX(J)
WGT=WGT*XO*XND
ENDDO
F=WGT
F=F*FXN(X,WGT)
F2=F*F
FB=FB+F
F2B=F2B+F2
DO J=1,NDIM
DI(IA(J),J)=DI(IA(J),J)+F
IF(MDS.GE.0) D(IA(J),J)=D(IA(J),J)+F2
ENDDO
ENDDO level3
! K=K-1 !K=NPG
F2B=DSQRT(F2B*DBLE(NPG))
F2B=(F2B-FB)*(F2B+FB)
TI=TI+FB
TSI=TSI+F2B
IF(MDS.LT.0) THEN
DO J=1,NDIM
D(IA(J),J)=D(IA(J),J)+F2B
ENDDO
ENDIF
K=NDIM
ENDIF ifla
KG(K)=MOD(KG(K),NG)+1
IF(KG(K).EQ.1) THEN
EXIT
ELSE
LABEL=0
ENDIF
ENDDO level2
K=K-1
IF(K.GT.0) THEN
LABEL=1
ELSE
EXIT
ENDIF
ENDDO level1
! COMPUTE FINAL RESULTS FOR THIS ITERATION
TSI=TSI*DV2G
TI2=TI*TI
WGT=ONE/TSI
MC_SI=MC_SI+TI*WGT
SWGT=SWGT+WGT
SCHI=SCHI+TI2*WGT
AVGI=MC_SI/SWGT
CHI2A=(SCHI-MC_SI*AVGI)/(IT-0.9999d0)
SD=DSQRT(ONE/SWGT)
IF(NPRN.GE.0) THEN
TSI=DSQRT(TSI)
WRITE(NDEV,201) IT,TI,TSI,AVGI,SD,CHI2A
ENDIF
IF(NPRN.GT.0) THEN
DO J=1,NDIM
WRITE(NDEV,202) J,(XI(I,J),DI(I,J),I=1+NPRN/2,ND,NPRN)
ENDDO
ENDIF
!*************************************************************************************
! REFINE GRID
! XI(k,j)=XI(k,j)-(XI(k,j)-XI(k-1,j))*(sum(R(i),i=1,k)-s*sum(R(i),i=1,ND)/M)/R(k)
! divides the original k-th interval into s parts
!*************************************************************************************
outer2:DO J=1,NDIM
XO=D(1,J)
XN=D(2,J)
D(1,J)=(XO+XN)/2.d0
DT(J)=D(1,J)
inner2:DO I=2,NDM
D(I,J)=XO+XN
XO=XN
XN=D(I+1,J)
D(I,J)=(D(I,J)+XN)/3.d0
DT(J)=DT(J)+D(I,J)
ENDDO inner2
D(ND,J)=(XN+XO)/2.d0
DT(J)=DT(J)+D(ND,J)
ENDDO outer2
le1:DO J=1,NDIM
RC=0.d0
DO I=1,ND
R(I)=0.d0
IF(D(I,J).GT.0.) THEN
XO=DT(J)/D(I,J)
R(I)=((XO-ONE)/XO/DLOG(XO))**ALPH
ENDIF
RC=RC+R(I)
ENDDO
RC=RC/XND
K=0
XN=0.d0
DR=XN
I=K
LABEL=0
le2:DO
le3:DO
IF(LABEL.EQ.0)THEN
K=K+1
DR=DR+R(K)
XO=XN
XN=XI(K,J)
ENDIF
IF(RC.LE.DR) THEN
EXIT
ELSE
LABEL=0
ENDIF
ENDDO le3
I=I+1
DR=DR-RC
XIN(I)=XN-(XN-XO)*DR/R(K)
IF(I.GE.NDM) THEN
EXIT
ELSE
LABEL=1
ENDIF
ENDDO le2
DO I=1,NDM
XI(I,J)=XIN(I)
ENDDO
XI(ND,J)=ONE
ENDDO le1
IF(IT.GE.ITMX.OR.ACC*ABS(AVGI).GE.SD) EXIT
ENDDO mainloop
200 FORMAT(/," INPUT PARAMETERS FOR MC_VEGAS: ",/," NDIM=",I3," NCALL=",F8.0,&
" IT=",I3,/," ITMX=",I3," ACC= ",G9.3,&
" NPRN=",I3,/," ALPH=",F5.2," MDS=",I3," ND=",I4,/,&
"(XL,XU)=",(T10,"(" G12.6,",",G12.6 ")"))
201 FORMAT(/," INTEGRATION BY MC_VEGAS ", " ITERATION NO. ",I3, /,&
" INTEGRAL = ",G14.8, /," SQURE DEV = ",G10.4,/,&
" ACCUMULATED RESULTS: INTEGRAL = ",G14.8,/,&
" DEV = ",G10.4, /," CHI**2 PER IT'N = ",G10.4)
! X is the division of the coordinate
! DELTA I is the sum of F in this interval
202 FORMAT(/,"DATA FOR AXIS ",I2,/," X DELTA I ", &
24H X DELTA I ,18H X DELTA I, &
/(1H ,F7.6,1X,G11.4,5X,F7.6,1X,G11.4,5X,F7.6,1X,G11.4))
END SUBROUTINE VEGAS
END MODULE MC_VEGAS
MAINqq.f90
program main
use my_fxn
use MC_VEGAS
implicit none
integer, parameter :: NDIM = 6
real(kind(0d0)) :: avgi, sd, chi2a
Character(len=40) :: Tablefile
data Tablefile/'CT14LL.pds'/
Call SetCT14(Tablefile)
call vegas(NDIM,fxn_1,avgi,sd,chi2a)
print *, avgi
end program main
After running build.sh
#!/bin/sh
rm -rf *.mod
rm -rf *.o
rm -rf ./calc
rm DATAqq.txt
gfortran -c CT14Pdf.for
gfortran -c FXNqq.f90
gfortran -c MC_VEGAS.f90
gfortran -c MAINqq.f90
gfortran -g -fbacktrace -fcheck=all -Wall -o calc MAINqq.o CT14Pdf.o FXNqq.o MC_VEGAS.o
./calc
rm -rf *.mod
rm -rf *.o
rm -rf ./calc
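As an aside, note that in this script the -g, -fbacktrace, -fcheck=all and -Wall flags appear only on the link line; gfortran generates its runtime checks and debug line information while compiling each source file, so a variant along these lines (same file names assumed) would be needed for -fcheck=all to actually instrument the code and for the backtrace to show line numbers instead of "MAINqq.f90:?":
#!/bin/sh
FFLAGS="-g -fbacktrace -fcheck=all -Wall"
gfortran $FFLAGS -c CT14Pdf.for
gfortran $FFLAGS -c FXNqq.f90
gfortran $FFLAGS -c MC_VEGAS.f90
gfortran $FFLAGS -c MAINqq.f90
gfortran $FFLAGS -o calc MAINqq.o CT14Pdf.o FXNqq.o MC_VEGAS.o
./calc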
The whole output, which has not changed, is:
rm: cannot remove 'DATAqq.txt': No such file or directory
INPUT PARAMETERS FOR MC_VEGAS:
NDIM= 6 NCALL= 46875. IT= 0
ITMX= 5 ACC= -1.00 NPRN= 5
ALPH= 1.50 MDS= 1 ND= 50
(XL,XU)= ( 0.00000 , 1.00000 )
( 0.00000 , 1.00000 )
( 0.00000 , 1.00000 )
( 0.00000 , 1.00000 )
( 0.00000 , 1.00000 )
( 0.00000 , 1.00000 )
INTEGRATION BY MC_VEGAS ITERATION NO. 1
INTEGRAL = NaN
SQURE DEV = NaN
ACCUMULATED RESULTS: INTEGRAL = NaN
DEV = NaN
CHI**2 PER IT'N = NaN
DATA FOR AXIS 1
X DELTA I X DELTA I X DELTA I
.060000 0.2431E-14 .160000 0.5475E-15 .260000 0.8216E-14
.360000 0.3641E-14 .460000 0.6229E-12 .560000 0.6692E-13
.660000 0.9681E-15 .760000 0.9121E-15 .860000 0.2753E-13
.960000 -0.9269E-16
DATA FOR AXIS 2
X DELTA I X DELTA I X DELTA I
.060000 0.1658E-13 .160000 0.5011E-14 .260000 0.8006E-12
.360000 0.1135E-14 .460000 0.9218E-13 .560000 0.7337E-15
.660000 0.6192E-12 .760000 0.3676E-14 .860000 0.2315E-14
.960000 0.5426E-13
DATA FOR AXIS 3
X DELTA I X DELTA I X DELTA I
.060000 0.3197E-14 .160000 0.1096E-12 .260000 0.5996E-14
.360000 0.5695E-13 .460000 0.3240E-14 .560000 0.5504E-13
.660000 0.9276E-15 .760000 0.6193E-12 .860000 0.1151E-13
.960000 0.7968E-17
DATA FOR AXIS 4
X DELTA I X DELTA I X DELTA I
.060000 0.3605E-13 .160000 0.1656E-14 .260000 0.7266E-12
.360000 0.2149E-13 .460000 0.8086E-13 .560000 0.9119E-14
.660000 0.3692E-15 .760000 0.6499E-15 .860000 0.1906E-17
.960000 0.1542E-19
DATA FOR AXIS 5
X DELTA I X DELTA I X DELTA I
.060000 -0.4229E-15 .160000 -0.4056E-14 .260000 -0.1121E-14
.360000 0.6757E-15 .460000 0.7460E-14 .560000 0.9331E-15
.660000 0.8301E-14 .760000 0.6595E-14 .860000 -0.5203E-11
.960000 0.6361E-12
DATA FOR AXIS 6
X DELTA I X DELTA I X DELTA I
.060000 0.2111E-12 .160000 0.5410E-13 .260000 0.1418E-12
.360000 0.1103E-13 .460000 0.8338E-14 .560000 -0.5840E-14
.660000 0.1263E-14 .760000 -0.1501E-15 .860000 0.4647E-14
.960000 0.3134E-15
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
#0 0x7F9D828B0E08
#1 0x7F9D828AFF90
#2 0x7F9D81FE24AF
#3 0x43586C in __mc_vegas_MOD_vegas
#4 0x400EAE in MAIN__ at MAINqq.f90:?
Segmentation fault (core dumped)

Related

fortran gives strange results when writing to file

I am using Fortran. I want to write some information to a file; here is my code:
open(unit=114514, position="append", file="msst_info.txt")
write(114514, 100) "step =", step
write(114514, 200) "A =", A
write(114514, 200) "omega =", omega(sd)
write(114514, 300) "dilation =", dilation(sd)
write(114514, 200) "p_msst =", p_msst / (kBar * 10)
write(114514, 300) "vol =", vol / Ang**3
write(114514, 300) "temp =", temperature
write(114514, 400) "+++++++++++++++++++++++++++"
100 format(A10, i8)
200 format(A10, e12.4)
300 format(A10, f12.4)
400 format(A)
close(114514)
I want it to append the information to this file after each step, but the result is very strange:
dilation = 0.9999
p_msst = 0.3619E+02
vol = 10 A = -0.1041E-23
omega = -0.2290E-15
dilation = 0.9999
p_msst = 0.3619E+02
vol A = -0.1041E-23
omega = -0.2290E-15
dilation = 0.9999
p_msst = 0.3619E+02
vol = 1053.0452
temp = 195.9830
+++++++++++++++++++++++++++
step = 6
A = -0.1041E-23
omega = -0.2290E-15
dilation = 0.9999
p_msst = 0.3619E+02
vol = 1053.0452
temp = 195.9830
+++++++++++++++++++++++++++
step = 7
A = -0.1249E step = 7
A = -0.1249E-23
omega = -0.2290E-15
dilation = 0.9999
p_msst = 0.4342E+02
vol = 1052.8163
temp = 198.4668
+++++++++++++++++++++++++++
The format of step 6 is what I want, but in step 7 some text is written multiple times and in completely the wrong places. I wonder why this happens.
The whole code:
module msst
use poscar_struct_def
use dynconstr
use prec
use reader_tags
use poscar
implicit none
contains
subroutine msst_step(dyn, latt_cur, t_info, tsif, tifor, io, niond, nionpd, ntypd, ntyppd)
! =======================================================================
! Written by S. Pan 2021/5/28
! This subroutine calculates an MSST step. (Evan J. Reed 2003 PRL)
! I referred to fix_msst.cpp in LAMMPS when writing this code.
! The v(t-dt/2) is needed for the velocity Verlet algorithm.
! Two steps: first, calculate the velocity v(t) from a(t) and v(t-dt/2);
! second, give the positions of the ions and cell x(t+dt), and v(t+dt/2).
! The parameter mu is neglected since it makes the process complex.
! dyn: the struct for some MD variables, such as x, v, force.
! t_info: the struct for type information, such as number of ions.
! step: the current number of step. Init if step == 1.
! =======================================================================
! the dyn info will be changed
type(dynamics), intent(in out) :: dyn
type(latt), intent(in out) :: latt_cur
type(type_info), intent(in out) :: t_info
! these are inputs, should not be modified
type(in_struct), intent(in) :: io
real(q), dimension(3, t_info%nions), intent(in) :: tifor ! force on ions
real(q), dimension(3, 3), intent(in) :: tsif ! stress
! some units
real(q), parameter :: eV = 1.60218e-19, Ang = 1e-10, fs = 1e-15, &
amu = 1.66053886e-27, kBar = 1e5, kB = 1.38064852e-23
! energy in J, length in meter, time in s, mass in kg
! stress in Pa
! internal variables
real(q), save :: dt, dthalf, tscale, qmass, vs, total_mass=0, vol0
real(q), dimension(3, t_info%nions) :: velocity_before_half_dt, velocity_now, velocity_after_half_dt, &
C_force, x
real(q), dimension(3), save :: omega = [0, 0, 0] ! the time derivative of cell
real(q), dimension(3) :: dilation = [1, 1, 1]
integer :: ierr, ii, ni, nt, niond, nionpd, ntypd, ntyppd
integer, save :: sd, step = 0
real(q), dimension(3, 3) :: kinetic_stress, total_stress
real(q), dimension(3, 3), save :: p0
real(q) :: p_msst, A, vol, vol1, vol2, fac1, sqrt_initial_temperature_scaling, temperature, ekin
if (step .eq. 0) then
dyn%init = 0
call rd_poscar(latt_cur, t_info, dyn, &
& niond, nionpd, ntypd, ntyppd, &
& io%iu0, io%iu6)
end if
dyn%posioc = dyn%posion
step = step + 1
dt = dyn%potim * fs
dthalf = dt/2
! convert x to cartesian coordinates. x and velocity use standard units.
do ii = 1, 3
x(ii, :) = dyn%posion(ii, :) * latt_cur%a(ii, ii) * Ang
end do
! there isn't anything in dyn%pomass. so must use t_info%pomass
ni=1
do nt=1,t_info%ntyp
do ni=ni,t_info%nityp(nt)+ni-1
C_force(:, ni) = tifor(:, ni)/t_info%pomass(nt)
total_mass = total_mass + t_info%pomass(nt)
enddo
enddo
C_force = C_force * (eV/Ang/amu)
total_mass = total_mass * amu
if (step .eq. 1) then
! read these tags from INCAR
call process_incar(io%lopen, io%iu0, io%iu5, 'qmass', qmass, ierr, .true.)
call process_incar(io%lopen, io%iu0, io%iu5, 'tscale', tscale, ierr, .true.)
call process_incar(io%lopen, io%iu0, io%iu5, 'shock_direction', sd, ierr, .true.)
call process_incar(io%lopen, io%iu0, io%iu5, 'shock_velocity', vs, ierr, .true.)
! use tscale to give a initial cell velocity, or the cell will stay still forever
do ii = 1, 3
velocity_now(ii, :) = dyn%vel(ii, :) * latt_cur%a(ii, ii) * (Ang/fs) / dyn%potim
end do
CALL EKINC(EKIN,T_INFO%NIONS,T_INFO%NTYP,T_INFO%ITYP,T_INFO%POMASS,DYN%POTIM,LATT_CUR%A,DYN%VEL)
temperature = 2 * ekin * eV / (kB * t_info%nions * 3 )
fac1 = tscale*total_mass/qmass*temperature
omega(sd) = -sqrt(fac1)
sqrt_initial_temperature_scaling = sqrt(1.0-tscale)
velocity_now = velocity_now * sqrt_initial_temperature_scaling
else
! =======================================================================
! 2nd half of Verlet update. this part get current velocity from t-dt/2.
! =======================================================================
! propagate particle velocities 1/2 step, it should not be done on the first step of MD.
do ii = 1, 3
velocity_before_half_dt(ii, :) = dyn%vel(ii, :) * latt_cur%a(ii, ii) * (Ang/fs) / dyn%potim
end do
velocity_now = velocity_before_half_dt + C_force*dthalf
end if
! compute new pressure and volume and temperature
call compute_kinetic_stress(t_info, latt_cur, dyn, kinetic_stress, tsif, -1)
total_stress = tsif + kinetic_stress
total_stress = total_stress * kBar
vol = latt_cur%a(1, 1) * latt_cur%a(2, 2) * latt_cur%a(3, 3) * (Ang**3)
if (step .eq. 1) then
p0 = total_stress
vol0 = vol
end if
p_msst = vs**2 * total_mass * (vol0 - vol)/vol0**2
A = total_mass * (total_stress(sd, sd) - p0(sd, sd) - p_msst) / qmass
! qmass is in mass(kg)**2 / length(m)**4
if (step .ne. 1) then
! propagate the time derivative of the volume 1/2 step at fixed V, r, rdot
! it should not be done on the first step of MD.
omega(sd) = omega(sd) + A*dthalf ! this is the current omega
end if
! =======================================================================
! 1st half of Verlet update
! in VASP, the 1st half of Verlet update cannot be performed before
! compute force and stress. So it must be placed here.
! =======================================================================
! propagate the time derivative of the volume 1/2 step at fixed vol, r, rdot
omega(sd) = omega(sd) + A*dthalf ! this is omega(t+dt/2)
! propagate velocities 1/2 step
velocity_after_half_dt = velocity_now + C_force*dthalf
! now, we need to compute the vol and pos for next step:
! propagate the volume 1/2 step
vol1 = vol + omega(sd)*dthalf
! rescale positions and change box size
dilation(sd) = vol1/vol
call remap(latt_cur, sd, x, velocity_after_half_dt, dilation,t_info)
! propagate particle positions 1 time step
x = x + dt * velocity_after_half_dt
! propagate the volume 1/2 step
vol2 = vol1 + omega(sd)*dthalf
! rescale positions and change box size
dilation(sd) = vol2/vol1
call remap(latt_cur, sd, x, velocity_after_half_dt, dilation,t_info)
do ii = 1, 3
dyn%posion(ii, :) = x(ii, :) / latt_cur%a(ii, ii) / Ang
dyn%vel(ii, :) = velocity_after_half_dt(ii, :) / latt_cur%a(ii, ii) / (Ang/fs) * dyn%potim
end do
t_info%posion = dyn%posion
CALL EKINC(EKIN,T_INFO%NIONS,T_INFO%NTYP,T_INFO%ITYP,T_INFO%POMASS,DYN%POTIM,LATT_CUR%A,DYN%VEL)
temperature = 2 * ekin * eV / (kB * t_info%nions * 3 )
! this block for debug. write the information to a file
if (.true.) then
open(unit=114514, position="append", file="msst_info.txt")
write(114514, 100) "step =", step
write(114514, 200) "A =", A
write(114514, 200) "omega =", omega(sd)
write(114514, 300) "dilation =", dilation(sd)
write(114514, 200) "p_msst =", p_msst / (kBar * 10)
write(114514, 300) "vol =", vol / Ang**3
write(114514, 300) "temp =", temperature
write(114514, 400) "+++++++++++++++++++++++++++"
100 format(A10, i8)
200 format(A10, e12.4)
300 format(A10, f12.4)
400 format(A)
close(114514)
end if
end subroutine msst_step
! change the shape of cell, along with ion pos and vel.
subroutine remap(latt_cur, sd, x, v, dilation,t_info)
integer, intent(in):: sd
type(type_info), intent(in) :: t_info
real(q), dimension(3), intent(in) :: dilation
real(q), dimension(3, t_info%nions), intent(in out) :: x, v
type(latt), intent(in out) :: latt_cur
latt_cur%a(sd, sd) = dilation(sd) * latt_cur%a(sd, sd)
x(sd, :) = dilation(sd) * x(sd, :)
v(sd, :) = dilation(sd) * v(sd, :)
end subroutine remap
end module msst

running mpi subroutine in fortran program

I want to run a Fortran program which calls a subroutine that I want to parallelize with MPI. I know this sounds complicated, but I want to be able to specify the number of processes for each call. What I would want to use is a structure like this:
program my_program
implicit none
!Define variables
nprocs = !formula for calculating number of processes.
call my_subroutine(output,nprocs,other input vars)
end my_program
I want to run my_subroutine with the same effect as this:
mpirun -n nprocs my_subroutine.o
where my_subroutine has been compiled with 'other input vars.'
Is this possible?
Here is a simple example. I try compiling as follows:
$ mpif90 -o my_program WAVE_2D_FP_TUNER_mpi.f90 randgen.f SIMPLE_ROUTINE.f90
I try to run it like this:
$ mpirun -np (1 or 2) my_program
PROGRAM WAVE_2D_FP_TUNER_mpi
USE MPI
IMPLICIT NONE
REAL(KIND=8) :: T,PARAM(1:3),Z,ZBQLU01
REAL(KIND=8) :: ERRORS,COSTS,CMAX,CMAX_V(1:1000),THRESHOLD,Z_MIN,Z_MAX
REAL(KIND=8) :: U,S,R(1:6),MATRIX(1:15)
INTEGER :: EN,INC,I,J,M,P
INTEGER :: NPROCS,IERR
!0.8,-0.4,0.4,10,4,4,7 -- [0.003,0.534]
!0.8,-0.2,0.2,10,4,4,7 -- [0.190,0.588]
CALL MPI_INIT(IERR)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,NPROCS,IERR)
THRESHOLD = 0.D0
EN = 81
INC = 1
Z_MIN = -2.D-1; Z_MAX = 2.D-1
T = 1.D0
PARAM(1) = 10.D0; PARAM(2) = 4.D0; PARAM(3) = 4.D0
CMAX = 7.D0 !Max that wave speed could possibly be.
CALL ZBQLINI(0.D0)
OPEN(UNIT = 1, FILE = "TUNER_F.txt")
WRITE(1,*) 'Grid Size: '
WRITE(1,*) T/(EN-1)
DO P = 1,15
S = 0
Z = Z_MIN + (1.d0/(15-1))*dble((P-1))*(Z_MAX - Z_MIN)
WRITE(1,*) 'Z: ',Z
DO I = 1,1000
DO J = 1,6
R(J) = ZBQLU01(0.D0)
END DO
!CALL PDE_WAVE_F_mpi(T,PARAM,R,Z,CMAX,EN,INC,NPROCS,U)
CALL SIMPLE_ROUTINE(T,PARAM,R,Z,CMAX,EN,INC,NPROCS,U)
IF (U<=threshold) THEN
S = S + 1.D0
ELSE
S = S + 0.D0
END IF
END DO
MATRIX(P) = (1.D0/1000)*S
END DO
DO I = 1,15
WRITE(1,*) MATRIX(I)
END DO
PRINT *,MINVAL(MATRIX)
PRINT *,MAXVAL(MATRIX)
CLOSE(1)
CALL MPI_FINALIZE(IERR)
END PROGRAM WAVE_2D_FP_TUNER_mpi
Here is the subroutine that I wish to parallelize with mpi.
SUBROUTINE SIMPLE_ROUTINE(T,PARAM,R,Z,CMAX,EN,INC,NPROCS,U)
! Outputs scalar U = T*Z*CMAX*INC*SUM(PARAM)*SUM(R)*SUM(Y)
USE MPI
IMPLICIT NONE
REAL(KIND=8), INTENT(IN) :: T,PARAM(1:3),R(1:6),Z,CMAX
INTEGER, INTENT(IN) :: EN,INC
INTEGER, INTENT(IN) :: NPROCS
REAL(KIND=8), INTENT(OUT) :: U
REAL(KIND=8) :: H,LOCAL_SUM,SUM_OF_X
REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: X
INTEGER :: PX,PX_MAX,NXL,REMX,IX_OFF,P_LEFT,P_RIGHT
INTEGER :: J
INTEGER :: IERR,MYID
! Broadcast nprocs handle to all processes in MPI_COMM_WORLD
CALL MPI_BCAST(&NPROCS, NPROCS, MPI_INT, 0, MPI_COMM_WORLD,IERR)
! Create subcommunicator SUBCOMM (Do not know how to define WORLD_GROUP?)
CALL MPI_COMM_SPLIT(MPI_COMM_WORLD,WORLD_GROUP,SUBCOMM,IERR)
! Assign IDs to processes in SUBCOMM
CALL MPI_COMM_RANK(SUBCOMM,MYID,IERR)
! Give NPROCS - 1 to SUBCOMM
CALL MPI_COMM_SIZE(SUBCOMM,NPROCS-1,IERR)
H = 2.D0/(EN-1)
! LABEL THE PROCESSES FROM 1 TO PX_MAX.
PX = MYID + 1
PX_MAX = NPROCS
! SPLIT UP THE GRID IN THE X-DIRECTION.
NXL = EN/PX_MAX !nxl = 10/3 = 3
REMX = EN-NXL*PX_MAX !remx = 10-3*3 = 1
IF (PX .LE. REMX) THEN !for px = 1,nxl = 3
NXL = NXL+1 !nxl = 4
IX_OFF = (PX-1)*NXL !ix_off = 0
ELSE
IX_OFF = REMX*(NXL+1)+(PX-(REMX+1))*NXL !for px = 2 and px = 3, ix_off = 1*(3+1)+(2-(1+1))*3 = 4, ix_off = 1*(3+1)+(3-(1+1))*3 = 7
END IF
! ALLOCATE MEMORY FOR VARIOUS ARRAYS.
ALLOCATE(X(0:NXL+1))
X(:) = (/(-1.D0+DBLE(J-1+IX_OFF)*H, J=1,EN)/)
LOCAL_SUM = SUM(X(1:NXL))
CALL MPI_REDUCE(LOCAL_SUM,SUM_OF_X,1,&
MPI_DOUBLE_PRECISION,MPI_SUM,&
0,MPI_COMM_WORLD,IERR)
U = T*Z*CMAX*INC*SUM(PARAM)*SUM(R)*SUM_OF_X
DEALLOCATE(X)
CALL MPI_COMM_FREE(SUBCOMM,IERR)
CALL MPI_BARRIER(MPI_COMM_WORLD,IERR)
END SUBROUTINE SIMPLE_ROUTINE
Ultimately, I want to be able to change the number of processors used in the subroutine, where I want nprocs to be calculated from the value of EN.
A simple approach is to start the MPI app with the maximum number of processes.
Then my_subroutine will first MPI_Bcast(&nprocs, ...) and MPI_COMM_SPLIT(MPI_COMM_WORLD, ..., &subcomm) in order to create a sub-communicator subcomm with nprocs tasks
(you can use MPI_UNDEFINED as the color so the "other" communicator will be MPI_COMM_NULL).
Then the MPI tasks that are part of subcomm will perform the computation.
Finally, MPI_Comm_free(&subcomm) and MPI_Barrier(MPI_COMM_WORLD)
From a performance point of view, note that sub-communicator creation can be expensive, but hopefully not significant compared to the computation time.
If that is not the case, you would rather revamp your algorithm so that it can have nprocs tasks do the job while the other ones wait.
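Here is a minimal Fortran sketch of that first approach (the name nprocs_wanted and the choice of letting the lowest ranks join subcomm are illustrative assumptions, not taken from the question):
subroutine my_subroutine(nprocs_wanted, u)
  use mpi
  implicit none
  integer, intent(inout)    :: nprocs_wanted  ! meaningful on rank 0, broadcast below
  real(kind=8), intent(out) :: u
  integer :: ierr, myrank, color, subcomm
  ! make nprocs known to every task
  call MPI_BCAST(nprocs_wanted, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr)
  ! split MPI_COMM_WORLD: ranks below nprocs_wanted get subcomm,
  ! the others get MPI_COMM_NULL and skip the computation
  if (myrank < nprocs_wanted) then
    color = 0
  else
    color = MPI_UNDEFINED
  end if
  call MPI_COMM_SPLIT(MPI_COMM_WORLD, color, myrank, subcomm, ierr)
  u = 0.0d0
  if (subcomm /= MPI_COMM_NULL) then
    ! ... perform the actual computation on subcomm here ...
    call MPI_COMM_FREE(subcomm, ierr)
  end if
  ! resynchronize all tasks before returning to the caller
  call MPI_BARRIER(MPI_COMM_WORLD, ierr)
end subroutine my_subroutine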
Another approach would be to start your app with one MPI task, MPI_Comm_spawn() nprocs-1 tasks, merge the inter-communicator, perform the computation, and terminate the spawned tasks.
The overhead of task creation is much higher, and this might not be fully supported by your resource manager, so I would not advise this option.

Type mismatch in argument: passed COMPLEX(8) to REAL(8)

I'm trying to compile and run a relatively old code from a PhD thesis; you can find the whole code in Appendices C and D of this document.
Here are the necessary parts of the code:
From wfMath.f90:
subroutine wfmath_gaussian(widthz,pz)
use progvars
implicit none
real*8, intent(in) :: widthz ! the width of the wavepacket
real*8, intent(in) :: pz ! momentum
integer :: nR
real*8 :: rvalue
complex*16 :: cvalue
! complex*16 :: psi !!!ORIGINAL CODE LINE 23/02/2018. Saba
complex*16, dimension(1) :: psi !!! psi is originally defined as a scalar. But wfmath_normalize(wf) takes a rank-1 tensor as
!argument. So here I change the declaration of psi from a scalar to a rank-1 tensor contains only one element. 23/02/2018. Saba
real*8 :: z2
z2 = minz + deltaz
do nR=1, nz
rvalue = exp( -((z2-centerz)/widthz)**2 /2 )/ (2*pi*widthz) !!!ORIGINAL CODE LINE
!rvalue = exp( -((z2-centerz)/1.0d0)**2 /2 )/ (2*pi*1.0d0)
cvalue = cdexp( cmplx(0.0,1.0)*(pz*z2))
psi(nR) = rvalue * cvalue
z2 = z2 + deltaz ! next grid position in x-direction
enddo
call wfmath_normalize(psi)
end subroutine wfmath_gaussian
From tdse.f90 (main part):
subroutine init
use progvars
use strings;
use wfMath;
use wfPot;
!use tdseMethods;
implicit none
integer :: nloop
real*8 :: widthz,pz
select case (trim(molecule))
case("H2")
mass = 917.66d0; nz = 2048; deltaz = 0.05d0;
case("D2")
mass = 1835.241507d0; nz = 1024; deltaz = 0.05d0;
case("N2")
mass = 12846.69099d0; nz = 512; deltaz = 0.01d0;
case("O2")
mass = 14681.93206d0; nz =8192 ; deltaz = 0.005d0;
case("Ar2")
mass = 36447.94123d0; nz = 65536; deltaz = 0.002d0;
end select
maxt = 33072.80d0 !800fs ! maximum time
deltat = 1.0d0 ! delta time
widthz = 1.0d0 ! width of the gaussian
minz = 0.05d0 ! minimum z in a.u.
maxz = nz * deltaz ! maximum z in a.u.
centerz = 2.1d0 ! center of the gaussian
nt = NINT(maxt/deltat) ! time steps
pz = 0.d0 ! not used currently
!_____________________________FFT Section____________________________________________
deltafft = 20.d0* deltat !1.0d0*deltat ! time step for FFT
nfft = NINT(maxt/deltafft) ! no of steps for FFT
!_________________________absorber parameters_______________________________________
fadewidth = 10.d0 ! the width of the absorber in a.u.
fadestrength = 0.01d0 ! the maximum heigth of the negative imaginary potential
!_________________________E FIELD section_____________________________________________
Ewidth = 1446.2d0 !35fs ! width of the envelope
Eo = 0.053 !E14 ! field amplitude
Eomega = 0.057d0 !800nm ! laser frequency
! Eomega = 0.033d0 !1400nm ! laser frequency
Ephi = 0.d0 ! carrier envelope phase
Eto = 1000.d0 ! ecenter of the Gaussian envelope
EoPed = 0.0755 !2E14
EwidthPed = 826.638 !20fs
EomegaPed = Eomega
EphiPed = 0.d0
EtoPed = 1000.d0
EoPump = 0.053 !1E14 0.00285d0
EwidthPump = Ewidth
EomegaPump = 0.057d0
EphiPump = 0.0d0
EtoPump = 0.d0
includeAbsorber = .true. ! switch for absorber
includeField = .true. ! .false. ! switch for efield
includePedestal = .false. ! switch for pedestal
includeConstantPump = .true. ! .false. ! switch for efield
useADK = .false. ! ADK switch
calculatePowerSpectra = .true.
calculateKERPowerSpectra = .true. !.false.
!_____________________________Printing & Plotting Filters__________________________________
printFilter = nz
maxFrequencyFilter = 500
printInterval =100 !200
! print filter upper boundary check
if(printFilter > nz) then
printFilter = nz
end if
call allocateArrays();
do nloop = 1,nz
Z(nloop) = minz+ (nloop)* deltaz;
P(nloop) = 2*pi*(nloop-(nz/2)-1)/(maxz-minz);
E(nloop) = 27.2*(P(nloop)**2)/(4.d0*mass);
end do
! call wfmath_gaussian(psiground,widthz,pz) !!! ORIGINAL CODE LINE
call wfmath_gaussian(psiground,real(widthz),pz) ! Attempt to solve mismatch error. Does not work. Saba 24/02/2018
! call wfmath_gaussian(psiground,1.0d0,pz) ! Attempt to solve mismatch error. Does not work. Saba 23/02/2018
! call wfmath_gaussian(pz) ! Attempt to solve mismatch error. Does not work. Saba 23/02/2018
call setabsorber_right(fadewidth, fadestrength)
call printpsi(psiground,trim(concat(outputFolder,"psi_gausssian.dat")))
call potentials_init(nz) !initialize potential arrays
call read_potential();
end subroutine init
As far as I understand, there is no mismatch at all: widthz is declared as real*8 in the main part of the code (subroutine init), and subroutine wfmath_gaussian(...) expects widthz to be a real*8. I can't see where this mismatch error comes from.
used compiler: GNU Fortran 6.3.0
used compile line: $gfortran tdse.f90
error message:
tdse.f90:159:118:
call wfmath_gaussian(psiground,real(widthz),pz) ! Attempt to solve mismatch error. Does not work. Saba 24/02/2018
(1)
Error: Type mismatch in argument 'widthz' at (1); passed COMPLEX(8) to REAL(8)
Thanks in advance...

Reading data from files using MPI in Fortran

I want to read data from some .dat files into a Fortran code for postprocessing. As a test case, I am just using one processor with MPI and trying to read a single data file into my code. The content of the data file is as follows:
qout0050.dat : 1 1 1
However, the matrix (Vn in this case) which is supposed to store the contents of this data file contains only zeros. The relevant part of the code, which reads from the data file and stores into the matrix, is as follows:
subroutine postproc()
use precision_mod
use mpicomms_mod
implicit none
integer(kind=MPI_Offset_kind) :: i, j , igrid, k, l, disp, iproc, info, lwork
integer :: rst, numvar, ifile, number, num, step, ntot
integer :: Nx_max, Ny_max, Nz_max
integer :: Nxp, Nzp, Nyp, Ngrid
integer :: Ifirst, Ilast, Jfirst, Jlast, Kfirst, Klast
character*(64) :: fname, buffer, ffname
integer :: tmp, N1, N2, N3, tmpp
real(WP), allocatable :: qout(:,:,:,:), phi_xyz(:,:,:,:,:)
real(WP), allocatable :: Vn(:,:), Vntmp(:,:), TAU(:,:)
real(WP), allocatable :: Rprime(:,:), Rtmp(:,:), Rend(:,:), Q(:,:), tmpL(:), tmpG(:)
real(WP), allocatable :: s(:), vt(:,:), u(:,:), utmp(:,:)
real(WP), allocatable :: tmp1(:,:), tmp2(:,:), tmp3(:,:), phi(:,:)
integer, dimension(2) :: view
integer :: view1
integer, dimension(3) :: lsizes, gsizes, start
real(WP) :: tmpr
real(WP), allocatable :: work(:), mu(:), eigY(:,:), wr(:), wi(:), beta(:)
integer(kind=MPI_Offset_kind) :: SP_MOK, Nx_MOK, Ny_MOK, Nz_MOK, WP_MOK
open(unit=110,file='postparameters.dat',form="formatted")
read (110,*) Nx_max
read (110,*) Ny_max
read (110,*) Nz_max
read (110,*) numvar
read (110,*) step
close(110)
! Define the size of grid on each processor
if (mod(Nx_max,px).ne.0) then
write(*,*) 'Error in preproc: Nx_max is not divisible by px'
call MPI_ABORT(MPI_COMM_WORLD,0,ierr)
end if
Nxp = Nx_max/px
if (mod(Ny_max,py).ne.0) then
write(*,*) 'Error in preproc: Ny_max is not divisible by py'
call MPI_ABORT(MPI_COMM_WORLD,0,ierr)
end if
Nyp = Ny_max/py
if (mod(Nz_max,pz).ne.0) then
write(*,*) 'Error in preproc: Nz_max is not divisible by pz'
call MPI_ABORT(MPI_COMM_WORLD,0,ierr)
end if
Nzp = Nz_max/pz
Ifirst = irank*Nxp + 1
Ilast = Ifirst + Nxp - 1
Jfirst = jrank*Nyp + 1
Jlast = Jfirst + Nyp - 1
Kfirst = krank*Nzp + 1
Klast = Kfirst + Nzp - 1
! Setting the view for phi
gsizes(1) = Nx_max
gsizes(2) = Ny_max
gsizes(3) = Nz_max
lsizes(1) = Nxp
lsizes(2) = Nyp
lsizes(3) = Nzp
start(1) = Ifirst - 1
start(2) = Jfirst - 1
start(3) = Kfirst - 1
call MPI_TYPE_CREATE_SUBARRAY(3,gsizes,lsizes,start,&
MPI_ORDER_FORTRAN,MPI_REAL_SP,view,ierr)
call MPI_TYPE_COMMIT(view,ierr)
call MPI_TYPE_CREATE_SUBARRAY(3,gsizes,lsizes,start,&
MPI_ORDER_FORTRAN,MPI_REAL_WP,view1,ierr)
call MPI_TYPE_COMMIT(view1,ierr)
WP_MOK = int(8, MPI_Offset_kind)
Nx_MOK = int(Nx_max, MPI_Offset_kind)
Ny_MOK = int(Ny_max, MPI_Offset_kind)
Nz_MOK = int(Nz_max, MPI_Offset_kind)
! Reading the qout file
ffname = 'qout'
allocate(qout(Nxp,Nyp,Nzp,numvar))
allocate(Vn(Nxp*Nyp*Nzp*numvar,step))
do rst = 1,step
if (myrank == 0) print*, 'Step = ', 50 + rst -1
write(buffer,"(i4.4)") 50 + rst -1
fname = trim(ffname)//trim(buffer)
fname = trim('ufs')//":"// trim(fname)
fname = trim(adjustl(fname))//'.dat'
call MPI_FILE_OPEN(MPI_COMM_WORLD,fname,MPI_MODE_RDONLY,MPI_INFO_NULL,ifile,ierr)
call MPI_FILE_READ(ifile,Ngrid,1,MPI_INTEGER,status,ierr)
if (1 /= Ngrid) then
if (myrank == 0 ) write(*,*) Ngrid
endif
call MPI_FILE_READ(ifile,tmp,1,MPI_INTEGER,status,ierr)
if (tmp /= Nx_max) write(*,*) tmp
call MPI_FILE_READ(ifile,tmp,1,MPI_INTEGER,status,ierr)
if (tmp /= Ny_max) write(*,*) tmp
call MPI_FILE_READ(ifile,tmp,1,MPI_INTEGER,status,ierr)
if (tmp /= Nz_max) write(*,*) tmp
call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
call MPI_FILE_READ(ifile,tmpr,1,MPI_REAL_WP,status,ierr)
do l=1,numvar
disp = 4*4 + 4*WP_MOK + Nx_MOK*Ny_MOK*Nz_MOK*WP_MOK*(l-1)
call MPI_FILE_SET_VIEW(ifile,disp,MPI_REAL_WP,view1,"native",MPI_INFO_NULL,ierr)
call MPI_FILE_READ_ALL(ifile,qout(1:Nxp,1:Nyp,1:Nzp,l),Nxp*Nzp*Nyp, MPI_REAL_WP,status,ierr)
end do
call MPI_FILE_CLOSE(ifile,ierr)
!-----------------------------------------------
! Building the snapshot matrix Vn --------------
!-----------------------------------------------
do i=1,numvar
do k=1,Nzp
do j=1,Nyp
Vn((1 + Nxp*(j-1) + Nxp*Nyp*(k-1) + Nxp*Nyp*Nzp*(i-1)):(Nxp*j + Nxp*Nyp*(k-1) + Nxp*Nyp*Nzp*(i-1)),rst) = qout(1:Nxp,j,k,i)
end do
end do
end do
end do
call MPI_BARRIER(MPI_COMM_WORLD,ierr)
deallocate(qout)

BLAS Matrix multiplication NaN

I am performing an SVD of a matrix using the LAPACK library and then multiplying the matrices to double check that they are correct. See the code below
subroutine svd_and_dgemm() ! -- Matrix decomp: A = USV^t
implicit none
integer,parameter :: m = 2
integer,parameter :: n = 3
integer i,info,lda,ldu,ldv,lwork,l,lds,ldc,ldvt,ldd
real*8 :: a(m,n),a_copy(m,n),sdiag(min(m,n)),s(m,n),u(m,m),vt(n,n),alpha,beta,c(m,n),d(m,n)
character jobu, jobv, transu, transs
real*8, allocatable, dimension ( : ) :: work
lwork = max(1,3*min(m,n) + max(m,n), 5*min(m,n))
allocate (work(lwork))
a = reshape((/3,1,1,-1,3,1/),shape(a),order=(/2, 1/)) !column-wise
print*,'A'
print*, a(1,1), a(1,2), a(1,3)
print*, a(2,1), a(2,2), a(2,3)
jobu = 'A'
jobv = 'A'
lda = m
ldu = m
ldv = n
a_copy = a
call dgesvd (jobu, jobv, m, n, a_copy, lda, sdiag, u, ldu, vt, ldv, work, lwork, info)
if ( info /= 0 ) then
write ( *, '(a)' ) ' '
write ( *, '(a)' ) 'R8MAT_SVD_LAPACK - Failure!'
write ( *, '(a)' ) ' The SVD could not be calculated.'
write ( *, '(a)' ) ' LAPACK routine DGESVD returned a nonzero'
write ( *, '(a,i8)' ) ' value of the error flag, INFO = ', info
return
end if
!
! Make the MxN matrix S from the diagonal values in SDIAG.
s(1:m,1:n) = 0.0D+00
do i = 1, min ( m, n )
s(i,i) = sdiag(i)
end do
print*,'U'
print*, u(1,1), u(1,2)
print*, u(2,1), u(2,2)
print*,'S'
print*, s(1,1), s(1,2), s(1,3)
print*, s(2,1), s(2,2), s(2,3)
print*,'Vt'
print*, vt(1,1), vt(1,2), vt(1,3)
print*, vt(2,1), vt(2,2), vt(2,3)
print*, vt(3,1), vt(3,2), vt(3,3)
deallocate (work)
! -- Verify SVD: A = USV^t
! -- Compute C = US
transu = 'N'
transs = 'N'
ldu = m; lds = m; ldc = m
alpha = 1.; beta = 1.
call dgemm(transu,transs,m,n,m,alpha,u,ldu,s,lds,beta,c,ldc)
! -- Compute A = D = CV^t
l = m ! nrows C
ldvt = n; ldd = m
call dgemm(transu,transs,m,n,n,alpha,c,ldc,vt,ldvt,beta,d,ldd)
print*,'C'
print*, c(1,1), c(1,2), c(1,3)
print*, c(2,1), c(2,2), c(2,3)
print*,'D'
print*, d(1,1), d(1,2), d(1,3)
print*, d(2,1), d(2,2), d(2,3)
end subroutine svd_and_dgemm
The output I get is
A
3.0000000000000000 1.0000000000000000 1.0000000000000000
-1.0000000000000000 3.0000000000000000 1.0000000000000000
U
-0.70710678118654835 -0.70710678118654657
-0.70710678118654668 0.70710678118654846
S
3.4641016151377553 0.0000000000000000 0.0000000000000000
0.0000000000000000 3.1622776601683795 0.0000000000000000
Vt
-0.40824829046386402 -0.81649658092772526 -0.40824829046386291
-0.89442719099991508 0.44721359549995882 5.2735593669694936E-016
-0.18257418583505536 -0.36514837167011066 0.91287092917527679
C
-2.4494897427831814 -2.2360679774997867 0.0000000000000000
-2.4494897427831757 2.2360679774997929 0.0000000000000000
D
2.9999999999999991 1.0000000000000002 0.99999999999999989
NaN 2.9999999999999991 1.0000000000000000
So I am not sure where this NaN is coming from. The odd thing is that if, before printing D in that way, I print it as follows:
print*,'D'
print*, d
Then I don't get the NaN anymore, so the output for D is
D
2.9999999999999991 -0.99999999999999933 1.0000000000000002 2.9999999999999991 0.99999999999999989 1.0000000000000000
D
2.9999999999999991 1.0000000000000002 0.99999999999999989
-0.99999999999999933 2.9999999999999991 1.0000000000000000
Any idea why this is happening?
PS: Information for the dgesvd (LAPACK) and dgemm (BLAS) subroutines.
So from our dialogue in the comments, it seems you have a problem that stems from not initializing an array. It is always good practice to do this, and in situations where you do operations like var = var + 1 it is required. If you are unlucky, your program will appear to work fine anyway, but then strange things happen once in a while when garbage happens to reside in the memory where the array gets allocated.
A double should be initialized like this
array = 0.0d0 ! for double precision
or
array = 0 ! ok for single,double and integer
The following initializes single precision, but not double precision:
array = 0.0 ! single (not double) precision.
or this
array = 0.0e0 ! single (not double) precision.
I recommend the page fortran90.org.
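To make that concrete for the DGEMM calls in the question: DGEMM computes C := alpha*op(A)*op(B) + beta*C, so with beta = 1 the previous contents of c (and then d) are added to the product, and if those arrays were never set the result can pick up garbage or NaN. A minimal self-contained sketch of the initialize-before-use pattern (not the original code; either zero the output array or pass beta = 0):
program dgemm_init_demo
  implicit none
  real*8 :: a(2,2), b(2,2), c(2,2)
  external dgemm
  a = reshape([1.0d0, 0.0d0, 0.0d0, 1.0d0], shape(a))
  b = reshape([2.0d0, 3.0d0, 4.0d0, 5.0d0], shape(b))
  c = 0.0d0   ! without this line, beta = 1 adds whatever garbage is in c
  call dgemm('N', 'N', 2, 2, 2, 1.0d0, a, 2, b, 2, 1.0d0, c, 2)
  print *, c  ! the product a*b, with no stray NaN
end program dgemm_init_demo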