I kindly request your help on this code where I kept on getting
an error: Rank mismatch in array reference at (1) (2/1). My objective is to go through each point in a cube(p = i+(j-1)*N + (k-1)NN) and calculate the gradient of the potential along each axis (gradphi_x, gradphi_y, gradphi_z).
PROGRAM sub_rho_phi
integer, parameter:: N=3
real, dimension(N):: gradphi_x, gradphi_y, gradphi_z
call output(gradphi_x, gradphi_y, gradphi_z)
open(unit=1,file="grad_phi.dat")
l = 0
do
l=l+1
write(1,*) gradphi_x(l),gradphi_y(l),gradphi_z(l)
if ( l == N**3) then
exit
end if
end do
END
SUBROUTINE output( gradphi_x, gradphi_y, gradphi_z )
real, parameter:: h=0.7,G=6.67,M=1.98892*(10**3)!!in(10**15) kg
integer, parameter:: N=3
real, dimension(N):: r, gradphi_x,gradphi_y,gradphi_z
integer, dimension(N**3):: x,y,z
integer:: p
real:: a
a=500/h !in kpc
do i=0, N
do j=0, N
do k=0, N
p = i+(j-1)*N + (k-1)*N*N
x(p,1)=i
y(p,2)=j
z(p,3)=k
r(p)=sqrt(x(p,1)*x(p,1)+y(p,2)*y(p,2)+z(p,3)*z(p,3))
gradphi_x(p)=(G*M)*x(p,1)/r(p)*(r(p)+a)**2
gradphi_y(p)=(G*M)*y(p,2)/r(p)*(r(p)+a)**2
gradphi_z(p)=(G*M)*z(p,3)/r(p)*(r(p)+a)**2
enddo
enddo
enddo
return
END
You have declared x, y and z as one dimensional arrays, but are using two dimensional indexing all the way through output.
I am trying to find the first (smallest) k eigenvalues of a real symmetric tridiagonal matrix using the appropriate lapack routine.
I am new to both Fortran and lapack libraries, but (d)stemr seemd to me a good choice so I tried calling it but keep getting segmentation faults.
After some trials I noticed the problem was my input matrix, which has:
diagonal = 2 * (1 + order 1e-5 to 1e-3 small variable correction)
subdiagonal all equal -1 (if I use e.g. 0.95 instead everything works)
I reduced the code to a single M(not)WE program shown below.
So the question are:
why is stemr failing with such a matrix, while e.g. stev works?
why a segmentation fault?
program mwe
implicit none
integer, parameter :: n = 10
integer, parameter :: iu = 3
integer :: k
double precision :: d(n), e(n)
double precision :: vals(n), vecs(n,iu)
integer :: m, ldz, nzc, lwk, liwk, info
integer, allocatable :: isuppz(:), iwk(:)
double precision, allocatable :: wk(:)
do k = 1, n
d(k) = +2d0 + ((k-5.5d0)*1d-2)**2
e(k) = -1d0 ! e(n) not really needed
end do
ldz = n
nzc = iu
allocate(wk(1), iwk(1), isuppz(2*iu))
! ifort -mkl gives SIGSEGV at this call <----------------
call dstemr( &
'V', 'I', n, d, e, 0d0, 0d0, 1, iu, &
m, vals, vecs, ldz, -1, isuppz, .true., &
wk, -1, iwk, -1, info)
lwk = ceiling(wk(1)); deallocate(wk); allocate(wk(lwk))
liwk = iwk(1); deallocate(iwk); allocate(iwk(liwk))
print *, info, lwk, liwk ! ok with gfortran
! gfortran -llapack gives SIGSEGV at this call <---------
call dstemr( &
'V', 'I', n, d, e, 0d0, 0d0, 1, iu, &
m, vals, vecs, ldz, nzc, isuppz, .true., &
wk, lwk, iwk, liwk, info)
end program
Compilers are invoked via:
gfortran [(GCC) 9.2.0]: gfortran -llapack -o o.x mwe.f90
ifort [(IFORT) 19.0.5.281 20190815]: ifort -mkl -o o.x mwe.f90
According to the manual, one issue seems that the argument TRYRAC needs to be a variable (rather than a constant) because it can be overwritten by dstemr():
[in,out] TRYRAC : ... On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix
does not define its eigenvalues to high relative accuracy.
So, for example, a modified code may look like:
logical :: tryrac
...
tryrac = .true.
call dstemr( &
'V', 'I', n, d, e, 0d0, 0d0, 1, iu, &
m, vals, vecs, ldz, -1, isuppz, tryrac, & !<--
wk, -1, iwk, -1, info)
...
tryrac = .true.
call dstemr( &
'V', 'I', n, d, e, 0d0, 0d0, 1, iu, &
m, vals, vecs, ldz, nzc, isuppz, tryrac, & !<--
wk, lwk, iwk, liwk, info)
I'll post the whole code segment here, but the only issue really is the nested loop at the end. All read-in matrices are of dimension 180x180 and the loop is unbearably slow. I don't see an easy way to simplify the calculation, since the index-wise multiplications to get the matrix "AnaInt" are no simple matrix products due to the threefold occurance of the indices. Any thoughts? Thanks!
program AC
implicit none
integer, parameter :: dp = selected_real_kind(15, 307)
integer :: n, ndim, k, j, i, o, l, m, steps
real(dp) :: emax, omega, pi, EFermi, auev
complex(dp) :: Grs,Gas, ACCond, tinyc, cunit, czero, cone
complex(dp), allocatable :: GammaL(:,:)
complex(dp), allocatable :: GammaL_EB(:,:)
complex(dp), allocatable :: GammaR(:,:)
complex(dp), allocatable :: R(:,:)
complex(dp), allocatable :: Yc(:,:)
complex(dp), allocatable :: Yd(:,:)
complex(dp), allocatable :: AnaInt(:,:)
complex(dp), allocatable :: H(:,:)
complex(dp), allocatable :: HamEff(:,:)
complex(dp), allocatable :: EigVec(:,:)
complex(dp), allocatable :: InvEigVec(:,:)
complex(dp), allocatable :: EigVal(:)
complex(dp), allocatable :: ctemp(:,:)
complex(dp), allocatable :: ctemp2(:,:)
complex(dp), allocatable :: S(:,:)
complex(dp), allocatable :: SelfL(:,:)
complex(dp), allocatable :: SelfR(:,:)
complex(dp), allocatable :: SHalf(:,:)
complex(dp), allocatable :: InvSHalf(:,:)
complex(dp), allocatable :: HEB(:,:)
complex(dp), allocatable :: Integrand(:,:)
!Lapack arrays and variables
integer :: info, lwork
complex(dp), allocatable :: work(:)
real(dp), allocatable :: rwork(:)
integer,allocatable :: ipiv(:)
!########################################################################
!Constants
auev = 27.211385
pi = 3.14159265359
cunit = (0,1)
czero = (0,0)
cone = (1,0)
tinyc = (0.0, 0.000000000001)
!System and calculation parameters
open(unit=123, file="ForAC.dat", action='read', form='formatted')
read(123,*) ndim, EFermi
lwork = ndim*ndim
emax = 5.0/auev
steps = 1000
allocate(HEB(ndim,ndim))
allocate(H(ndim,ndim))
allocate(Yc(ndim,ndim))
allocate(Yd(ndim,ndim))
allocate(S(ndim,ndim))
allocate(SelfL(ndim,ndim))
allocate(SelfR(ndim,ndim))
allocate(HamEff(ndim,ndim))
allocate(GammaR(ndim,ndim))
allocate(GammaL(ndim,ndim))
allocate(AnaInt(ndim,ndim))
allocate(EigVec(ndim,ndim))
allocate(EigVal(ndim))
allocate(InvEigVec(ndim,ndim))
allocate(R(ndim,ndim))
allocate(GammaL_EB(ndim,ndim))
allocate(Integrand(ndim,ndim))
!################################################
read(123,*) H, S, SelfL, SelfR
close(unit=123)
HamEff(:,:)=(H(:,:) + SelfL(:,:) + SelfR(:,:))
allocate(SHalf(ndim, ndim))
allocate(InvSHalf(ndim,ndim))
SHalf(:,:) = (cmplx(real(S(:,:),dp),0.0_dp,dp))
call zpotrf('l', ndim, SHalf, ndim, info)
InvSHalf(:,:) = SHalf(:,:)
call ztrtri('l', 'n', ndim, InvSHalf, ndim, info)
call ztrmm('l', 'l', 'n', 'n', ndim, ndim, cone, InvSHalf, ndim, HamEff, ndim)
call ztrmm('r', 'l', 't', 'n', ndim, ndim, cone, InvSHalf, ndim, HamEff, ndim)
call ztrmm('l', 'l', 'n', 'n', ndim, ndim, cone, InvSHalf, ndim, GammaL, ndim)
call ztrmm('r', 'l', 't', 'n', ndim, ndim, cone, InvSHalf, ndim, GammaL, ndim)
call ztrmm('l', 'l', 'n', 'n', ndim, ndim, cone, InvSHalf, ndim, GammaR, ndim)
call ztrmm('r', 'l', 't', 'n', ndim, ndim, cone, InvSHalf, ndim, GammaR, ndim)
deallocate(SHalf)
deallocate(InvSHalf)
!In the PDF: B = EigVec, B^(-1) = InvEigVec, Hk = EigVal
allocate(ctemp(ndim,ndim))
ctemp(:,:) = HamEff(:,:)
allocate(work(lwork),rwork(2*ndim))
call zgeev('N', 'V', ndim, ctemp, ndim, EigVal, InvEigVec, ndim, EigVec, ndim, work, lwork, rwork, info)
if(info/=0)write(*,*) "Warning: zgeev info=", info
deallocate(work,rwork)
deallocate(ctemp)
InvEigVec(:,:)=EigVec(:,:)
lwork = 3*ndim
allocate(ipiv(ndim))
allocate(work(lwork))
call zgetrf(ndim,ndim,InvEigVec,ndim,ipiv,info)
if(info/=0)write(*,*) "Warning: zgetrf info=", info ! LU decomposition
call zgetri(ndim,InvEigVec,ndim,ipiv,work,lwork,info)
if(info/=0)write(*,*) "Warning: zgetri info=", info ! Inversion by LU decomposition (Building of InvEigVec)
deallocate(work)
deallocate(ipiv)
R(:,:) = 0.0_dp
do j=1,ndim
do m=1,ndim
do k=1,ndim
do l=1,ndim
R(j,m) = R(j,m) + InvEigVec(j,k) * GammaR(k,l) * conjg(InvEigVec(m,l))
end do
end do
end do
end do
!!!THIS IS THE LOOP IN QUESTION. MATRIX DIMENSION 180x180, STEPS=1000
open(unit=125,file="ACCond.dat")
!Looping over omega
do o=1,steps
omega=real(o,dp)*emax/real(steps,dp)
AnaInt(:,:) = 0.0_dp
do i=1,ndim
do n=1,ndim
do j=1,ndim
do m=1,ndim
Grs = log((EFermi-(EigVal(j)+tinyc)+omega)/(EFermi-(EigVal(j)+tinyc)))
Gas = log((EFermi-conjg(EigVal(m)+tinyc))/(EFermi-omega-conjg(EigVal(m)+tinyc)))
Integrand = (Grs-Gas)/(EigVal(j)-tinyc-omega-conjg(EigVal(m)-tinyc))
AnaInt(i,n)= AnaInt(i,n) + EigVec(i,j) * R(j,m) * Integrand(j,m) * conjg(EigVec(n,m))
end do
end do
end do
end do
Yc = 1/(2.0*pi*omega) * matmul(AnaInt,GammaL)
Yd(:,:) = - 1/(2.0*pi) * cunit * AnaInt(:,:)
ACCond = czero
do k=1,ndim
ACCond=ACCond+Yc(k,k) + 1/(2.0) * Yd(k,k)
end do
write(125,*) omega, real(ACCond,dp), aimag(ACCond)
end do
!#############################################
deallocate(Integrand)
deallocate(HEB)
deallocate(Yc)
deallocate(Yd)
deallocate(HamEff)
deallocate(GammaR)
deallocate(GammaL)
deallocate(AnaInt)
deallocate(EigVec)
deallocate(EigVal)
deallocate(InvEigVec)
deallocate(H)
deallocate(S)
deallocate(SelfL)
deallocate(SelfR)
deallocate(R)
deallocate(GammaL_EB)
end program AC
So, here's the first adaption according to the suggestions:
HermEigVec(:,:) = 0.0_dp
do i=1, ndim
do j=1, ndim
HermEigVec(i,j) = conjg(EigVec(j,i))
end do
end do
HermInvEigVec(:,:) = 0.0_dp
do i=1, ndim
do j=1, ndim
HermInvEigVec(i,j) = conjg(InvEigVec(j,i))
end do
end do
R(:,:) = 0.0_dp
R = matmul(InvEigVec,matmul(GammaR,HermInvEigVec))
open(unit=125,file="ACCond.dat")
!Looping over omega
do o=1,steps
omega=real(o,dp)*emax/real(steps,dp)
AnaInt(:,:) = 0.0_dp
do j=1,ndim
do m=1,ndim
Grs = log((EFermi-(EigVal(j)+tinyc)+omega)/(EFermi-(EigVal(j)+tinyc)))
Gas = log((EFermi-conjg(EigVal(m)+tinyc))/(EFermi-omega-conjg(EigVal(m)+tinyc)))
Integrand(j,m) = (Grs-Gas)/(EigVal(j)-tinyc-omega-conjg(EigVal(m)-tinyc))
T(j,m) = R(j,m) * Integrand(j,m)
end do
end do
AnaInt = matmul(EigVec,matmul(T,HermEigVec))
Yc = 1/(2.0*pi*omega) * matmul(AnaInt,GammaL)
Yd(:,:) = - 1/(2.0*pi) * cunit * AnaInt(:,:)
ACCond = czero
do k=1,ndim
ACCond=ACCond+Yc(k,k) + 1/(2.0) * Yd(k,k)
end do
write(125,*) omega, real(ACCond,dp), aimag(ACCond)
end do
There are several problems in your code.
Let's start with the loop before the one that you emphasize (it's easier to understand, but the following big loop has more or less the same problem).
So we have a loop on i, j, k, l.
You could consider reordering your loops, for better cache access. Your most inner loop is on l, which only appears as column index. With column-major arrays in Fortran, you can expect bad performance from that. An inner loop on j would probably be better.
Much worse, your whole loop is a matrix update by a product of three matrices (InvEigVec * GammaR * InvEigVec^H), but you do it in O(ndim^4). Each matrix product is O(n^3) (or maybe less if you call optimized ZGEMM, using Strassen algorithm). Hence, two products should be O(n^3), not O(n^4), by storing matrix products.
That is, you can do a matrix product, then a matrix product-update.
Now, your big loop: steps times over i, n, j, m.
If I read well, you write
Integrand = (Grs-Gas)/(EigVal(j)-tinyc-omega-conjg(EigVal(m)-tinyc))
Where all variables on right-hand side are scalars, but Integrand is a ndim*ndim matrix. Much work to copy one value in multiple places.
But then you loop on the Integrand, where you could use simply a scalar. Or maybe it's a bug and you should have Integrand(j, m) or alike in left-hand side?
Then, your four inner loops are like in the preceding comments, an update of
AnaInt with array product EigVec * (R .* Integrand) * EigVec^H, with .* the (term by term) scalar product of arrays (or just EigVec * R * EigVec^H if Integrand is just a scalar).
Again, it would probably be good to try to write this with ZGEMMs, thus lowering the complexity a lot.
Have you considered a parallelization over the loops using OPENMP ? It's pretty easy to implement. If interested I could give you some hints maybe.
Try to take a look here: openMP DO tutorial
I am trying to get the ZGEEV routine in Lapack to work for a test problem and having some difficulties. I just started coding in FORTRAN a week ago, so I think it is probably something very trivial that I am missing.
I have to diagonalize rather large complex symmetric matrices. To get started, using Matlab I created a 200 by 200 matrix, which I have verified is diagonalizable. When I run the code, it brings up no errors and the INFO = 0, suggesting a success. However, all the eigenvalues are (0,0) which I know is wrong.
Attached is my code.
PROGRAM speed_zgeev
IMPLICIT NONE
INTEGER(8) :: N
COMPLEX*16, DIMENSION(:,:), ALLOCATABLE :: MAT
INTEGER(8) :: INFO, I, J
COMPLEX*16, DIMENSION(:), ALLOCATABLE :: RWORK
COMPLEX*16, DIMENSION(:), ALLOCATABLE :: D
COMPLEX*16, DIMENSION(1,1) :: VR, VL
INTEGER(8) :: LWORK = -1
COMPLEX*16, DIMENSION(:), ALLOCATABLE :: WORK
DOUBLE PRECISION :: RPART, IPART
EXTERNAL ZGEEV
N = 200
ALLOCATE(D(N))
ALLOCATE(RWORK(2*N))
ALLOCATE(WORK(N))
ALLOCATE(MAT(N,N))
OPEN(UNIT = 31, FILE = "newmat.txt")
OPEN(UNIT = 32, FILE = "newmati.txt")
DO J = 1,N
DO I = 1,N
READ(31,*) RPART
READ(32,*) IPART
MAT(I,J) = CMPLX(RPART, IPART)
END DO
END DO
CLOSE(31)
CLOSE(32)
CALL ZGEEV('N','N', N, MAT, N, D, VL, 1, VR, 1, WORK, LWORK, RWORK, INFO)
INFO = WORK(1)
DEALLOCATE(WORK)
ALLOCATE(WORK(INFO))
CALL ZGEEV('N','N', N, MAT, N, D, VL, 1, VR, 1, WORK, LWORK, RWORK, INFO)
IF (INFO .EQ. 0) THEN
PRINT*, D(1:10)
ELSE
PRINT*, INFO
END IF
DEALLOCATE(MAT)
DEALLOCATE(D)
DEALLOCATE(RWORK)
DEALLOCATE(WORK)
END PROGRAM speed_zgeev
I have tried the same code on smaller matrices, of size 30 by 30 and they work fine. Any help would be appreciated! Thanks.
I forgot to mention that I am loading the matrices from a test file which I have verified to be working right.
Maybe LWORK = WORK (1) instead of INFO = WORK(1)? Also change ALLOCATE(WORK(INFO)).
I kindly request your help on this code where I kept on getting
an error: Rank mismatch in array reference at (1) (2/1). My objective is to go through each point in a cube(p = i+(j-1)*N + (k-1)NN) and calculate the gradient of the potential along each axis (gradphi_x, gradphi_y, gradphi_z).
PROGRAM sub_rho_phi
integer, parameter:: N=3
real, dimension(N):: gradphi_x, gradphi_y, gradphi_z
call output(gradphi_x, gradphi_y, gradphi_z)
open(unit=1,file="grad_phi.dat")
l = 0
do
l=l+1
write(1,*) gradphi_x(l),gradphi_y(l),gradphi_z(l)
if ( l == N**3) then
exit
end if
end do
END
SUBROUTINE output( gradphi_x, gradphi_y, gradphi_z )
real, parameter:: h=0.7,G=6.67,M=1.98892*(10**3)!!in(10**15) kg
integer, parameter:: N=3
real, dimension(N):: r, gradphi_x,gradphi_y,gradphi_z
integer, dimension(N**3):: x,y,z
integer:: p
real:: a
a=500/h !in kpc
do i=0, N
do j=0, N
do k=0, N
p = i+(j-1)*N + (k-1)*N*N
x(p,1)=i
y(p,2)=j
z(p,3)=k
r(p)=sqrt(x(p,1)*x(p,1)+y(p,2)*y(p,2)+z(p,3)*z(p,3))
gradphi_x(p)=(G*M)*x(p,1)/r(p)*(r(p)+a)**2
gradphi_y(p)=(G*M)*y(p,2)/r(p)*(r(p)+a)**2
gradphi_z(p)=(G*M)*z(p,3)/r(p)*(r(p)+a)**2
enddo
enddo
enddo
return
END
You have declared x, y and z as one dimensional arrays, but are using two dimensional indexing all the way through output.