I'd like to use FFTW to compute the Fourier Harmonics for a 2D dataset. And I think I'm getting the hang of it but I need to 'decipher' the complex output in terms of harmonics. First here's my toy code:
PROGRAM FFTW_TEST
USE, intrinsic :: iso_c_binding
IMPLICIT NONE
INTEGER :: i, j, n, m, l, k
REAL :: pi2, norm, norm2
REAL(C_DOUBLE), POINTER, DIMENSION(:) :: theta, func
COMPLEX(C_DOUBLE_COMPLEX), POINTER, DIMENSION(:) :: fmn_1d
REAL(C_DOUBLE), POINTER, DIMENSION(:,:) :: func_2d, r_2d, i_2d
COMPLEX(C_DOUBLE_COMPLEX), POINTER, DIMENSION(:,:) :: fmn_2d
INCLUDE 'fftw3.f03'
TYPE(C_PTR) :: plan
TYPE(C_PTR) :: real_1d, complex_1d
TYPE(C_PTR) :: real_2d, complex_2d
pi2 = 8.0 * ATAN(1.0)
!------ 1D Example
n = 64
real_1d = FFTW_ALLOC_REAL(INT(n,C_SIZE_T))
complex_1d = FFTW_ALLOC_COMPLEX(INT(n,C_SIZE_T))
CALL C_F_POINTER(real_1d, func, [n])
CALL C_F_POINTER(complex_1d, fmn_1d, [n])
ALLOCATE(theta(n))
FORALL(i=1:n) theta(i) = DBLE(i-1)/DBLE(n)
FORALL(i=1:n) func(i) = 1.0+2.0*cos(2*pi2*theta(i))-3.0*cos(3*pi2*theta(i)) ! Even Values
! FORALL(i=1:n) func(i) = 0.0+2.0*sin(4*pi2*theta(i))-3.0*sin(5*pi2*theta(i)) ! Odd Values
WRITE(401,*) func(1:n)
CALL FLUSH(401)
norm = n/2
norm2 = n
plan = FFTW_PLAN_DFT_R2C_1D(n, func, fmn_1d, FFTW_ESTIMATE)
CALL FFTW_EXECUTE_DFT_R2C(plan, func, fmn_1d)
CALL FFTW_DESTROY_PLAN(plan)
func = REAL(fmn_1d)/norm
func(1) = 2*func(1)
WRITE(402,*) func; CALL FLUSH(402)
func = -AIMAG(fmn_1d)/norm
func(1) = 2*func(1) ! note should be zero
WRITE(403,*) func; CALL FLUSH(403)
plan = FFTW_PLAN_DFT_C2R_1D(n, fmn_1d, func, FFTW_ESTIMATE)
CALL FFTW_EXECUTE_DFT_C2R(plan, fmn_1d, func)
CALL FFTW_DESTROY_PLAN(plan)
WRITE(404,*) func/norm2; CALL FLUSH(404)
CALL FFTW_FREE(real_1d)
CALL FFTW_FREE(complex_1d)
IF (ASSOCIATED(theta)) DEALLOCATE(theta)
!------- 2D Example
m=16
n=8
real_2d = FFTW_ALLOC_REAL(INT(m*n,C_SIZE_T))
complex_2d = FFTW_ALLOC_COMPLEX(INT(m*n,C_SIZE_T))
CALL C_F_POINTER(real_2d, func_2d, [m,n])
CALL C_F_POINTER(complex_2d, fmn_2d, [m,n])
PRINT *,LBOUND(func_2d,DIM=1)
ALLOCATE(fmn_1d(m),r_2d(m,n),i_2d(m,n))
func_2d = 0.5
DO i = 1, m
DO j = 1, n
DO l = 0, 8
DO k = -4,4
func_2d(i,j) = func_2d(i,j) + (l*100+k) * cos(l*pi2*(i-1)/m + k*pi2*(j-1)/n)
END DO
END DO
END DO
WRITE(501,*) func_2d(i,1:n); CALL FLUSH(501)
END DO
! Note in the m direction we read m=0,1,2,3,4...
! in the n direction we read n=0..nx/2,-nx/2+1...-1
norm = m*n/2
norm2 = norm*2
plan = FFTW_PLAN_DFT_R2C_2D(n, m, func_2d, fmn_2d, FFTW_ESTIMATE)
CALL FFTW_EXECUTE_DFT_R2C(plan, func_2d, fmn_2d)
CALL FFTW_DESTROY_PLAN(plan)
r_2d = REAL(fmn_2d)/norm
r_2d(1,1) = r_2d(1,1)/2
DO i = 1, m
WRITE(502,*) r_2d(i,1:n); CALL FLUSH(502)
END DO
i_2d = -AIMAG(fmn_2d)/norm
i_2d(1,1) = i_2d(1,1)/2
DO i = 1, m
WRITE(503,*) i_2d(i,1:n); CALL FLUSH(503)
END DO
DO i = 1, m
DO j = 1, n
IF (abs(r_2d(i,j))>1.0E-5 .or. ABS(i_2d(i,j))>1.0E-5) &
WRITE(6,*) i,j,r_2d(i,j),i_2d(i,j)
END DO
END DO
plan = FFTW_PLAN_DFT_C2R_2D(n, m, fmn_2d, func_2d, FFTW_ESTIMATE)
CALL FFTW_EXECUTE_DFT_C2R(plan, fmn_2d, func_2d)
CALL FFTW_DESTROY_PLAN(plan)
DO i = 1, m
WRITE(504,*) func_2d(i,1:n)/norm2; CALL FLUSH(504)
END DO
CALL FFTW_FREE(real_2d)
CALL FFTW_FREE(complex_2d)
IF (ASSOCIATED(fmn_1d)) DEALLOCATE(fmn_1d)
END PROGRAM FFTW_TEST
Now I'd like to extract the harmonics of a function which looks like:
$A_{mn} \cos(m\theta+n\phi)$, where $\theta$ and $\phi$ are the real-space grid. First, taking the real part of the complex array appears to be correct. The harmonics seem correct for simple functions. However, in the final test case it seems that the array has an odd ordering. Here's the ordering
m/n
0/0 1/0 2/0 3/0 4/0 5/0 6/0 7/0 8/0 0/1 1/1 2/1 3/1 4/1 5/1 6/1
7/1 8/1 0/2 1/2 2/2 3/2 4/2 5/2 6/2 7/2 8/2 0/3 1/3 2/3 3/3 4/3
5/3 6/3 7/3 8/3 0/4 1/4 2/4 3/4 5/4 6/4 7/4 8/4 ?/? ?/? ?/? ?/?
For reference here's what I'd expect the ordering to look like:
m/n
0/0 1/0 2/0 3/0 4/0 5/0 6/0 7/0 8/0 -7/0 -6/0 -5/0 -4/0 -3/0 -2/0 -1/0
0/1 1/1 2/1 3/1 4/1 5/1 6/1 7/1 8/1 -7/1 -6/1 -5/1 -4/1 -3/1 -2/1 -1/1
0/2 1/2 2/2 3/2 4/2 5/2 6/2 7/2 8/2 -7/2 -6/2 -5/2 -4/2 -3/2 -2/2 -1/2
It seems like the code is just treating the array like a linear array. But what what I can read in the FFTW documentation this should not be the case. One dimension of the array should correspond to the transforms along one dimension and the other the other. Thus in some sense we should be able to read off m and n harmonics.
Can someone explain how the complex array should be ordered in terms of harmonics and the proper normalizations?
Related
I am currently implementing integrals in Fortran as subroutines. The subroutines on their own return the correct values. If i now call the e.g. same subroutine twice after each other, with the same input values, their returned value differs significantly?
The main program only calls the function like this:
program main
use types
use constants
use integrals
use basis
real(dp), dimension(2,3) :: molecule_coords
real(dp), dimension(2) :: z
type(primitive_gaussian), allocatable :: molecule(:,:)
molecule_coords(1,:) = (/0.,0.,0./)
molecule_coords(2,:) = (/0.,0.,1.6/)
molecule = def_molecule(molecule_coords)
z = (/1.0, 1.0/)
call overlap(molecule) ! Correct Value returned
call overlap(molecule) ! Wrong Value returned
end program main
My function for the overlap looks like this:
module integrals
use types
use constants
use basis
use stdlib_specialfunctions_gamma!, only: lig => lower_incomplete_gamma
contains
subroutine overlap(molecule)
implicit none
type(primitive_gaussian), intent(in) :: molecule(:,:)
integer :: nbasis, i, j, k, l
real(dp) :: norm, p, q, coeff, Kab
real(dp), dimension(3) :: Q_xyz
real(dp), dimension(INT(size(molecule,1)),INT(size(molecule,1))) :: S
nbasis = size(molecule,1)
do i = 1, nbasis
do j = 1, nbasis
! Iterate over l and m primitives in basis
do k = 1, size(molecule(i,:))
do l = 1, size(molecule(j,:))
norm = molecule(i, k)%norm() * molecule(j, l)%norm()
! Eq. 63
Q_xyz = (molecule(i, k)%coords - molecule(j, l)%coords)
! Eq. 64, 65
p = (molecule(i, k)%alpha + molecule(j, l)%alpha)
q = (molecule(i, k)%alpha * molecule(j, l)%alpha) / p
! Eq. 66
Kab = exp(-q * dot_product(Q_xyz,Q_xyz))
coeff = molecule(i, k)%coeff * molecule(j, l)%coeff
S(i,j) = S(i,j) + norm * coeff * Kab * (pi / p) ** (1.5)
end do
end do
end do
end do
print *, S
end subroutine overlap
end module integrals
I am a bit lost, why this would be the case, but I am also rather new to Fortran.
Any help is appreciated! Thanks!
I want to calculate z value as the coordinate in range of x:-50~50 and y:-50~50 like below code.
program test
implicit none
! --- [local entities]
real*8 :: rrr,th,U0,amp,alp,Ndiv
real*8 :: pi,alpR,NR,Rmin,Rmax,z
integer :: ir, i, j
do i=0, 50
do j=0, 50
th=datan2(i,j)
pi=datan(1.d0)*4.d0
!
Ndiv= 24.d0 !! Number of circumferential division
alp = 90.d0/180.d0*pi !! phase [rad]
U0 = 11.4d0 !! average velocity
amp = 0.5d0 !! amplitude of velocity
Rmin = 10 !! [m]
Rmax = 50 !! [m]
NR = 6.d0 !! Number of radial division
!
rrr=dsqrt(i**2+j**2)
ir=int((rrr-Rmin)/(Rmax-Rmin)*NR)
alpR=2.d0*pi/dble(Ndiv)*dble(mod(ir,2))
z=U0*(1.d0+amp*dsin(0.5d0*Ndiv*th+alp+alpR))
write(*,*) 'i, j, z'
write(*,*) i, j, z
end do
end do
stop
end program test
But I couldn't make it work like below error. I think because i, j are in datan(i,j). How should I change these code?
test.f90:10.16:
th=datan2(i,j)
1
Error: 'y' argument of 'datan2' intrinsic at (1) must be REAL
test.f90:21.16:
rrr=dsqrt(i**2+j**2)
1
Error: 'x' argument of 'dsqrt' intrinsic at (1) must be REAL
Inspired by the comments of #Rodrigo Rodrigues, #Ian Bush, and #Richard, here is a suggested rewrite of the code segment from #SW. Kim
program test
use, intrinsic :: iso_fortran_env, only : real64
implicit none
! --- [local entities]
! Determine the kind of your real variables (select one):
! for specifying a given numerical precision
integer, parameter :: wp = selected_real_kind(15, 307) !15 digits, 10**307 range
! for specifying a given number of bits
! integer, parameter :: wp = real64
real(kind=wp), parameter :: pi = atan(1._wp)*4._wp
real(kind=wp) :: rrr, th, U0, amp, alp, Ndiv
real(kind=wp) :: alpR, NR, Rmin, Rmax, z
integer :: ir, i, j
do i = 0, 50
do j = 0, 50
th = atan2(real(i, kind=wp), real(j, kind=wp))
!
Ndiv= 24._wp !! Number of circumferential division
alp = 90._wp/180._wp*pi !! phase [rad]
U0 = 11.4_wp !! average velocity
amp = 0.5_wp !! amplitude of velocity
Rmin = 10 !! [m]
Rmax = 50 !! [m]
NR = 6._wp !! Number of radial division
!
rrr = sqrt(real(i, kind=wp)**2 + real(j, kind=wp)**2)
ir = int((rrr - Rmin) / (Rmax - Rmin) * NR)
alpR = 2._wp * pi / Ndiv * mod(ir, 2)
z = U0 * (1._wp + amp * sin(0.5_wp * Ndiv * th + alp + alpR))
!
write(*,*) 'i, j, z'
write(*,*) i, j, z
end do
end do
stop
end program test
Specifically, the following changes were made with respect to the original code posted:
Minimum change to answer the question: casting integer variables i and j to real values for using them in the real valued functions datan and dsqrt.
Using generic names for intrinsic procedures, i.e sqrt instead of dsqrt, atan instead of datan, and sin instead of dsin. One benefit of this approach, is that the kind of working precision wp can be changed in one place, without requiring explicit changes elsewhere in the code.
Defining the kind of real variables and calling it wp. Extended discussion of this topic, its implications and consequences can be found on this site, for example here and here. Also #Steve Lionel has an in depth post on his blog, where his general advice is to use selected_real_kind.
Defining pi as a parameter calculating its value once, instead of calculating the same value repeatedly within the nested for loops.
I want to calculate z value as the coordinate in range of x:-50~50 and y:-50~50 like below code.
program test
implicit none
! --- [local entities]
real*8 :: rrr,th,U0,amp,alp,Ndiv
real*8 :: pi,alpR,NR,Rmin,Rmax,z
integer :: ir, i, j
do i=0, 50
do j=0, 50
th=datan2(i,j)
pi=datan(1.d0)*4.d0
!
Ndiv= 24.d0 !! Number of circumferential division
alp = 90.d0/180.d0*pi !! phase [rad]
U0 = 11.4d0 !! average velocity
amp = 0.5d0 !! amplitude of velocity
Rmin = 10 !! [m]
Rmax = 50 !! [m]
NR = 6.d0 !! Number of radial division
!
rrr=dsqrt(i**2+j**2)
ir=int((rrr-Rmin)/(Rmax-Rmin)*NR)
alpR=2.d0*pi/dble(Ndiv)*dble(mod(ir,2))
z=U0*(1.d0+amp*dsin(0.5d0*Ndiv*th+alp+alpR))
write(*,*) 'i, j, z'
write(*,*) i, j, z
end do
end do
stop
end program test
But I couldn't make it work like below error. I think because i, j are in datan(i,j). How should I change these code?
test.f90:10.16:
th=datan2(i,j)
1
Error: 'y' argument of 'datan2' intrinsic at (1) must be REAL
test.f90:21.16:
rrr=dsqrt(i**2+j**2)
1
Error: 'x' argument of 'dsqrt' intrinsic at (1) must be REAL
Inspired by the comments of #Rodrigo Rodrigues, #Ian Bush, and #Richard, here is a suggested rewrite of the code segment from #SW. Kim
program test
use, intrinsic :: iso_fortran_env, only : real64
implicit none
! --- [local entities]
! Determine the kind of your real variables (select one):
! for specifying a given numerical precision
integer, parameter :: wp = selected_real_kind(15, 307) !15 digits, 10**307 range
! for specifying a given number of bits
! integer, parameter :: wp = real64
real(kind=wp), parameter :: pi = atan(1._wp)*4._wp
real(kind=wp) :: rrr, th, U0, amp, alp, Ndiv
real(kind=wp) :: alpR, NR, Rmin, Rmax, z
integer :: ir, i, j
do i = 0, 50
do j = 0, 50
th = atan2(real(i, kind=wp), real(j, kind=wp))
!
Ndiv= 24._wp !! Number of circumferential division
alp = 90._wp/180._wp*pi !! phase [rad]
U0 = 11.4_wp !! average velocity
amp = 0.5_wp !! amplitude of velocity
Rmin = 10 !! [m]
Rmax = 50 !! [m]
NR = 6._wp !! Number of radial division
!
rrr = sqrt(real(i, kind=wp)**2 + real(j, kind=wp)**2)
ir = int((rrr - Rmin) / (Rmax - Rmin) * NR)
alpR = 2._wp * pi / Ndiv * mod(ir, 2)
z = U0 * (1._wp + amp * sin(0.5_wp * Ndiv * th + alp + alpR))
!
write(*,*) 'i, j, z'
write(*,*) i, j, z
end do
end do
stop
end program test
Specifically, the following changes were made with respect to the original code posted:
Minimum change to answer the question: casting integer variables i and j to real values for using them in the real valued functions datan and dsqrt.
Using generic names for intrinsic procedures, i.e sqrt instead of dsqrt, atan instead of datan, and sin instead of dsin. One benefit of this approach, is that the kind of working precision wp can be changed in one place, without requiring explicit changes elsewhere in the code.
Defining the kind of real variables and calling it wp. Extended discussion of this topic, its implications and consequences can be found on this site, for example here and here. Also #Steve Lionel has an in depth post on his blog, where his general advice is to use selected_real_kind.
Defining pi as a parameter calculating its value once, instead of calculating the same value repeatedly within the nested for loops.
Question
Consider the following code:
program example
implicit none
integer, parameter :: n_coeffs = 1000
integer, parameter :: n_indices = 5
integer :: i
real(8), dimension(n_coeffs) :: coeff
integer, dimension(n_coeffs,n_indices) :: index
do i = 1, n_coeffs
coeff(i) = real(i*3,8)
index(i,:) = [2,4,8,16,32]*i
end do
end
For any 5 dimensional index I need to obtain the associated coefficient, without knowing or calculating i. For instance, given [2,4,8,16,32] I need to obtain 3.0 without computing i.
Is there a reasonable solution, perhaps using sparse matrices, that would work for n_indices in the order of 100 (though n_coeffs still in the order of 1000)?
A Bad Solution
One solution would be to define a 5 dimensional array as in
real(8), dimension(2000,4000,8000,16000,32000) :: coeff2
do i = 1, ncoeffs
coeff2(index(i,1),index(i,2),index(i,3),index(i,4),index(i,5)) = coeff(i)
end do
then, to get the coefficient associated with [2,4,8,16,32], call
coeff2(2,4,8,16,32)
However, besides being very wasteful of memory, this solution would not allow n_indices to be set to a number higher than 7 given the limit of 7 dimensions to an array.
OBS: This question is a spin-off of this one. I have tried to ask the question more precisely having failed in the first attempt, an effort that greatly benefited from the answer of #Rodrigo_Rodrigues.
Actual Code
In case it helps here is the code for the actual problem I am trying to solve. It is an adaptive sparse grid method for approximating a function. The main goal is to make the interpolation at the and as fast as possible:
MODULE MOD_PARAMETERS
IMPLICIT NONE
SAVE
INTEGER, PARAMETER :: d = 2 ! number of dimensions
INTEGER, PARAMETER :: L_0 = 4 ! after this adaptive grid kicks in, for L <= L_0 usual sparse grid
INTEGER, PARAMETER :: L_max = 9 ! maximum level
INTEGER, PARAMETER :: bound = 0 ! 0 -> for f = 0 at boundary
! 1 -> adding grid points at boundary
! 2 -> extrapolating close to boundary
INTEGER, PARAMETER :: max_error = 1
INTEGER, PARAMETER :: L2_error = 1
INTEGER, PARAMETER :: testing_sample = 1000000
REAL(8), PARAMETER :: eps = 0.01D0 ! epsilon for adaptive grid
END MODULE MOD_PARAMETERS
PROGRAM MAIN
USE MOD_PARAMETERS
IMPLICIT NONE
INTEGER, DIMENSION(d,d) :: ident
REAL(8), DIMENSION(d) :: xd
INTEGER, DIMENSION(2*d) :: temp
INTEGER, DIMENSION(:,:), ALLOCATABLE :: grid_index, temp_grid_index, grid_index_new, J_index
REAL(8), DIMENSION(:), ALLOCATABLE :: coeff, temp_coeff, J_coeff
REAL(8) :: temp_min, temp_max, V, T, B, F, x1
INTEGER :: k, k_1, k_2, h, i, j, L, n, dd, L1, L2, dsize, count, first, repeated, add, ind
INTEGER :: time1, time2, clock_rate, clock_max
REAL(8), DIMENSION(L_max,L_max,2**(L_max),2**(L_max)) :: coeff_grid
INTEGER, DIMENSION(d) :: level, LL, ii
REAL(8), DIMENSION(testing_sample,d) :: x_rand
REAL(8), DIMENSION(testing_sample) :: interp1, interp2
! ============================================================================
! EXECUTABLE
! ============================================================================
ident = 0
DO i = 1,d
ident(i,i) = 1
ENDDO
! Initial grid point
dsize = 1
ALLOCATE(grid_index(dsize,2*d),grid_index_new(dsize,2*d))
grid_index(1,:) = 1
grid_index_new = grid_index
ALLOCATE(coeff(dsize))
xd = (/ 0.5D0, 0.5D0 /)
CALL FF(xd,coeff(1))
CALL FF(xd,coeff_grid(1,1,1,1))
L = 1
n = SIZE(grid_index_new,1)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO WHILE (L .LT. L_max)
L = L+1
n = SIZE(grid_index_new,1)
count = 0
first = 1
DEALLOCATE(J_index,J_coeff)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
J_index = 0
J_coeff = 0.0D0
DO k = 1,n
DO i = 1,d
DO j = 1,2
IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
ELSEIF (bound .EQ. 1) THEN
IF (grid_index_new(k,i) .EQ. 1) THEN
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(-(-1)**j)/)
ELSE
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
ENDIF
ENDIF
CALL XX(d,temp(1:d),temp(d+1:2*d),xd)
temp_min = MINVAL(xd)
temp_max = MAXVAL(xd)
IF ((temp_min .GE. 0.0D0) .AND. (temp_max .LE. 1.0D0)) THEN
IF (first .EQ. 1) THEN
first = 0
count = count+1
J_index(count,:) = temp
V = 0.0D0
DO k_1 = 1,SIZE(grid_index,1)
T = 1.0D0
DO k_2 = 1,d
CALL XX(1,temp(k_2),temp(d+k_2),x1)
CALL BASE(x1,grid_index(k_1,k_2),grid_index(k_1,k_2+d),B)
T = T*B
ENDDO
V = V+coeff(k_1)*T
ENDDO
CALL FF(xd,F)
J_coeff(count) = F-V
ELSE
repeated = 0
DO h = 1,count
IF (SUM(ABS(J_index(h,:)-temp)) .EQ. 0) THEN
repeated = 1
ENDIF
ENDDO
IF (repeated .EQ. 0) THEN
count = count+1
J_index(count,:) = temp
V = 0.0D0
DO k_1 = 1,SIZE(grid_index,1)
T = 1.0D0
DO k_2 = 1,d
CALL XX(1,temp(k_2),temp(d+k_2),x1)
CALL BASE(x1,grid_index(k_1,k_2),grid_index(k_1,k_2+d),B)
T = T*B
ENDDO
V = V+coeff(k_1)*T
ENDDO
CALL FF(xd,F)
J_coeff(count) = F-V
ENDIF
ENDIF
ENDIF
ENDDO
ENDDO
ENDDO
ALLOCATE(temp_grid_index(dsize,2*d))
ALLOCATE(temp_coeff(dsize))
temp_grid_index = grid_index
temp_coeff = coeff
DEALLOCATE(grid_index,coeff)
ALLOCATE(grid_index(dsize+count,2*d))
ALLOCATE(coeff(dsize+count))
grid_index(1:dsize,:) = temp_grid_index
coeff(1:dsize) = temp_coeff
DEALLOCATE(temp_grid_index,temp_coeff)
grid_index(dsize+1:dsize+count,:) = J_index(1:count,:)
coeff(dsize+1:dsize+count) = J_coeff(1:count)
dsize = dsize + count
DO i = 1,count
coeff_grid(J_index(i,1),J_index(i,2),J_index(i,3),J_index(i,4)) = J_coeff(i)
ENDDO
IF (L .LE. L_0) THEN
DEALLOCATE(grid_index_new)
ALLOCATE(grid_index_new(count,2*d))
grid_index_new = J_index(1:count,:)
ELSE
add = 0
DO h = 1,count
IF (ABS(J_coeff(h)) .GT. eps) THEN
add = add + 1
J_index(add,:) = J_index(h,:)
ENDIF
ENDDO
DEALLOCATE(grid_index_new)
ALLOCATE(grid_index_new(add,2*d))
grid_index_new = J_index(1:add,:)
ENDIF
ENDDO
CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, 'Elapsed real time1 = ', DBLE(time2-time1)/DBLE(clock_rate)
PRINT *, 'Grid Points = ', SIZE(grid_index,1)
! ============================================================================
! Compute interpolated values:
! ============================================================================
CALL RANDOM_NUMBER(x_rand)
CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO i = 1,testing_sample
V = 0.0D0
DO L1=1,L_max
DO L2=1,L_max
IF (L1+L2 .LE. L_max+1) THEN
level = (/L1,L2/)
T = 1.0D0
DO dd = 1,d
T = T*(1.0D0-ABS(x_rand(i,dd)/2.0D0**(-DBLE(level(dd)))-DBLE(2*FLOOR(x_rand(i,dd)*2.0D0**DBLE(level(dd)-1))+1)))
ENDDO
V = V + coeff_grid(L1,L2,2*FLOOR(x_rand(i,1)*2.0D0**DBLE(L1-1))+1,2*FLOOR(x_rand(i,2)*2.0D0**DBLE(L2-1))+1)*T
ENDIF
ENDDO
ENDDO
interp2(i) = V
ENDDO
CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, 'Elapsed real time2 = ', DBLE(time2-time1)/DBLE(clock_rate)
END PROGRAM
For any 5 dimensional index I need to obtain the associated
coefficient, without knowing or calculating i. For instance, given
[2,4,8,16,32] I need to obtain 3.0 without computing i.
function findloc_vector(matrix, vector) result(out)
integer, intent(in) :: matrix(:, :)
integer, intent(in) :: vector(size(matrix, dim=2))
integer :: out, i
do i = 1, size(matrix, dim=1)
if (all(matrix(i, :) == vector)) then
out = i
return
end if
end do
stop "No match for this vector"
end
And that's how you use it:
print*, coeff(findloc_vector(index, [2,4,8,16,32])) ! outputs 3.0
I must confess I was reluctant to post this code because, even though this answers your question, I honestly think this is not what you really want/need, but you dind't provide enough information for me to know what you really do want/need.
Edit (After actual code from OP):
If I decrypted your code correctly (and considering what you said in your previous question), you are declaring:
REAL(8), DIMENSION(L_max,L_max,2**(L_max),2**(L_max)) :: coeff_grid
(where L_max = 9, so size(coeff_grid) = 21233664 =~160MB) and then populating it with:
DO i = 1,count
coeff_grid(J_index(i,1),J_index(i,2),J_index(i,3),J_index(i,4)) = J_coeff(i)
ENDDO
(where count is of the order of 1000, i.e. 0.005% of its elements), so this way you can fetch the values by its 4 indices with the array notation.
Please, don't do that. You don't need a sparse matrix in this case either. The new approach you proposed is much better: storing the indices in each row of an smaller array, and fetching on the array of coefficients by the corresponding location of those indices in its own array. This is way faster (avoiding the large allocation) and much more memory-efficient.
PS: Is it mandatory for you to stick to Fortran 90? Its a very old version of the standard and chances are that the compiler you're using implements a more recent version. You could improve the quality of your code a lot with the intrinsic move_alloc (for less array copies), the kind constants from the intrinsic module iso_fortran_env (for portability), the [], >, <, <=,... notation (for readability)...
I'm trying to break up a 4D array over the third dimension, and send to each node using MPI. Basically, I'm computing derivatives of a matrix, Cpq, with respect to atom positions in each of the three cartesian directions. Cpq is of size nat_sl x nat_sl, so dCpqdR is of size nat_sl x nat_sl x nat x 3. At the end of the day, for ever s,i pair, I have to compute the matrix product of dCpqdR between the transpose of the eigenvectors of Cpq and the eigenvectors of Cpq like so:
temp = MATMUL(TRANSPOSE(Cpq), MATMUL(dCpqdR(:, :, s, i), Cpq))
This is fine, but as it turns out, the loop over s and i is now by far the slow part of my code. Because each can be done independently, I was hoping that I could break up dCpqdR, and give each task it's own s, i to compute the derivative of. That is, I'd like task 1 to get dCpqdR(:,:,1,1), task 2 to get dCpqdR(:,:,1,2), etc.
I've got this working in some sense by using a buffered send/recv pair of calls. The root node allocates a temporary array, fills it, sends to the relevant nodes, and the relevant nodes do their computations as they wish. This is fine, but can be slow and memory inefficient. I'd ideally like to break it up in a more memory efficient way.
The logical thing to do, then, is to use mpi_scatterv, but here is where I start running into trouble, as I'm having trouble figuring out the memory layout for this. I've written this, so far:
call mpi_type_create_subarray(4, (/ nat_sl, nat_sl, nat, 3 /), (/nat_sl, nat_sl, n_pairs(me_image+1), 3/),&
(/0, 0, 0, 0/), mpi_order_fortran, mpi_double_precision, subarr_typ, ierr)
call mpi_type_commit(subarr_typ, ierr)
call mpi_scatterv(dCpqdR, n_pairs(me_image+1), f_displs, subarr_typ,&
my_dCpqdR, 3*nat_sl*3*nat_sl*3*n_pairs(me_image+1), subarr_typ,&
root_image, intra_image_comm, ierr)
I've computed n_pairs using this subroutine:
subroutine mbdvdw_para_init_int_forces()
implicit none
integer :: p, s, i, counter, k, cpu_ind
integer :: num_unique_rpq, n_pairs_per_proc, cpu
real(dp) :: Rpq(3), Rpq_norm, current_val
num_pairs = nat
if(.not.allocated(f_cpu_id)) allocate(f_cpu_id(nat, 3))
n_pairs_per_proc = floor(dble(num_pairs)/nproc_image)
cpu = 0
n_pairs = 0
counter = 1
p = 1
do counter = 0, num_pairs-1, 1
n_pairs(modulo(counter, nproc_image)+1) = n_pairs(modulo(counter, nproc_image)+1) + 1
end do
do s = 1, nat, 1
f_cpu_id(s) = cpu
if((counter.lt.num_pairs)) then
if(p.eq.n_pairs(cpu+1)) then
cpu = cpu + 1
p = 0
end if
end if
p = p + 1
end do
call mp_set_displs( n_pairs, f_displs, num_pairs, nproc_image)
f_displs = f_displs*nat_sl*nat_sl*3
end subroutine mbdvdw_para_init_int_forces
and the full method for the matrix multiplication is
subroutine mbdvdw_interacting_energy(energy, forcedR, forcedh, forcedV)
implicit none
real(dp), intent(out) :: energy
real(dp), dimension(nat, 3), intent(out) :: forcedR
real(dp), dimension(3,3), intent(out) :: forcedh
real(dp), dimension(nat), intent(out) :: forcedV
real(dp), dimension(3*nat_sl, 3*nat_sl) :: temp
real(dp), dimension(:,:,:,:), allocatable :: my_dCpqdR
integer :: num_negative, i_atom, s, i, j, counter
integer, parameter :: eigs_check = 200
integer :: subarr_typ, ierr
! lapack work variables
integer :: LWORK, errorflag
real(dp) :: WORK((3*nat_sl)*(3+(3*nat_sl)/2)), eigenvalues(3*nat_sl)
call start_clock('mbd_int_energy')
call mp_sum(Cpq, intra_image_comm)
eigenvalues = 0.0_DP
forcedR = 0.0_DP
energy = 0.0_DP
num_negative = 0
forcedV = 0.0_DP
errorflag=0
LWORK=3*nat_sl*(3+(3*nat_sl)/2)
call DSYEV('V', 'U', 3*nat_sl, Cpq, 3*nat_sl, eigenvalues, WORK, LWORK, errorflag)
if(errorflag.eq.0) then
do i_atom=1, 3*nat_sl, 1
!open (unit=eigs_check, file="eigs.tmp",action="write",status="unknown",position="append")
! write(eigs_check, *) eigenvalues(i_atom)
!close(eigs_check)
if(eigenvalues(i_atom).ge.0.0_DP) then
energy = energy + dsqrt(eigenvalues(i_atom))
else
num_negative = num_negative + 1
end if
end do
if(num_negative.ge.1) then
write(stdout, '(3X," WARNING: Found ", I3, " Negative Eigenvalues.")'), num_negative
end if
else
end if
energy = energy*nat/nat_sl
!!!!!!!!!!!!!!!!!!!!
! Forces below here. There's going to be some long parallelization business.
!!!!!!!!!!!!!!!!!!!!
call start_clock('mbd_int_forces')
if(.not.allocated(my_dCpqdR)) allocate(my_dCpqdR(nat_sl, nat_sl, n_pairs(me_image+1), 3)), my_dCpqdR = 0.0_DP
if(mbd_vdw_forces) then
do s=1,nat,1
if(me_image.eq.(f_cpu_id(s)+1)) then
do i=1,3,1
temp = MATMUL(TRANSPOSE(Cpq), MATMUL(my_dCpqdR(:, :, counter, i), Cpq))
do j=1,3*nat_sl,1
if(eigenvalues(j).ge.0.0_DP) then
forcedR(s, i) = forcedR(s, i) + 1.0_DP/(2.0_DP*dsqrt(eigenvalues(j)))*temp(j,j)
end if
end do
end do
counter = counter + 1
end if
end do
forcedR = forcedR*nat/nat_sl
do s=1,3,1
do i=1,3,1
temp = MATMUL(TRANSPOSE(Cpq), MATMUL(dCpqdh(:, :, s, i), Cpq))
do j=1,3*nat_sl,1
if(eigenvalues(j).ge.0.0_DP) then
forcedh(s, i) = forcedh(s, i) + 1.0_DP/(2.0_DP*dsqrt(eigenvalues(j)))*temp(j,j)
end if
end do
end do
end do
forcedh = forcedh*nat/nat_sl
call mp_sum(forcedR, intra_image_comm)
call mp_sum(forcedh, intra_image_comm)
end if
call stop_clock('mbd_int_forces')
call stop_clock('mbd_int_energy')
return
end subroutine mbdvdw_interacting_energy
But when run, it's complaining that
[MathBook Pro:58100] *** An error occurred in MPI_Type_create_subarray
[MathBook Pro:58100] *** reported by process [2560884737,2314885530279477248]
[MathBook Pro:58100] *** on communicator MPI_COMM_WORLD
[MathBook Pro:58100] *** MPI_ERR_ARG: invalid argument of some other kind
[MathBook Pro:58100] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[MathBook Pro:58100] *** and potentially your MPI job)
so something is going wrong, but I have no idea what. I know my description is somewhat sparse to start with, so please let me know what information would be necessary to help.