Storing a Variable with a Multi-Dimensional Index in Fortran - fortran

Question
Consider the following code:
program example
implicit none
integer, parameter :: n_coeffs = 1000
integer, parameter :: n_indices = 5
integer :: i
real(8), dimension(n_coeffs) :: coeff
integer, dimension(n_coeffs,n_indices) :: index
do i = 1, n_coeffs
coeff(i) = real(i*3,8)
index(i,:) = [2,4,8,16,32]*i
end do
end
For any 5 dimensional index I need to obtain the associated coefficient, without knowing or calculating i. For instance, given [2,4,8,16,32] I need to obtain 3.0 without computing i.
Is there a reasonable solution, perhaps using sparse matrices, that would work for n_indices in the order of 100 (though n_coeffs still in the order of 1000)?
A Bad Solution
One solution would be to define a 5 dimensional array as in
real(8), dimension(2000,4000,8000,16000,32000) :: coeff2
do i = 1, ncoeffs
coeff2(index(i,1),index(i,2),index(i,3),index(i,4),index(i,5)) = coeff(i)
end do
then, to get the coefficient associated with [2,4,8,16,32], call
coeff2(2,4,8,16,32)
However, besides being very wasteful of memory, this solution would not allow n_indices to be set to a number higher than 7 given the limit of 7 dimensions to an array.
OBS: This question is a spin-off of this one. I have tried to ask the question more precisely having failed in the first attempt, an effort that greatly benefited from the answer of #Rodrigo_Rodrigues.
Actual Code
In case it helps here is the code for the actual problem I am trying to solve. It is an adaptive sparse grid method for approximating a function. The main goal is to make the interpolation at the and as fast as possible:
MODULE MOD_PARAMETERS
IMPLICIT NONE
SAVE
INTEGER, PARAMETER :: d = 2 ! number of dimensions
INTEGER, PARAMETER :: L_0 = 4 ! after this adaptive grid kicks in, for L <= L_0 usual sparse grid
INTEGER, PARAMETER :: L_max = 9 ! maximum level
INTEGER, PARAMETER :: bound = 0 ! 0 -> for f = 0 at boundary
! 1 -> adding grid points at boundary
! 2 -> extrapolating close to boundary
INTEGER, PARAMETER :: max_error = 1
INTEGER, PARAMETER :: L2_error = 1
INTEGER, PARAMETER :: testing_sample = 1000000
REAL(8), PARAMETER :: eps = 0.01D0 ! epsilon for adaptive grid
END MODULE MOD_PARAMETERS
PROGRAM MAIN
USE MOD_PARAMETERS
IMPLICIT NONE
INTEGER, DIMENSION(d,d) :: ident
REAL(8), DIMENSION(d) :: xd
INTEGER, DIMENSION(2*d) :: temp
INTEGER, DIMENSION(:,:), ALLOCATABLE :: grid_index, temp_grid_index, grid_index_new, J_index
REAL(8), DIMENSION(:), ALLOCATABLE :: coeff, temp_coeff, J_coeff
REAL(8) :: temp_min, temp_max, V, T, B, F, x1
INTEGER :: k, k_1, k_2, h, i, j, L, n, dd, L1, L2, dsize, count, first, repeated, add, ind
INTEGER :: time1, time2, clock_rate, clock_max
REAL(8), DIMENSION(L_max,L_max,2**(L_max),2**(L_max)) :: coeff_grid
INTEGER, DIMENSION(d) :: level, LL, ii
REAL(8), DIMENSION(testing_sample,d) :: x_rand
REAL(8), DIMENSION(testing_sample) :: interp1, interp2
! ============================================================================
! EXECUTABLE
! ============================================================================
ident = 0
DO i = 1,d
ident(i,i) = 1
ENDDO
! Initial grid point
dsize = 1
ALLOCATE(grid_index(dsize,2*d),grid_index_new(dsize,2*d))
grid_index(1,:) = 1
grid_index_new = grid_index
ALLOCATE(coeff(dsize))
xd = (/ 0.5D0, 0.5D0 /)
CALL FF(xd,coeff(1))
CALL FF(xd,coeff_grid(1,1,1,1))
L = 1
n = SIZE(grid_index_new,1)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO WHILE (L .LT. L_max)
L = L+1
n = SIZE(grid_index_new,1)
count = 0
first = 1
DEALLOCATE(J_index,J_coeff)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
J_index = 0
J_coeff = 0.0D0
DO k = 1,n
DO i = 1,d
DO j = 1,2
IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
ELSEIF (bound .EQ. 1) THEN
IF (grid_index_new(k,i) .EQ. 1) THEN
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(-(-1)**j)/)
ELSE
temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
ENDIF
ENDIF
CALL XX(d,temp(1:d),temp(d+1:2*d),xd)
temp_min = MINVAL(xd)
temp_max = MAXVAL(xd)
IF ((temp_min .GE. 0.0D0) .AND. (temp_max .LE. 1.0D0)) THEN
IF (first .EQ. 1) THEN
first = 0
count = count+1
J_index(count,:) = temp
V = 0.0D0
DO k_1 = 1,SIZE(grid_index,1)
T = 1.0D0
DO k_2 = 1,d
CALL XX(1,temp(k_2),temp(d+k_2),x1)
CALL BASE(x1,grid_index(k_1,k_2),grid_index(k_1,k_2+d),B)
T = T*B
ENDDO
V = V+coeff(k_1)*T
ENDDO
CALL FF(xd,F)
J_coeff(count) = F-V
ELSE
repeated = 0
DO h = 1,count
IF (SUM(ABS(J_index(h,:)-temp)) .EQ. 0) THEN
repeated = 1
ENDIF
ENDDO
IF (repeated .EQ. 0) THEN
count = count+1
J_index(count,:) = temp
V = 0.0D0
DO k_1 = 1,SIZE(grid_index,1)
T = 1.0D0
DO k_2 = 1,d
CALL XX(1,temp(k_2),temp(d+k_2),x1)
CALL BASE(x1,grid_index(k_1,k_2),grid_index(k_1,k_2+d),B)
T = T*B
ENDDO
V = V+coeff(k_1)*T
ENDDO
CALL FF(xd,F)
J_coeff(count) = F-V
ENDIF
ENDIF
ENDIF
ENDDO
ENDDO
ENDDO
ALLOCATE(temp_grid_index(dsize,2*d))
ALLOCATE(temp_coeff(dsize))
temp_grid_index = grid_index
temp_coeff = coeff
DEALLOCATE(grid_index,coeff)
ALLOCATE(grid_index(dsize+count,2*d))
ALLOCATE(coeff(dsize+count))
grid_index(1:dsize,:) = temp_grid_index
coeff(1:dsize) = temp_coeff
DEALLOCATE(temp_grid_index,temp_coeff)
grid_index(dsize+1:dsize+count,:) = J_index(1:count,:)
coeff(dsize+1:dsize+count) = J_coeff(1:count)
dsize = dsize + count
DO i = 1,count
coeff_grid(J_index(i,1),J_index(i,2),J_index(i,3),J_index(i,4)) = J_coeff(i)
ENDDO
IF (L .LE. L_0) THEN
DEALLOCATE(grid_index_new)
ALLOCATE(grid_index_new(count,2*d))
grid_index_new = J_index(1:count,:)
ELSE
add = 0
DO h = 1,count
IF (ABS(J_coeff(h)) .GT. eps) THEN
add = add + 1
J_index(add,:) = J_index(h,:)
ENDIF
ENDDO
DEALLOCATE(grid_index_new)
ALLOCATE(grid_index_new(add,2*d))
grid_index_new = J_index(1:add,:)
ENDIF
ENDDO
CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, 'Elapsed real time1 = ', DBLE(time2-time1)/DBLE(clock_rate)
PRINT *, 'Grid Points = ', SIZE(grid_index,1)
! ============================================================================
! Compute interpolated values:
! ============================================================================
CALL RANDOM_NUMBER(x_rand)
CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO i = 1,testing_sample
V = 0.0D0
DO L1=1,L_max
DO L2=1,L_max
IF (L1+L2 .LE. L_max+1) THEN
level = (/L1,L2/)
T = 1.0D0
DO dd = 1,d
T = T*(1.0D0-ABS(x_rand(i,dd)/2.0D0**(-DBLE(level(dd)))-DBLE(2*FLOOR(x_rand(i,dd)*2.0D0**DBLE(level(dd)-1))+1)))
ENDDO
V = V + coeff_grid(L1,L2,2*FLOOR(x_rand(i,1)*2.0D0**DBLE(L1-1))+1,2*FLOOR(x_rand(i,2)*2.0D0**DBLE(L2-1))+1)*T
ENDIF
ENDDO
ENDDO
interp2(i) = V
ENDDO
CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, 'Elapsed real time2 = ', DBLE(time2-time1)/DBLE(clock_rate)
END PROGRAM

For any 5 dimensional index I need to obtain the associated
coefficient, without knowing or calculating i. For instance, given
[2,4,8,16,32] I need to obtain 3.0 without computing i.
function findloc_vector(matrix, vector) result(out)
integer, intent(in) :: matrix(:, :)
integer, intent(in) :: vector(size(matrix, dim=2))
integer :: out, i
do i = 1, size(matrix, dim=1)
if (all(matrix(i, :) == vector)) then
out = i
return
end if
end do
stop "No match for this vector"
end
And that's how you use it:
print*, coeff(findloc_vector(index, [2,4,8,16,32])) ! outputs 3.0
I must confess I was reluctant to post this code because, even though this answers your question, I honestly think this is not what you really want/need, but you dind't provide enough information for me to know what you really do want/need.
Edit (After actual code from OP):
If I decrypted your code correctly (and considering what you said in your previous question), you are declaring:
REAL(8), DIMENSION(L_max,L_max,2**(L_max),2**(L_max)) :: coeff_grid
(where L_max = 9, so size(coeff_grid) = 21233664 =~160MB) and then populating it with:
DO i = 1,count
coeff_grid(J_index(i,1),J_index(i,2),J_index(i,3),J_index(i,4)) = J_coeff(i)
ENDDO
(where count is of the order of 1000, i.e. 0.005% of its elements), so this way you can fetch the values by its 4 indices with the array notation.
Please, don't do that. You don't need a sparse matrix in this case either. The new approach you proposed is much better: storing the indices in each row of an smaller array, and fetching on the array of coefficients by the corresponding location of those indices in its own array. This is way faster (avoiding the large allocation) and much more memory-efficient.
PS: Is it mandatory for you to stick to Fortran 90? Its a very old version of the standard and chances are that the compiler you're using implements a more recent version. You could improve the quality of your code a lot with the intrinsic move_alloc (for less array copies), the kind constants from the intrinsic module iso_fortran_env (for portability), the [], >, <, <=,... notation (for readability)...

Related

Fortran Subroutines/Functions: Returned Value Changes If Subroutines/Functions Is Called More Often?

I am currently implementing integrals in Fortran as subroutines. The subroutines on their own return the correct values. If i now call the e.g. same subroutine twice after each other, with the same input values, their returned value differs significantly?
The main program only calls the function like this:
program main
use types
use constants
use integrals
use basis
real(dp), dimension(2,3) :: molecule_coords
real(dp), dimension(2) :: z
type(primitive_gaussian), allocatable :: molecule(:,:)
molecule_coords(1,:) = (/0.,0.,0./)
molecule_coords(2,:) = (/0.,0.,1.6/)
molecule = def_molecule(molecule_coords)
z = (/1.0, 1.0/)
call overlap(molecule) ! Correct Value returned
call overlap(molecule) ! Wrong Value returned
end program main
My function for the overlap looks like this:
module integrals
use types
use constants
use basis
use stdlib_specialfunctions_gamma!, only: lig => lower_incomplete_gamma
contains
subroutine overlap(molecule)
implicit none
type(primitive_gaussian), intent(in) :: molecule(:,:)
integer :: nbasis, i, j, k, l
real(dp) :: norm, p, q, coeff, Kab
real(dp), dimension(3) :: Q_xyz
real(dp), dimension(INT(size(molecule,1)),INT(size(molecule,1))) :: S
nbasis = size(molecule,1)
do i = 1, nbasis
do j = 1, nbasis
! Iterate over l and m primitives in basis
do k = 1, size(molecule(i,:))
do l = 1, size(molecule(j,:))
norm = molecule(i, k)%norm() * molecule(j, l)%norm()
! Eq. 63
Q_xyz = (molecule(i, k)%coords - molecule(j, l)%coords)
! Eq. 64, 65
p = (molecule(i, k)%alpha + molecule(j, l)%alpha)
q = (molecule(i, k)%alpha * molecule(j, l)%alpha) / p
! Eq. 66
Kab = exp(-q * dot_product(Q_xyz,Q_xyz))
coeff = molecule(i, k)%coeff * molecule(j, l)%coeff
S(i,j) = S(i,j) + norm * coeff * Kab * (pi / p) ** (1.5)
end do
end do
end do
end do
print *, S
end subroutine overlap
end module integrals
I am a bit lost, why this would be the case, but I am also rather new to Fortran.
Any help is appreciated! Thanks!

Unexpected Behavior With Basic Gaussian Elimination in Fortran

I am trying to solve a system of linear equations in matrix form using one of the many widely-available routines, specifically this function gauss (along with sample data with a known result), but regardless of which elimination strategy I use I consistently get incorrect results and don't know where to turn at this point. My code:
MODULE gaussMod
CONTAINS
function gauss(a,b) result(x)
implicit none
real*8 :: b(:), a(size(b), size(b))
real*8 :: x(size(b))
real*8 :: r(size(b))
integer i,j, neq
neq = size(b)
do i =1, neq
r = a(:,i)/a(i,i)
do j = i+1, neq
a(j,:) = a(j,:) - r(j)*a(i,:)
b(j) = b(j) - r(j)*b(j)
enddo
enddo
do i= neq, 1, -1
x(i) = (b(i) - sum(a(i, i+1:) * x(i+1:))) / a(i,i)
enddo
END function
END MODULE
SUBROUTINE outFile(n,x)
IMPLICIT none
INTEGER n
INTEGER i
REAL*8, DIMENSION(n) :: x
OPEN(UNIT=20,FILE="solution.csv",action="write",status="replace")
DO i=1, n
WRITE (20,"(1(f0.30,',',:))") x(i)
END DO
CLOSE(20)
END SUBROUTINE
PROGRAM elimtest
USE gaussMod
IMPLICIT none
INTEGER, PARAMETER :: coeff_kind = selected_real_kind(p=30, r=99)
INTEGER n
REAL*8, DIMENSION(:,:), ALLOCATABLE :: a
REAL*8, DIMENSION(:), ALLOCATABLE :: b
REAL*8, DIMENSION(:), ALLOCATABLE :: x
n = 4
ALLOCATE(a(n,n))
ALLOCATE(b(n))
ALLOCATE(x(n))
a(1,1) = 18.
a(1,2) = -6.
a(1,3) = -6.
a(1,4) = 0.
a(2,1) = -6.
a(2,2) = 12.
a(2,3) = 0.
a(2,4) = -6.
a(3,1) = -6.
a(3,2) = 0.
a(3,3) = 12.
a(3,4) = -6.
a(4,1) = 0.
a(4,2) = -6.
a(4,3) = -6.
a(4,4) = 18.
b(1) = 60.
b(2) = 0.
b(3) = 20.
b(4) = 0.
x = gauss(a,b)
CALL outFile(n,x)
END PROGRAM
Any help would be greatly appreciated!
It looks like the following line
b(j) = b(j) - r(j)*b(j)
is a typo of
b(j) = b(j) - r(j)*b(i)
With this modification your code gives the correct result (probably!):
x(:) = 8.3333333333333339 6.6666666666666670 8.3333333333333339 5.0000000000000000

Fortran error: size of variable is too large

I have a long program and the goal is to solve the matrix system ax=b. When I run it, it reveals that "error: size of variable is too large".
program ddm
integer :: i,j,k
integer, parameter :: FN=1,FML=80,FMH=80
integer, parameter :: NBE=1*80*80 !NBE=FN*FML*FMH
double precision, dimension(1:3*NBE,1:3*NBE) :: AA
double precision, dimension(1:3*NBE) :: BB
double precision :: XX(3*NBE)
double precision, dimension(1:NBE) :: DSL,DSH,DNN
double precision, dimension(1:FML,1:FMH) :: DSL1,DSH1,DNN1
! Construct a block matrix
AA(1:NBE,1:NBE) = SLSL
AA(1:NBE,NBE+1:2*NBE) = SLSH
AA(1:NBE,2*NBE+1:3*NBE) = SLNN
AA(NBE+1:2*NBE,1:NBE) = SHSL
AA(NBE+1:2*NBE,NBE+1:2*NBE) = SHSH
AA(NBE+1:2*NBE,2*NBE+1:3*NBE) = SHNN
AA(2*NBE+1:3*NBE,1:NBE) = NNSL
AA(2*NBE+1:3*NBE,NBE+1:2*NBE) = NNSH
AA(2*NBE+1:3*NBE,2*NBE+1:3*NBE) = NNNN
! Construct a block matrix for boundary condition
BB(1:NBE) = SLBC
BB(NBE+1:2*NBE) = SHBC
BB(2*NBE+1:3*NBE) = NNBC
call GE(AA,BB,XX,3*NBE)
DSL = XX(1:NBE)
DSH = XX(NBE+1:2*NBE)
DNN = XX(2*NBE+1:3*NBE)
DSL1 = reshape(DSL,(/FML,FMH/))
DSH1 = reshape(DSH,(/FML,FMH/))
DNN1 = reshape(DNN,(/FML,FMH/))
open(unit=2, file='DNN2.txt', ACTION="write", STATUS="replace")
do i=1,80
write(2,'(*(F14.7))') real(DNN1(i,:))
end do
end program ddm
Note: GE(AA,BB,XX,3*NBE) is the function for solving the matrix system. Below is the GE function.
subroutine GE(a,b,x,n)
!===========================================================
! Solutions to a system of linear equations A*x=b
! Method: Gauss elimination (with scaling and pivoting)
!-----------------------------------------------------------
! input ...
! a(n,n) - array of coefficients for matrix A
! b(n) - array of the right hand coefficients b
! n - number of equations (size of matrix A)
! output ...
! x(n) - solutions
! coments ...
! the original arrays a(n,n) and b(n) will be destroyed
! during the calculation
!===========================================================
implicit none
integer n
double precision a(n,n),b(n),x(n)
double precision s(n)
double precision c, pivot, store
integer i, j, k, l
! step 1: begin forward elimination
do k=1, n-1
! step 2: "scaling"
! s(i) will have the largest element from row i
do i=k,n ! loop over rows
s(i) = 0.0
do j=k,n ! loop over elements of row i
s(i) = max(s(i),abs(a(i,j)))
end do
end do
! step 3: "pivoting 1"
! find a row with the largest pivoting element
pivot = abs(a(k,k)/s(k))
l = k
do j=k+1,n
if(abs(a(j,k)/s(j)) > pivot) then
pivot = abs(a(j,k)/s(j))
l = j
end if
end do
! Check if the system has a sigular matrix
if(pivot == 0.0) then
write(*,*) "The matrix is singular"
return
end if
! step 4: "pivoting 2" interchange rows k and l (if needed)
if (l /= k) then
do j=k,n
store = a(k,j)
a(k,j) = a(l,j)
a(l,j) = store
end do
store = b(k)
b(k) = b(l)
b(l) = store
end if
! step 5: the elimination (after scaling and pivoting)
do i=k+1,n
c=a(i,k)/a(k,k)
a(i,k) = 0.0
b(i)=b(i)- c*b(k)
do j=k+1,n
a(i,j) = a(i,j)-c*a(k,j)
end do
end do
end do
! step 6: back substiturion
x(n) = b(n)/a(n,n)
do i=n-1,1,-1
c=0.0
do j=i+1,n
c= c + a(i,j)*x(j)
end do
x(i) = (b(i)- c)/a(i,i)
end do
end subroutine GE
Turn your arrays (at least AA, BB, XX) into allocatable arrays and allocate them by yourself in the code. You are hitting the memory limit of statically allocated arrays. There is a limit of 2GB on some systems if I remember well (experts will confirm or give the right numbers).

Declaring parameters through input arguments in FORTRAN

I am trying to write a fortran routine where I declare arrays whose length comes from operations made upon the input parameters.
subroutine my_program(N, A,B,m)
implicit none
integer, intent(in) :: N
integer, parameter :: minA = 1, maxA = N
integer, parameter :: minB = 0, maxB = N-1
double precision, intent(out) :: max,A(minA:maxA),B(minB:maxB)
A = 0.d0
B = 1.d0
m = maxA*maxB-minA*minB
end subroutine my_program
Right now, I have an error coming from the the 5th line Parameter 'N' at (1) has not been declared or is a variable, which does not reduce to a constant expression
N is not known at compile time, so you cannot use it to initialize a parameter. Instead, use N directly to declare A and B:
subroutine my_program(N, A, B, m)
implicit none
integer, intent(in) :: N
double precision, intent(out) :: m, A(1:N), B(0:N-1)
integer :: minA, maxA
integer :: minB, maxB
minA = 1 ; maxA = N
minB = 0 ; maxB = N-1
A = 0.d0
B = 1.d0
m = maxA*maxB - minA*minB
end subroutine my_program

LAPACK: Are operations on packed storage matrices faster?

I want to tridiagonalize a real symmetric matrix using Fortran and LAPACK. LAPACK basically provides two routines, one operating on the full matrix, the other on the matrix in packed storage. While the latter surely uses less memory, I was wondering if anything can be said about the speed difference?
It's an empirical question, of course: but in general, nothing comes for free, and less memory/more runtime is a pretty common tradeoff.
In this case, the indexing for the data is more complex for the packed case, so as you traverse the matrix, the cost of getting your data is a little higher. (Complicating this picture is that for symmetric matrices, the lapack routines also assume a certain kind of packing - that you only have the upper or lower component of the matrix available).
I was messing around with an eigenproblem earlier today, so I'll use that as a measurement benchmark; trying with a simple symmetric test case (The Herdon matrix, from http://people.sc.fsu.edu/~jburkardt/m_src/test_mat/test_mat.html ), and comparing ssyevd with sspevd
$ ./eigen2 500
Generating a Herdon matrix:
Unpacked array:
Eigenvalues L_infty err = 1.7881393E-06
Packed array:
Eigenvalues L_infty err = 3.0994415E-06
Packed time: 2.800000086426735E-002
Unpacked time: 2.500000037252903E-002
$ ./eigen2 1000
Generating a Herdon matrix:
Unpacked array:
Eigenvalues L_infty err = 4.5299530E-06
Packed array:
Eigenvalues L_infty err = 5.8412552E-06
Packed time: 0.193900004029274
Unpacked time: 0.165000006556511
$ ./eigen2 2500
Generating a Herdon matrix:
Unpacked array:
Eigenvalues L_infty err = 6.1988831E-06
Packed array:
Eigenvalues L_infty err = 8.4638596E-06
Packed time: 3.21040010452271
Unpacked time: 2.70149993896484
There's about an 18% difference, which I must admit is larger than I expected (also with a slightly larger error for the packed case?). This is with intel's MKL. The performance difference will depend on your matrix in general, of course, as eriktous points out, and on the problem you're doing; the more random access to the matrix you have to do, the worse the overhead would be. The code I used is as follows:
program eigens
implicit none
integer :: nargs,n ! problem size
real, dimension(:,:), allocatable :: A, B, Z
real, dimension(:), allocatable :: PA
real, dimension(:), allocatable :: work
integer, dimension(:), allocatable :: iwork
real, dimension(:), allocatable :: eigenvals, expected
real :: c, p
integer :: worksize, iworksize
character(len=100) :: nstr
integer :: unpackedclock, packedclock
double precision :: unpackedtime, packedtime
integer :: i,j,info
! get filename
nargs = command_argument_count()
if (nargs /= 1) then
print *,'Usage: eigen2 n'
print *,' Where n = size of array'
stop
endif
call get_command_argument(1, nstr)
read(nstr,'(I)') n
if (n < 4 .or. n > 25000) then
print *, 'Invalid n ', nstr
stop
endif
! Initialize local arrays
allocate(A(n,n),B(n,n))
allocate(eigenvals(n))
! calculate the matrix - unpacked
print *, 'Generating a Herdon matrix: '
A = 0.
c = (1.*n * (1.*n + 1.) * (2.*n - 5.))/6.
forall (i=1:n-1,j=1:n-1)
A(i,j) = -1.*i*j/c
endforall
forall (i=1:n-1)
A(i,i) = (c - 1.*i*i)/c
A(i,n) = 1.*i/c
endforall
forall (j=1:n-1)
A(n,j) = 1.*j/c
endforall
A(n,n) = -1./c
B = A
! expected eigenvalues
allocate(expected(n))
p = 3. + sqrt((4. * n - 3.) * (n - 1.)*3./(n+1.))
expected(1) = p/(n*(5.-2.*n))
expected(2) = 6./(p*(n+1.))
expected(3:n) = 1.
print *, 'Unpacked array:'
allocate(work(1),iwork(1))
call ssyevd('N','U',n,A,n,eigenvals,work,-1,iwork,-1,info)
worksize = int(work(1))
iworksize = int(work(1))
deallocate(work,iwork)
allocate(work(worksize),iwork(iworksize))
call tick(unpackedclock)
call ssyevd('N','U',n,A,n,eigenvals,work,worksize,iwork,iworksize,info)
unpackedtime = tock(unpackedclock)
deallocate(work,iwork)
if (info /= 0) then
print *, 'Error -- info = ', info
endif
print *,'Eigenvalues L_infty err = ', maxval(eigenvals-expected)
! pack array
print *, 'Packed array:'
allocate(PA(n*(n+1)/2))
allocate(Z(n,n))
do i=1,n
do j=i,n
PA(i+(j-1)*j/2) = B(i,j)
enddo
enddo
allocate(work(1),iwork(1))
call sspevd('N','U',n,PA,eigenvals,Z,n,work,-1,iwork,-1,info)
worksize = int(work(1))
iworksize = iwork(1)
deallocate(work,iwork)
allocate(work(worksize),iwork(iworksize))
call tick(packedclock)
call sspevd('N','U',n,PA,eigenvals,Z,n,work,worksize,iwork,iworksize,info)
packedtime = tock(packedclock)
deallocate(work,iwork)
deallocate(Z,A,B,PA)
if (info /= 0) then
print *, 'Error -- info = ', info
endif
print *,'Eigenvalues L_infty err = ', &
maxval(eigenvals-expected)
deallocate(eigenvals, expected)
print *,'Packed time: ', packedtime
print *,'Unpacked time: ', unpackedtime
contains
subroutine tick(t)
integer, intent(OUT) :: t
call system_clock(t)
end subroutine tick
! returns time in seconds from now to time described by t
real function tock(t)
integer, intent(in) :: t
integer :: now, clock_rate
call system_clock(now,clock_rate)
tock = real(now - t)/real(clock_rate)
end function tock
end program eigens