I have a program which crashes if executed in shell with a simple "aborted". This usually points to some memory issues, so i ran the program under valgrind. The strange thing is that now it runs and gives the expected output. I used the exactly same build for both tests. Here's the Valgrind output:
valgrind --leak-check=yes --track-origins=yes ./datar_test
==1203== Memcheck, a memory error detector
==1203== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==1203== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==1203== Command: ./datar_test
==1203==
==1203== Conditional jump or move depends on uninitialised value(s)
==1203== at 0x60382D: __intel_sse2_strcpy (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x57EE17: for__add_to_lf_table (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x5BC0E8: for__open_proc (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x585DAF: for__open_default (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x5A97C3: for_write_seq_lis (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x40E302: fruit_mp_init_fruit_ (fruit.f90:635)
==1203== by 0x44B58B: MAIN__ (datar_test.f90:18)
==1203== by 0x4032DD: main (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== Uninitialised value was created by a heap allocation
==1203== at 0x4C29110: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==1203== by 0x59221C: for__get_vm (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x5BA84D: for__open_proc (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x585DAF: for__open_default (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x5A97C3: for_write_seq_lis (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203== by 0x40E302: fruit_mp_init_fruit_ (fruit.f90:635)
==1203== by 0x44B58B: MAIN__ (datar_test.f90:18)
==1203== by 0x4032DD: main (in /home/SERVER-hoffmann/sync/04_DATAR/datar_shared/Unittest/datar_test)
==1203==
Test module initialized
. : successful assert, F : failed assert
--------------
Starting contact force spectrum test
==1203== Syscall param sched_setaffinity(mask) points to unaddressable byte(s)
==1203== at 0x6B6EFB9: syscall (in /lib64/libc-2.19.so)
==1203== by 0x40A0787: __kmp_affinity_determine_capable (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libiomp5.so)
==1203== by 0x4089F97: __kmp_env_initialize(char const*) (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libiomp5.so)
==1203== by 0x4081175: ??? (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libiomp5.so)
==1203== by 0x4074428: __kmp_get_global_thread_id_reg (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libiomp5.so)
==1203== by 0x407FF98: __kmp_parallel_initialize (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libiomp5.so)
==1203== by 0x406EC7D: omp_get_num_procs (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libiomp5.so)
==1203== by 0x54211FA: MKL_get_N_Cores (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libmkl_intel_thread.so)
==1203== by 0x5420CA8: mkl_read_threads_env (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libmkl_intel_thread.so)
==1203== by 0x5420694: mkl_serv_mkl_domain_get_max_threads (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libmkl_intel_thread.so)
==1203== by 0x57BB632: mkl_dft_commit_descriptor_d_c2c_1d_omp (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libmkl_intel_thread.so)
==1203== by 0x4F4279C: mkl_dft_dfti_commit_descriptor_external (in /global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/libmkl_intel_lp64.so)
==1203== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==1203==
OMP: Warning #2: Cannot open message catalog "libiomp5.cat":
OMP: System error #2: No such file or directory
OMP: Hint: Check NLSPATH environment variable, its value is "/global/linux/64_bit/opt/intel/mkl/10.2.2.025/lib/em64t/locale/%l_%t/%N".
OMP: Info #3: Default messages will be used.
==2910==
==2910== HEAP SUMMARY:
==2910== in use at exit: 2,594 bytes in 8 blocks
==2910== total heap usage: 2,462 allocs, 2,454 frees, 7,276,535 bytes allocated
==2910==
==2910== LEAK SUMMARY:
==2910== definitely lost: 0 bytes in 0 blocks
==2910== indirectly lost: 0 bytes in 0 blocks
==2910== possibly lost: 0 bytes in 0 blocks
==2910== still reachable: 2,594 bytes in 8 blocks
==2910== suppressed: 0 bytes in 0 blocks
==2910== Reachable blocks (those to which a pointer was found) are not shown.
==2910== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==2910==
==2910== For counts of detected and suppressed errors, rerun with: -v
==2910== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
Note that i'm compiling with MKL. It can be found that Valgrind gives false positives with MKL (everything that points to mkl libraries).
I'm struggling now to understand why it runs under Valgrind and not without it. Can you explain this to me?
Edit: I got rid of some messages, i updated the Valgrind output.
Update: Here's an example, where it also doesnt run under Valgrind. It comes down to some Memory Leak i think, but i can't solve it.
Code:
module math
real, parameter :: pi = 4*atan(1.0)
contains
function fft(x, DirectionKey)
use MKL_DFTI
implicit none
! Declaring part
complex, dimension(:) :: x
complex, dimension(size(x)) :: fft
type(DFTI_DESCRIPTOR), pointer :: DFTI
integer :: DirectionKey, Status
! Executing part
fft = 0
Status = DftiCreateDescriptor(DFTI,DFTI_DOUBLE, DFTI_COMPLEX, 1, size(x) )
Status = DftiCommitDescriptor(DFTI)
select case (DirectionKey)
case (1)
Status = DftiComputeForward(DFTI,x)
fft = x/size(x)
case (-1)
Status = DftiComputeBackward(DFTI,x)
fft = x
end select
Status = DftiFreeDescriptor(DFTI)
end function fft
! ============================================================================
subroutine NonlinearContactForcePoint(RelativeDisplacementPoint, NormalForcePoint, FricCoeff, Stiffness, ContactForceOUT, ContactForceJacobianOUT)
implicit none
! Declaring part
real, dimension(:), intent(in) :: RelativeDisplacementPoint
real, dimension(size(RelativeDisplacementPoint)), intent(in) :: NormalForcePoint
real, intent(in) :: FricCoeff, Stiffness
real, dimension(size(RelativeDisplacementPoint)) :: ContactForce
real, dimension(size(RelativeDisplacementPoint),2) :: ContactForcePrime
integer, dimension(size(RelativeDisplacementPoint)) :: ContactStatus
real, dimension(2), intent(out) :: ContactForceOUT
real, dimension(2,2), intent(out) :: ContactForceJacobianOUT
! Executing part
call ContactModel(RelativeDisplacementPoint, NormalForcePoint, FricCoeff, Stiffness, ContactForce, ContactStatus)
call ContactModelPrime(ContactStatus, Stiffness, ContactForcePrime)
call TimeToFourier(ContactForce, ContactForcePrime, 1, ContactForceOUT, ContactForceJacobianOUT)
end subroutine NonlinearContactForcePoint
! ============================================================================
subroutine ContactModel(Displacement, NormalForce, FrictionCoefficient, TangentialStiffness, ContactForce, ContactStatus)
implicit none
! Declaring part
real, dimension(:), intent(in) :: Displacement
real, dimension(size(Displacement)), intent(in) :: NormalForce
real, dimension(size(Displacement)) :: DisplacementSlider
real :: FrictionCoefficient, TangentialStiffness
real, dimension(size(Displacement)), intent(out) :: ContactForce
integer, dimension(size(Displacement)), intent(out) :: ContactStatus
integer :: ii
! Executing part
ContactForce = 0.0
ContactStatus = 0
DisplacementSlider = 0.0
do ii = 2,size(Displacement)
if (NormalForce(ii)==0) then
ContactForce(ii) = 0.0
DisplacementSlider(ii) = Displacement(ii)
ContactStatus(ii) = -1
else
! Slip in positive direction
if (TangentialStiffness*(Displacement(ii) - DisplacementSlider(ii-1))- FrictionCoefficient*NormalForce(ii) > 0) then
ContactForce(ii) = -FrictionCoefficient*NormalForce(ii);
DisplacementSlider(ii) = Displacement(ii) + ContactForce(ii)/TangentialStiffness;
ContactStatus(ii) = 1
! Slip in negative direction
elseif (-TangentialStiffness*(Displacement(ii) - DisplacementSlider(ii-1))- FrictionCoefficient*NormalForce(ii) > 0) then
ContactForce(ii) = FrictionCoefficient*NormalForce(ii);
DisplacementSlider(ii) = Displacement(ii) + ContactForce(ii)/TangentialStiffness;
ContactStatus(ii) = 1
! General stick
elseif (TangentialStiffness*(Displacement(ii) - DisplacementSlider(ii-1))- FrictionCoefficient*NormalForce(ii) <= 0) then
ContactForce(ii) = -TangentialStiffness*(Displacement(ii) - DisplacementSlider(ii-1));
DisplacementSlider(ii) = DisplacementSlider(ii-1);
ContactStatus(ii) = 0
end if
end if
end do
end subroutine ContactModel
! ============================================================================
subroutine TimeToFourier(TimeSeries, TimeSeriesPrime, Harmonics, RealCoefficients, RealJacobianCoefficients)
implicit none
! Declaring part
real, dimension(:), intent(in) :: TimeSeries
real, dimension(:,:), intent(in) :: TimeSeriesPrime
integer, intent(in) :: Harmonics
integer :: N
complex, dimension(1) :: Spectrum, SpectrumPrimeR, SpectrumPrimeI
real, dimension(2), intent(out) :: RealCoefficients
real, dimension(2,2), intent(out) :: RealJacobianCoefficients
! Executing part
N = size(TimeSeries)
Spectrum = CreateSpectrum(TimeSeries(N/2+1:N), Harmonics)
SpectrumPrimeR = CreateSpectrum(TimeSeriesPrime(N/2+1:N,1), Harmonics)
SpectrumPrimeI = CreateSpectrum(TimeSeriesPrime(N/2+1:N,2), Harmonics)
RealCoefficients = [real(Spectrum), -aimag(Spectrum)]
RealJacobianCoefficients = reshape([real(SpectrumPrimeR), -aimag(SpectrumPrimeR), real(SpectrumPrimeI), -aimag(SpectrumPrimeI)],[2,2])
end subroutine TimeToFourier
! ============================================================================
subroutine ContactModelPrime(ContactStatus, TangentialStiffness, Prime)
implicit none
! Declaring part
integer, dimension(:), intent(in) :: ContactStatus
real, dimension(size(ContactStatus)) :: dfdrQ, dfdiQ
real :: TangentialStiffness
real, dimension(size(ContactStatus),2) :: Prime
real :: dfdrQ_OLD, dfdiQ_OLD
integer :: ii
! Executing part
dfdrQ = 0.0
dfdiQ = 0.0
dfdrQ_OLD = 0.0
dfdiQ_OLD = 0.0
Prime = 0.0
do ii = 2,size(ContactStatus)
! Stick
if (ContactStatus(ii) == 0) then
dfdrQ(ii) = -TangentialStiffness*(cos(2*2*pi/size(ContactStatus)*(ii-1))-cos(2*2*pi/size(ContactStatus)*(ii-2))) + dfdrQ_OLD;
dfdiQ(ii) = -TangentialStiffness*(sin(2*2*pi/size(ContactStatus)*(ii-1))-sin(2*2*pi/size(ContactStatus)*(ii-2))) + dfdiQ_OLD;
end if
dfdrQ_OLD = dfdrQ(ii)
dfdiQ_OLD = dfdiQ(ii)
end do
Prime = reshape([dfdrQ, dfdiQ],[size(ContactStatus),2])
end subroutine ContactModelPrime
! ============================================================================
function CreateSpectrum(TimeSignal, Harmonics)
implicit none
! Declaring part
real, dimension(:), intent(in) :: TimeSignal
complex, dimension(size(TimeSignal)) :: Spectrum
integer, intent(in) :: Harmonics
complex, dimension(:), allocatable :: CreateSpectrum
integer :: ii
! Executing part
Spectrum = fft(cmplx(TimeSignal),1)
if (Harmonics == 0) then
allocate(CreateSpectrum(1))
CreateSpectrum = Spectrum(1)
else
allocate(CreateSpectrum(Harmonics))
do ii=1,Harmonics
CreateSpectrum = 2*Spectrum(Harmonics+1)
end do
end if
end function
end module math
! ============================================================================
program test
use MKL_RCI
use MKL_RCI_type
use math
implicit none
real, dimension(12,12) :: ContactForceJacobian
real, dimension(12) :: Displacement
external :: NonlinearContactForce
Displacement = [1.0e-5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]
if (djacobi(NonlinearContactForce,12,12,ContactForceJacobian,Displacement,1e-8) /= TR_SUCCESS) then
write(*,*), '| error in djacobi'
call MKL_FREE_BUFFERS
stop 1;
end if
end program test
! ============================================================================
subroutine NonlinearContactForce(m,n,RelativeDisplacement, ContactForce, ContactForceJacobian)
use math
implicit none
! Declaring part
integer :: m, n
real, dimension(12) :: RelativeDisplacement
real, dimension(6) :: RelativeDisplacementPoint
real, dimension(2) :: ContactForceXi
real, dimension(2,2) :: ContactForceJacobianXi
! Time domain variables
real, dimension(32) :: NormalForce
complex, dimension(32) :: RelativeDisplacementPointSpectrum
real, dimension(32) :: RelativeDisplacementPointTime
real, dimension(2) :: ContactForceLocal
real, dimension(2,2) :: ContactForceJacobianLocal
! Output variables
real, dimension(12), intent(out) :: ContactForce
real, dimension(12,12), intent(out) :: ContactForceJacobian
! Executing part
ContactForce(1:12) = 0
ContactForceJacobian(1:12,1:12) = 0
RelativeDisplacementPointTime = 0
NormalForce = 1
RelativeDisplacementPointSpectrum = 0
RelativeDisplacementPointSpectrum(3) = cmplx(RelativeDisplacement(1)/2.0,-RelativeDisplacement(2)/2.0)
RelativeDisplacementPointSpectrum(32-1) = cmplx(RelativeDisplacement(1)/2.0,RelativeDisplacement(2)/2.0)
RelativeDisplacementPointTime = real(fft(RelativeDisplacementPointSpectrum,-1))
call NonlinearContactForcePoint(RelativeDisplacementPointTime, NormalForce,1.0, 1e6, ContactForceLocal, ContactForceJacobianLocal)
ContactForce(1:2) = ContactForceLocal
ContactForceJacobian(1:2,1:2) = ContactForceJacobianLocal
end subroutine NonlinearContactForce
Compile command (ifort 17.0.1):
ifort -g -O0 -CB -CA -CU -fstack-protector -fp-stack-check -traceback -real-size 64 -mkl -I${MKL_ROOT}/include test.f90
MKL version: 2017
Related
I use Intel Visual Fortran, both IVF2013 and IVF2019. When using allocatable arrays, the program is much slower than the one using static memory allocation. That is to say, if I change from
Method 1: by using fixed array
do i = 1, 1000
call A
end do
subroutine A
real(8) :: x(30)
do things
end subroutine A
to something like
Method 2: by using allocatable arrays
module module_size_is_defined
n = 30
end module
do i = 1, 1000
call A
end do
subroutine A
use module_size_is_defined
real(8), allocatable :: x(:)
allocate(x(n))
do things
end subroutine A
The code is much slower. For my code, the static allocation takes 1 minutes 30 seconds while the dynamic allocation takes 2 minutes and 30 seconds. Then, I thought is might because that the allocation action was run takes too much time as it is in the loop, then I tried following two methods:
Method 3: by using the module to allocate the array only once
module module_x_is_allocated
n = 30
allocat(x(n))
end module
do i = 1, 1000
call A
end do
subroutine A
use module_x_is_allocated
do things
end subroutine A
Method 4: by using automatic array
module module_size_is_defined
n = 30
end module
do i = 1, 1000
call A
end do
subroutine A
use module_size_is_defined
real(x) :: x(n)
do things
end subroutine A
Both Method 3 and Method 4 take almost the same time of the one using dynamic allocated array Method 2. Both around 2 mins 30s. All cases are compiling with same optimization. I tried IVF 2013 and IVF 2019, and same results. I don't know why. Especially for Method 3, although the allocate is only run once, it still takes the same time. It seems that dynamic allocated array is stored at the place that is slower than the static allocated array, and allocation does not take extra time (since method 2 and 3 take the same time).
Any ideas and suggestions that to allocate the arrays in a more efficient manner to reduce the performance penalty? Thanks.
!=========================================================================
Edit 1:
My program is too long to post here. Thus, I tried a few small codes. The results are a little bit strange. I tried three cases,
Method 1: takes 28.98s
module module_size_is_defined
implicit none
integer(4) :: n
end module
program main
use module_size_is_defined
implicit none
integer(4) :: i
real(8) :: y(50,50),z(50,50),t
n = 50
do i =1,50000
t=dble(i) * 2.0D0
call A(y,t)
z = z + y
end do
write(*,*) z(1,1)
end
subroutine A(y,t)
use module_size_is_defined
implicit none
real(8),intent(out):: y(n,n)
real(8),intent(in) :: t
integer(4) :: j
real(8) :: x(1,50)
y=0.0D0
do j = 1, 200
call getX(x,t,j)
y = y + matmul( transpose(x) + dble(j)**2, x )
end do
endsubroutine A
subroutine getX(x,t,j)
use module_size_is_defined
implicit none
real(8),intent(out) :: x(1,n)
real(8),intent(in) :: t
integer(4),intent(in) :: j
integer(4) :: i
do i =1, n
x(1,i) = dble(i+j) * t ** (1.5D00)
end do
endsubroutine getX
Method 2: takes 30.56s
module module_size_is_defined
implicit none
integer(4) :: n
end module
program main
use module_size_is_defined
implicit none
integer(4) :: i
real(8) :: y(50,50),z(50,50),t
n = 50
do i =1,50000
t=dble(i) * 2.0D0
call A(y,t)
z = z + y
end do
write(*,*) z(1,1)
end
subroutine A(y,t)
use module_size_is_defined
implicit none
real(8),intent(out):: y(n,n)
real(8),intent(in) :: t
integer(4) :: j
real(8),allocatable :: x(:,:)
allocate(x(1,n))
y=0.0D0
do j = 1, 200
call getX(x,t,j)
y = y + matmul( transpose(x) + dble(j)**2, x )
end do
endsubroutine A
subroutine getX(x,t,j)
use module_size_is_defined
implicit none
real(8),intent(out) :: x(1,n)
real(8),intent(in) :: t
integer(4),intent(in) :: j
integer(4) :: i
do i =1, n
x(1,i) = dble(i+j) * t ** (1.5D00)
end do
endsubroutine getX
Method 3: takes 78.72s
module module_size_is_defined
implicit none
integer(4) :: n
endmodule
module module_array_is_allocated
use module_size_is_defined
implicit none
real(8), allocatable,save :: x(:,:)
contains
subroutine init
implicit none
allocate(x(1,n))
endsubroutine
endmodule module_array_is_allocated
program main
use module_size_is_defined
use module_array_is_allocated
implicit none
integer(4) :: i
real(8) :: y(50,50),z(50,50),t
n = 50
call init
do i =1,50000
t=dble(i) * 2.0D0
call A(y,t)
z = z + y
end do
write(*,*) z(1,1)
end
subroutine A(y,t)
use module_size_is_defined
use module_array_is_allocated
implicit none
real(8),intent(out):: y(n,n)
real(8),intent(in) :: t
integer(4) :: j
y=0.0D0
do j = 1, 200
call getX(x,t,j)
y = y + matmul( transpose(x) + dble(j)**2, x )
end do
endsubroutine A
subroutine getX(x,t,j)
use module_size_is_defined
implicit none
real(8),intent(out) :: x(1,n)
real(8),intent(in) :: t
integer(4),intent(in) :: j
integer(4) :: i
do i =1, n
x(1,i) = dble(i+j) * t ** (1.5D00)
end do
endsubroutine getX
Now, with samller size problem, Method 1 and Method 2 is almost same time. But Method 3 should be better than Method 2, since it only allocate x(1,n) once. But it is much slower. But in my previous program, Method 2 gives almost the same time as Method 3. It is strange.
I complied in both Windows and Linux, with release setup, -O2 Optimization, with different version of IVF.
I try to call subroutine SPLEV of FITPACK library through two functions ('wer' and 'qwe') nested one into another (the code is below).
The following message appears under execution of compiled program:
QWE
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
0 0x7F3EE4BF3E08
1 0x7F3EE4BF2F90
2 0x7F3EE453A4AF
3 0x4041B6 in splev_
4 0x400BD0 in value.3386 at pr.f90:?
5 0x400A6B in MAIN__ at pr.f90:?
Ошибка сегментирования (сделан дамп памяти)
If I compile my program with flags -g -fbacktrace -fsanitize=address,zero,undefined the follow output message appears:
QWE
0.37051690837706980
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
0 0x7FAB5F45CE08
1 0x7FAB5F45BF90
2 0x7FAB5EDA34AF
3 0x4075F0 in splev_ at splev.f:73 (discriminator 2)
4 0x400DDE in value.3386 at pr.f90:87
5 0x400FFA in qwe.3406 at pr.f90:43
6 0x400F88 in wer.3403 at pr.f90:48
7 0x400D08 in MAIN__ at pr.f90:38
Ошибка сегментирования (сделан дамп памяти)
If I compile my program with flags -g -fbacktrace -Wall -fcheck=all the follow output message appears:
QWE
Program received signal SIGSEGV: Segmentation fault - invalid memory reference.
Backtrace for this error:
0 0x7F2BE6F0FE08
1 0x7F2BE6F0EF90
2 0x7F2BE68564AF
3 0x4075F0 in splev_ at splev.f:73 (discriminator 2)
4 0x400DDE in value.3386 at pr.f90:87
5 0x400C46 in MAIN__ at pr.f90:35
Ошибка сегментирования (сделан дамп памяти)
If I compile my program with flags -g -fbacktrace -fsanitize=address the follow output message appears:
QWE
ASAN:SIGSEGV
=================================================================
==4796==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x000000408f67 bp 0x7ffe7a134440 sp 0x7ffe7a1341e0 T0)
0 0x408f66 in splev_ /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/splev.f:73
1 0x40145d in value.3386 (/home/yurchvlad/Science/Coll_Int/F90/f90DP/1/curfit+0x40145d)
2 0x4011a3 in intcoll /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/pr.f90:35
3 0x401849 in main /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/pr.f90:2
4 0x7fcad9b3282f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2082f)
5 0x400d38 in _start (/home/yurchvlad/Science/Coll_Int/F90/f90DP/1/curfit+0x400d38)
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /home/yurchvlad/Science/Coll_Int/F90/f90DP/1/splev.f:73 splev_
==4796==ABORTING
Firstly I will show the code and then I will give some information about subroutines CURFIT and SPLEV of library FITPACK which are playing there a principal role.
Here is my code. This is just a test program, i.e. it is not confusion, that I interpolate there array of values of analytical function.
PROGRAM IntColl
USE Constants
IMPLICIT NONE
INTEGER :: i, nen ! i = counter
! nen, nmn, ne is sirvice variables, which
! appear on exit of CURFIT and needed on entry
! of SPLEV and SPLINT
REAL(DP) :: foo
REAL(DP) :: MOM1 ! dimensionless neutrino momentum
REAL(DP) :: dmg ( 1 : 2 * NG) ! dimensionless momentum grid
REAL(DP) :: endf( 1 : 2 * NG) ! electron neutrino distribution function
! muon neutrino distribution function
! electron and positron distribution function
REAL(DP) :: ten ( 1 : 2 * NG + k + 1) ! service arrays:
! ten is array arising on exit of working of CURFIT
! and contain knots of the spline (for endf, mndf and edf correspondingly).
REAL(DP) :: cen ( 1 : 2 * NG + k + 1) ! needed on entry of SPLEV and SPLINT
! cen appear on exit of CURFIT, contain coefficients of spline
! (for endf, mndf and edf correspondingly) and needed on entry of SPLEV and SPLINT.
REAL(DP) :: w ( 1 : 2 * NG + k + 1) ! w is array of weights for points on entry of CURFIT.
DO i = 1, 2 * NG
dmg(i) = i / 10.D+00 ! filling arrays to give their
endf(i) = eq_nu_di_fu(dmg(i)) ! on entry into subroutine
w(i) = 1.d+00 ! CURFIT
END DO
MOM1 = .53D+00
PRINT *, 'QWE'
CALL spline(dmg, endf, nen, ten, cen)
foo = value(MOM1, ten, nen, cen)
PRINT *, foo
PRINT *, wer(MOM1)
CONTAINS
REAL(DP) FUNCTION qwe(q) ! qwe and wer is "wrappers" for using
REAL(DP) :: q ! of subroutines spline > curfit
qwe = value(q, ten, nen, cen) ! in main program
END FUNCTION qwe
REAL(DP) FUNCTION wer(q)
REAL(DP) :: q
wer = qwe(q)
END FUNCTION wer
SUBROUTINE spline(x, y, n, t, c) ! spline is "hand-made wrapper" for
IMPLICIT NONE ! more convenient using of subroutine
! CURFIT in main program
INTEGER :: m, nest, n, lwrk, ier
INTEGER, PARAMETER :: iopt = 0
INTEGER :: iwrk( 1 : 10 * NG )
REAL(DP) :: xb, xe, fp
REAL(DP) :: wrk( 1 : 2 * NG * (k + 1) + (2 * NG + k + 1) * (7 + 3 * k) )
REAL(DP) :: x( 1 : 2 * NG), y(1: 2 * NG )
REAL(DP) :: t( 1 : 2 * NG + k + 1 )
REAL(DP) :: c( 1 : 2 * NG + k + 1 )
xb = 0.d+00
xe = x(2 * NG)
m = 2 * NG
nest = m + k + 1
lwrk = 2 * NG * (k + 1) + nest * (7 + 3 * k)
CALL curfit(iopt, m, x, y, w, xb, xe, k, s, nest, n, t, c, fp, wrk, lwrk, iwrk, ier)
END SUBROUTINE spline
REAL(DP) FUNCTION value(q, t, n, c) ! value is "hand-made wrapper" for
IMPLICIT NONE ! more convenient using of subroutine
! SPLEV in main program
INTEGER :: n, ier ! SPLEV should work only after
INTEGER, PARAMETER :: m = 1 ! CURFIT edned its working
REAL(DP) :: q
REAL(DP) :: t( 1 : 2 * NG + k + 1 )
REAL(DP) :: c( 1 : 2 * NG + k + 1 )
REAL(DP) :: ddmg(1), sddmg(1)
ddmg(1) = q
CALL splev(t, n, c, k, ddmg, sddmg, m, ier)
value = sddmg(1)
END FUNCTION value
REAL(DP) FUNCTION eq_nu_di_fu(y) ! eq_nu_di_fy givev values for array
IMPLICIT NONE ! to interpolate
REAL(DP) :: y
eq_nu_di_fu = 1 / (EXP(y) + 1)
END FUNCTION eq_nu_di_fu
END PROGRAM IntColl
The module Constants is there:
MODULE CONSTANTS
INTEGER, PARAMETER :: DP = SELECTED_REAL_KIND(15, 307)
INTEGER, PARAMETER :: NG = 200 ! NUMBER OF KNOTS OF GRID
INTEGER , PARAMETER :: K = 3 ! THE ORDER OF SPLINE
REAL(DP), PARAMETER :: S = 0.D+00 ! CUBIC SPLINE SMOOTHING FACTOR
END MODULE
Now, subroutines CURFIT and SPLEV appearing in above code with all their dependensies are in follow sources:
https://github.com/jbaayen/fitpackpp/tree/master/fitpack
where these subroutines are in double precision
and
http://www.netlib.org/dierckx/
where these subroutines are in single precision.
It is very important to mention that with single precision above scheme works!
Of course, if I use subroutines of single precision I modify all the types of all variables in corrisponding way.
What else have I observed:
straightforward using of FUNCTION value works.
If the line
PRINT *, 'QWE'
of the main program is commented, the value 'foo' also is not printed.
I have some a problem with my main code, so I tried to isolate the problem.
Therefore, I have this small code :
MODULE Param
IMPLICIT NONE
integer, parameter :: dr = SELECTED_REAL_KIND(15, 307)
integer :: D =3
integer :: Q=10
integer :: mmo=16
integer :: n=2
integer :: x=80
integer :: y=70
integer :: z=20
integer :: tMax=8
END MODULE Param
module m
contains
subroutine compute(f, r)
USE Param, ONLY: dr, mmo, x, y, z, n
IMPLICIT NONE
real (kind=dr), intent(in) :: f(x,y,z, 0:mmo, n)
real (kind=dr), intent(out) :: r(x, y, z, n)
real (kind=dr) :: fGlob(x,y,z, 0:mmo)
!-------------------------------------------------------------------------
print*, 'We are in compute subroutine'
r= 00.0
fGlob=sum(f,dim=5)
r=sum(f, dim=4)
print*, 'fGlob=', fGlob(1,1,1, 1)
print*, 'f=', f(1,1,1, 0,1)
print*, 'r=', r(1,1,1, 1)
end subroutine compute
end module m
PROGRAM test_prog
USE Param
USE m
Implicit None
integer :: tStep
real (kind=dr), dimension(:,:,:, :,:), allocatable :: f
real (kind=dr), dimension(:,:,:,:), allocatable :: r
!----------------------------------------------------------------------------
! Initialise the parameters.
print*, 'beginning of the test'
! Allocate
allocate(f(x,y,z, 0:mmo,n))
allocate(r(x,y,z, n))
f=1.0_dr
! ---------------------------------------------------------
! Iteration over time
! ---------------------------------------------------------
do tStep = 1, tMax
print *, tStep
call compute(f,r)
f=f+1
print *, 'tStep', tStep
enddo
print*, 'f=', f(1,1,1, 0,1)
print*, 'r=', r(1,1,1, 1)
! Deallacation
deallocate(f)
deallocate(r)
print*, 'End of the test program'
END PROGRAM test_prog
For now, I am not able to understand why when I compile with ifort, I have a segmentation fault, and it works when I compile with gfortran. And worst, when I compile with both ifort and gfortran with their fast options, I get again a segmentation fault (core dumped) error. And more confusing, when I also tried with both compilers to compile with traceback options, everything works fine.
I know that segmentation fault (core dumped) error usually means that I try to read or write in a wrong location (matrix indices etc...); but here with this small code, I see no mistake like this.
Does anyone can help me to understand why theses errors occur?
The problem comes from the size of the stack used by some compilers by default (ifort) or by some others when they optimise the compilation (gfortran -Ofast). Here, our writings exceed the size of the stack.
To solve this, I use the options -heap-arrays for ifort compiler and -fno-stack-arrays for gfortran compiler.
I'm trying to use the MKL trust region algorithm to solve a nonlinear system of equations in a Fortran program. I started from the example provided online (ex_nlsqp_f90_x.f90 https://software.intel.com/en-us/node/501498) and everything works correctly. Now, because I have to use this in a much bigger program, I need the user defined objective function to be loaded from a separate module. Hence, I split the example into 2 separate files, but I'm not able to make it compile correctly.
So here is the code for module which contains user defined data structure and the objective function
module modFun
implicit none
private
public my_data, extended_powell
type :: my_data
integer a
integer sum
end type my_data
contains
subroutine extended_powell (m, n, x, f, user_data)
implicit none
integer, intent(in) :: m, n
real*8 , intent(in) :: x(n)
real*8, intent(out) :: f(m)
type(my_data) :: user_data
integer i
user_data%sum = user_data%sum + user_data%a
do i = 1, n/4
f(4*(i-1)+1) = x(4*(i-1)+1) + 10.0 * x(4*(i-1)+2)
f(4*(i-1)+2) = 2.2360679774998 * (x(4*(i-1)+3) - x(4*(i-1)+4))
f(4*(i-1)+3) = ( x(4*(i-1)+2) - 2.0 * x(4*(i-1)+3) )**2
f(4*(i-1)+4) = 3.1622776601684 * (x(4*(i-1)+1) - x(4*(i-1)+4))**2
end do
end subroutine extended_powell
end module modFun
and here the portion of the main program calling it
include 'mkl_rci.f90'
program EXAMPLE_EX_NLSQP_F90_X
use MKL_RCI
use MKL_RCI_type
use modFun
! user's objective function
! n - number of function variables
! m - dimension of function value
integer n, m
parameter (n = 4)
parameter (m = 4)
! precisions for stop-criteria (see manual for more details)
real*8 eps(6)
real*8 x(n)
real*8 fjac(m*n)
! number of iterations
integer fun
! Additional users data
type(my_data) :: m_data
m_data%a = 1
m_data%sum = 0
rs = 0.0
fun = djacobix(extended_powell,n,m,fjac,x,eps(1),%val(loc(m_data)))
end program EXAMPLE_EX_NLSQP_F90_X
Also djacobix code
INTERFACE
INTEGER FUNCTION DJACOBIX(fcn, n, m, fjac, x, eps, user_data)
USE, INTRINSIC :: ISO_C_BINDING
INTEGER, INTENT(IN) :: n
INTEGER, INTENT(IN) :: m
DOUBLE PRECISION, INTENT(IN) :: eps
DOUBLE PRECISION, INTENT(IN), DIMENSION(*) :: x
DOUBLE PRECISION, INTENT(OUT), DIMENSION(m, *) :: fjac
INTEGER(C_INTPTR_T) :: user_data
INTERFACE
SUBROUTINE fcn(m, n, x, f, user_data)
USE, INTRINSIC :: ISO_C_BINDING
INTEGER, INTENT(IN) :: n
INTEGER, INTENT(IN) :: m
DOUBLE PRECISION, INTENT(IN), DIMENSION(*) :: x
DOUBLE PRECISION, INTENT(OUT), DIMENSION(*) :: f
INTEGER(C_INTPTR_T), INTENT(IN) :: user_data
END SUBROUTINE
END INTERFACE
END FUNCTION
END INTERFACE
When i compile the following errors are generated:
mpiifort -g -t -mkl -I/apps/rhel6/intel/composer_xe_2015.3.187/mkl/include/intel64/lp64 -c modFun.f90
mpiifort -g -t -mkl -I/apps/rhel6/intel/composer_xe_2015.3.187/mkl/include/intel64/lp64 -c main.f90
main.f90(30): error #7065: The characteristics of dummy argument 5 of the associated actual procedure differ from the characteristics of dummy argument 5 of the dummy procedure. [EXTENDED_POWELL]
fun = djacobix(extended_powell,n,m,fjac,x,eps(1),%val(loc(m_data)))
-------------------^
I have the feeling I have to create an interface to override the check on the m_data, but I can't figure out where and how. Can anyone help me with this problem providing a working example?
I guess the reason is that the function djacobix passes the pointer instead of the true value of variable user_data.
You can check the manual at https://software.intel.com/content/www/us/en/develop/documentation/onemkl-developer-reference-c/top/nonlinear-optimization-problem-solvers/jacobian-matrix-calculation-routines/jacobix.html where a sentence shows that "You need to declare fcn as extern in the calling program."
I'm struggling with LAPACK's dgetrf and dgetri routines. Below is a subroutine I've created (the variable fit_coeffs is defined externally and is allocatable, it's not the problem). When I run I get memory allocation errors, that appear when I assign fit_coeffs, due to the matmul(ATA,AT) line. I know this from inserting a bunch of print statements. Also, both error checking statements after calls to LAPACK subroutines are printed, suggesting an error.
Does anyone understand where this comes from? I'm compiling using the command:
gfortran -Wall -cpp -std=f2003 -ffree-form -L/home/binningtont/lapack-3.4.0/ read_grib.f -llapack -lrefblas.
Thanks in advance!
subroutine polynomial_fit(x_array, y_array, D)
integer, intent(in) :: D
real, intent(in), dimension(:) :: x_array, y_array
real, allocatable, dimension(:,:) :: A, AT, ATA
real, allocatable, dimension(:) :: work
integer, dimension(:), allocatable :: pivot
integer :: l, m, n, lda, lwork, ok
l = D + 1
lda = l
lwork = l
allocate(fit_coeffs(l))
allocate(pivot(l))
allocate(work(l))
allocate(A(size(x_array),l))
allocate(AT(l,size(x_array)))
allocate(ATA(l,l))
do m = 1,size(x_array),1
do n = 1,l,1
A(m,n) = x_array(m)**(n-1)
end do
end do
AT = transpose(A)
ATA = matmul(AT,A)
call dgetrf(l, l, ATA, lda, pivot, ok)
! ATA is now represented as PLU (permutation, lower, upper)
if (ok /= 0) then
write(6,*) "HERE"
end if
call dgetri(l, ATA, lda, pivot, work, lwork, ok)
! ATA now contains the inverse of the matrix ATA
if (ok /= 0) then
write(6,*) "HERE"
end if
fit_coeffs = matmul(matmul(ATA,AT),y_array)
deallocate(pivot)
deallocate(fit_coeffs)
deallocate(work)
deallocate(A)
deallocate(AT)
deallocate(ATA)
end subroutine polynomial_fit
1) Where is fit_coeffs declared? I can't see how the above can even compile
1b) Implicit None is your friend!
2) You do have an interface in scope at the calling point, don't you?
3) dgertf and dgetri want "double precision" while you have single. So you need sgetrf and sgetri
"Fixing" all these and completeing the program I get
Program testit
Implicit None
Real, Dimension( 1:100 ) :: x, y
Integer :: D
Interface
subroutine polynomial_fit(x_array, y_array, D)
Implicit None ! Always use this!!
integer, intent(in) :: D
real, intent(in), dimension(:) :: x_array, y_array
End subroutine polynomial_fit
End Interface
Call Random_number( x )
Call Random_number( y )
D = 6
Call polynomial_fit( x, y, D )
End Program testit
subroutine polynomial_fit(x_array, y_array, D)
Implicit None ! Always use this!!
integer, intent(in) :: D
real, intent(in), dimension(:) :: x_array, y_array
real, allocatable, dimension(:,:) :: A, AT, ATA
real, allocatable, dimension(:) :: work, fit_coeffs
integer, dimension(:), allocatable :: pivot
integer :: l, m, n, lda, lwork, ok
l = D + 1
lda = l
lwork = l
allocate(fit_coeffs(l))
allocate(pivot(l))
allocate(work(l))
allocate(A(size(x_array),l))
allocate(AT(l,size(x_array)))
allocate(ATA(l,l))
do m = 1,size(x_array),1
do n = 1,l,1
A(m,n) = x_array(m)**(n-1)
end do
end do
AT = transpose(A)
ATA = matmul(AT,A)
call sgetrf(l, l, ATA, lda, pivot, ok)
! ATA is now represented as PLU (permutation, lower, upper)
if (ok /= 0) then
write(6,*) "HERE"
end if
call sgetri(l, ATA, lda, pivot, work, lwork, ok)
! ATA now contains the inverse of the matrix ATA
if (ok /= 0) then
write(6,*) "HERE"
end if
fit_coeffs = matmul(matmul(ATA,AT),y_array)
deallocate(pivot)
deallocate(fit_coeffs)
deallocate(work)
deallocate(A)
deallocate(AT)
deallocate(ATA)
end subroutine polynomial_fit
This runs to completion. If I omit the interface I get "HERE" printed twice. If I use the d versions I get seg faults.
Does this answer your question?