Hi all. I was reading the discussion on
Why is the Fortran intrinsic function "spread" often slower than explicit iteration
https://stackoverflow.com/questions/55717904/why-is-the-fortran-intrinsic-function-spread-often-slower-than-explicit-iterat
and wanted to try it myself. I modified the code provided by Steve Lionel in the answer (see the end of this message) and got the following timings, in seconds (compiled with gfortran 9.4.0 using -O3; -O0 gives similar results):
Iteration 1 4.42133093
Spread 1 0.309983253
Iteration 3 0.212992191
Spread 3 0.917150021
So it seems that the answer to the question "should you use spread or do?" is that it depends on the dimension you are iterating over. Is there a consistently optimal way to write these types of calculations?
-------------------------------------------------------------------
! Benchmark kernels that multiply the rank-3 array d3 by the rank-2 array d2,
! broadcasting d2 along either the first or the third dimension of d3.  Each
! product is written twice: as an explicit do loop over array sections and as
! a single whole-array expression built with the spread intrinsic.
module benchmarks
  implicit none

  private :: require_module_extent

  integer, parameter :: n = 500   ! extent of every dimension of d2 and d3
  real :: d2(n, n)                ! rank-2 operand; not assigned in this module
  real :: d3(n, n, n)             ! rank-3 operand; not assigned in this module

contains

  ! Iteration 1
  ! Computes res(j,a,b) = d2(a,b)*d3(j,a,b) with an explicit loop over the
  ! first index.  Fortran arrays are stored column-major, so the sections
  ! res(j,:,:) and d3(j,:,:) are non-contiguous.
  !   res : receives the product; every element is overwritten.
  !   n   : extent of each dimension of res.  This dummy hides the module
  !         parameter n and must be equal to it.
  subroutine benchmark_i1(res, n)
    integer, intent(in) :: n
    real, intent(out) :: res(n, n, n)
    integer :: j   ! local loop index (no shared module-level counter)

    call require_module_extent(n)
    do j = 1, n
      res(j, :, :) = d2*d3(j, :, :)
    end do
  end subroutine benchmark_i1

  ! Spread 1
  ! Computes the same product as benchmark_i1 in one array expression:
  ! spread(d2, 1, n) replicates d2 n times along a new first dimension.
  !   res : receives the product; every element is overwritten.
  !   n   : extent of each dimension of res; must equal the module parameter n.
  subroutine benchmark_s1(res, n)
    integer, intent(in) :: n
    real, intent(out) :: res(n, n, n)

    call require_module_extent(n)
    res = d3*spread(d2, 1, n)
  end subroutine benchmark_s1

  ! Iteration 3
  ! Computes res(a,b,j) = d2(a,b)*d3(a,b,j) with an explicit loop over the
  ! third index, so each section res(:,:,j) is a contiguous slab.
  !   res : receives the product; every element is overwritten.
  !   n   : extent of each dimension of res; must equal the module parameter n.
  subroutine benchmark_i3(res, n)
    integer, intent(in) :: n
    real, intent(out) :: res(n, n, n)
    integer :: j   ! local loop index (no shared module-level counter)

    call require_module_extent(n)
    do j = 1, n
      res(:, :, j) = d2*d3(:, :, j)
    end do
  end subroutine benchmark_i3

  ! Spread 3
  ! Computes the same product as benchmark_i3 in one array expression:
  ! spread(d2, 3, n) replicates d2 n times along a new third dimension.
  !   res : receives the product; every element is overwritten.
  !   n   : extent of each dimension of res; must equal the module parameter n.
  subroutine benchmark_s3(res, n)
    integer, intent(in) :: n
    real, intent(out) :: res(n, n, n)

    call require_module_extent(n)
    res = d3*spread(d2, 3, n)
  end subroutine benchmark_s3

  ! Stops the program when a kernel is called with an extent that differs
  ! from the module parameter n.  d2 and d3 are always n x n (x n), so any
  ! other extent would make the kernels' array expressions nonconforming.
  subroutine require_module_extent(extent)
    integer, intent(in) :: extent

    if (extent /= n) then
      error stop 'benchmarks: argument n must equal the extent of d2 and d3'
    end if
  end subroutine require_module_extent

end module benchmarks
! Driver for the benchmarks module: fills d2 and d3 with random numbers, then
! times each kernel with cpu_time and prints its label, the elapsed CPU time
! and sum(res).
program main
  use benchmarks, only: n, d2, d3, benchmark_i1, benchmark_s1, &
                        benchmark_i3, benchmark_s3
  implicit none
  real :: tstart, tend
  real :: res(n, n, n)

  call random_number(d2)
  call random_number(d3)

  ! Write every element of res once before any timing starts.  Otherwise the
  ! first timed kernel is also the first code to touch res, and on typical
  ! demand-paged systems it would be charged the one-time cost of mapping that
  ! memory, which the later kernels do not pay.  The values are irrelevant:
  ! every kernel overwrites res completely.
  call random_number(res)

  ! Iteration
  call cpu_time(tstart)
  call benchmark_i1(res, n)
  call cpu_time(tend)
  ! NOTE(review): sum(res) is presumably printed so the compiler cannot
  ! discard the computation; it is evaluated outside the timed region.
  write(*,*) 'Iteration 1', tend-tstart, sum(res)

  ! Spread
  call cpu_time(tstart)
  call benchmark_s1(res, n)
  call cpu_time(tend)
  write(*,*) 'Spread 1', tend-tstart, sum(res)

  ! Iteration
  call cpu_time(tstart)
  call benchmark_i3(res, n)
  call cpu_time(tend)
  write(*,*) 'Iteration 3', tend-tstart, sum(res)

  ! Spread
  call cpu_time(tstart)
  call benchmark_s3(res, n)
  call cpu_time(tend)
  write(*,*) 'Spread 3', tend-tstart, sum(res)
end program main