Carlie Coats wrote:
> But the biggest improvement I would like for F9x compilers is
> to be able to control the argument-passing mechanism. Or at
> least, diagnostic listings that _document_ which mechanism is
> being used, on a case-by-case basis. It takes too damned long
> to have to decipher the assembly listing for every case of
> potential significance.
Some compilers do better than others. You will find below a snippet
that gives some ideas of the compiler performance related to argument
passing. I added a small shell script that may be helpful to run this
small benchmark under Unix.
Interestingly enough, compilers made some real progress in the last
three years. On Unix, most of them show good results. On Windows,
the situation looked less good last time I tried.
Regards,
Arnaud
! tab2.f90
!-------------------------------------------------------------------------- ---
! Test program to show the difference of treatment between a "regular array"
! and an array in a derived type
!
! This snippet derives from the experience gained during a port of a large
! production system (http://www.telemac-system.com).
!
! Originally issued by Jean-Michel Hervouet <j-m.hervo...@edf.fr>
! Modified by Arnaud Desitter - Nag Ltd. - <arnaud.desit...@nag.co.uk>
!
! use separate compilation to avoid inlining
! typical example on Unix:
! f95 -O -c tab1.f90; f95 -O -c tab2.f90; f95 -O tab1.o tab2.o; ./a.out
!-------------------------------------------------------------------------- ---
PROGRAM TEST
!
! Benchmark driver comparing four ways of handing the same data to a
! simple kernel (X = 2*Y over NPOIN elements, repeated N times):
!   1. allocatable arrays       -> OV    (explicit-shape dummies X(NPOIN))
!   2. derived-type pointer R   -> OV    (explicit-shape dummies X(NPOIN))
!   3. allocatable arrays       -> OV_sh (assumed-shape dummies X(:))
!   4. derived-type pointer R   -> OV_sh (assumed-shape dummies X(:))
! Case 1 is the baseline; the program prints the elapsed time of cases
! 2 to 4 divided by the elapsed time of case 1.
!
USE BIEF_DEF, ONLY: BIEF_OBJ, OV, OV_sh
USE TIMING, ONLY: TIME_IN_SECONDS
IMPLICIT NONE
INTEGER :: I
! NPOIN: number of elements per array, N: number of repetitions per case
INTEGER, PARAMETER :: NPOIN=100000, N=1000
double precision :: t1, t2
double precision :: timings(4)
TYPE(BIEF_OBJ), TARGET :: A_STRUCT ,B_STRUCT
DOUBLE PRECISION,ALLOCATABLE :: A_NORMAL(:),B_NORMAL(:)
!
ALLOCATE(A_STRUCT%R(NPOIN))
ALLOCATE(B_STRUCT%R(NPOIN))
ALLOCATE(A_NORMAL(NPOIN))
ALLOCATE(B_NORMAL(NPOIN))
! Only the inputs need values; the A_* arrays are overwritten by the kernels.
DO I=1,NPOIN
  B_STRUCT%R(I) = 1.D0
  B_NORMAL(I) = 1.D0
ENDDO
!
! Case 1 (baseline): regular arrays, explicit-shape dummies
call TIME_IN_SECONDS(T1)
DO I=1,N
  CALL OV(A_NORMAL,B_NORMAL,NPOIN)
ENDDO
call TIME_IN_SECONDS(T2)
timings(1)=(T2-T1)
!
! Case 2: derived-type components, explicit-shape dummies
call TIME_IN_SECONDS(T1)
DO I=1,N
  CALL OV(A_STRUCT%R,B_STRUCT%R,NPOIN)
ENDDO
call TIME_IN_SECONDS(T2)
timings(2)=(T2-T1)
!
! Case 3: regular arrays, assumed-shape dummies
call TIME_IN_SECONDS(T1)
DO I=1,N
  CALL OV_sh(A_NORMAL,B_NORMAL,NPOIN)
ENDDO
call TIME_IN_SECONDS(T2)
timings(3)=(T2-T1)
!
! Case 4: derived-type components, assumed-shape dummies
call TIME_IN_SECONDS(T1)
DO I=1,N
  CALL OV_sh(A_STRUCT%R,B_STRUCT%R,NPOIN)
ENDDO
call TIME_IN_SECONDS(T2)
timings(4)=(T2-T1)
!
! Report ratios relative to the baseline. A baseline of zero (or less)
! means the clock could not resolve the run, so no ratio is meaningful
! and dividing would only produce Inf/NaN.
IF (timings(1) > 0.D0) THEN
  timings=timings/timings(1)
  write(unit=*,fmt=10) timings(2:4)
ELSE
  write(unit=*,fmt=*) "Baseline time is not positive; ratios are not available"
ENDIF
10 format(&
1x,"Abstraction penalty versus regular array + assumed size",&
" dummy argument",/,&
1x,"(1 means no penalty)",/,&
1x,"Derived type + assumed size : ",F8.3,/,&
1x,"Regular array + assumed shape: ",F8.3,/,&
1x,"Derived type + assumed shape : ",F8.3)
!
! Pointer components are not released automatically; free everything
! that was allocated above.
DEALLOCATE(A_STRUCT%R)
DEALLOCATE(B_STRUCT%R)
DEALLOCATE(A_NORMAL)
DEALLOCATE(B_NORMAL)
END PROGRAM TEST
! tab1.f90
module bief_def
!
! Data container and the two kernels exercised by the benchmark.
! Both kernels compute X(k) = 2*Y(k) for k = 1..NPOIN; they differ only
! in how the array dummy arguments are declared.
!
implicit none
!
! Object holding a rank-1 double precision pointer component R.
type bief_obj
  double precision, dimension(:), pointer :: r
end type bief_obj
!
contains
!
! Kernel with explicit-shape dummies: X and Y are declared with
! extent NPOIN.
subroutine ov(x,y,npoin)
  integer, intent(in) :: npoin
  double precision, intent(inout) :: x(npoin)
  double precision, intent(in) :: y(npoin)
  integer :: k
!
  do k=1,npoin
    x(k) = 2.d0*y(k)
  end do
end subroutine ov
!
! Kernel with assumed-shape dummies: X and Y take their shape from the
! actual arguments, and only the first NPOIN elements are processed.
subroutine ov_sh(x,y,npoin)
  integer, intent(in) :: npoin
  double precision, intent(inout) :: x(:)
  double precision, intent(in) :: y(:)
  integer :: k
!
  do k=1,npoin
    x(k) = 2.d0*y(k)
  end do
end subroutine ov_sh
!
end module bief_def
MODULE TIMING
!
! Wall-clock helper based on the SYSTEM_CLOCK intrinsic.
!
IMPLICIT NONE
CONTAINS
!
! Return in T the current SYSTEM_CLOCK count converted to seconds
! (count divided by count rate).
!   T : (out) clock reading in seconds; 0 when the processor reports a
!       non-positive count rate, i.e. no usable clock.
! Only differences between two readings are meaningful. The count is a
! default integer, so it may wrap around during a long run
! (NOTE(review): callers measuring long intervals should account for
! COUNT_MAX).
subroutine TIME_IN_SECONDS ( t )
IMPLICIT NONE
!
double precision, intent(out) :: T
INTEGER :: TEMPS,PARSEC
intrinsic dble
!
CALL SYSTEM_CLOCK(COUNT=TEMPS,COUNT_RATE=PARSEC)
! COUNT_RATE is zero when no clock is available; avoid dividing by it.
IF (PARSEC > 0) THEN
  T = dble(TEMPS) / dble(PARSEC)
ELSE
  T = 0.D0
ENDIF
!
RETURN
END subroutine TIME_IN_SECONDS
END MODULE TIMING
go_test:
#! /bin/sh
# Build tab1 and tab2 as separate compilation units with ${FC} ${FFLAGS},
# link them, and run the resulting ./a.out.
# Reads the variables FC, FFLAGS and SUFF set by the caller.
# FC and FFLAGS are expanded unquoted on purpose so that values such as
# 'f95 -w' split into command and options.
compile_run(){
    echo "======================"
    echo "******* build *******"
    # Drop any executable left over from a previous compiler or flag set,
    # so a failed build can never be reported with stale results.
    rm -f a.out
    (
        set -x
        ${FC} ${FFLAGS} -c tab1.${SUFF} &&
        ${FC} ${FFLAGS} -c tab2.${SUFF} &&
        ${FC} ${FFLAGS} tab1.o tab2.o
    ) 2>&1
    build_status=$?
    echo "********* run ********"
    if [ ${build_status} -eq 0 ] && [ -x ./a.out ]; then
        ./a.out
    else
        echo "build failed (status ${build_status}); skipping run"
    fi
    echo "**********************"
    echo
}
# Hook called by every per-compiler function once FC, FFLAGS and SUFF are
# set; at present it simply delegates to compile_run.
exe_test(){
    compile_run
}
# Delete the build products from the current directory: the executable,
# object files, and module files (*.mod, *.MOD, *.M).
clean(){
    rm -f a.out *.o *.mod *.MOD *.M
}
# Benchmark with the 'f95 -w' driver: print the platform and the compiler
# version, then build and run twice, first with -O4 and then with
# -O4 -Oassumed=contig.
go_nagf95(){
    FC='f95 -w'
    VERSIONFLAGS='-V'
    SUFF='f90'
    echo "Platform: `uname`"
    ${FC} ${VERSIONFLAGS} 2>&1
    for FFLAGS in '-O4' '-O4 -Oassumed=contig'
    do
        exe_test
    done
}
# Benchmark with the 'f90' driver at +O2 (the dispatcher selects this for
# HP* platforms): print the platform and compiler version, then build and run.
go_hpf90(){
    FC='f90'
    VERSIONFLAGS='+version'
    SUFF='f90'
    FFLAGS='+O2'
    echo "Platform: `uname`"
    ${FC} ${VERSIONFLAGS} 2>&1
    exe_test
}
# Benchmark with the 'sgif90' driver at -O2 (the dispatcher selects this on
# Linux when `uname -m` is ia64): print the platform and compiler version,
# then build and run.
go_lx_sgif90(){
    FC='sgif90'
    VERSIONFLAGS='-version'
    SUFF='f90'
    FFLAGS='-O2'
    echo "Platform: `uname`"
    ${FC} ${VERSIONFLAGS} 2>&1
    exe_test
}
# Benchmark with the 'f90' driver at -O2 (the dispatcher selects this for
# IRIX* platforms): print the platform and compiler version, then build
# and run.
go_sgif90(){
    FC='f90'
    VERSIONFLAGS='-version'
    SUFF='f90'
    FFLAGS='-O2'
    echo "Platform: `uname`"
    ${FC} ${VERSIONFLAGS} 2>&1
    exe_test
}
# Benchmark with the 'f90' driver at -O3 (the dispatcher selects this for
# OSF1): print the platform and compiler version, then build and run.
go_duxf90(){
    FC='f90'
    VERSIONFLAGS='-version'
    SUFF='f90'
    FFLAGS='-O3'
    echo "Platform: `uname`"
    ${FC} ${VERSIONFLAGS} 2>&1
    exe_test
}
# Benchmark with the 'f90' driver at -O3 (the dispatcher selects this for
# SunOS): print the platform and compiler version, then build and run.
go_sunf90(){
    FC='f90'
    VERSIONFLAGS='-V'
    SUFF='f90'
    FFLAGS='-O3'
    echo "Platform: `uname`"
    ${FC} ${VERSIONFLAGS} 2>&1
    exe_test
}
# Benchmark with the 'xlf' driver using -qfree=f90 -O (the dispatcher
# selects this for AIX). The sources are built under a '.f' suffix, so
# temporary symbolic links tab1.f / tab2.f are created for the run and
# removed afterwards. The installed Fortran filesets are listed with lslpp
# in place of a compiler version flag.
go_xlf90(){
    FC='xlf'
    SUFF='f'
    FFLAGS='-qfree=f90 -O'
    echo "Platform: `uname`"
    lslpp -l | grep Fortran
    # Errors from ln (for example, links already present) are discarded.
    ln -s tab1.f90 tab1.f 2> /dev/null
    ln -s tab2.f90 tab2.f 2> /dev/null
    exe_test
    rm -f tab1.f tab2.f
}
# Main: pick the compiler configuration(s) matching this platform and run
# the benchmark with each, then remove the build products.
case `uname` in
    HP*)
        go_hpf90
        ;;
    IRIX*)
        go_sgif90
        ;;
    OSF1)
        go_duxf90
        ;;
    Linux)
        go_nagf95
        # On ia64 Linux the sgif90 compiler is exercised as well.
        case `uname -m` in
            ia64)
                go_lx_sgif90
                ;;
        esac
        ;;
    SunOS)
        go_sunf90
        ;;
    AIX)
        go_xlf90
        ;;
    *)
        # Previously an unknown platform ran nothing without any message.
        echo "go_test: no compiler configured for platform `uname`" >&2
        ;;
esac
clean