<
robert....@oracle.com> wrote in message
news:2a1c2927-a22b-4eb0...@z5g2000yqj.googlegroups.com...
> I apologize for posting C code to this group.
I thought the point of the thread was to come up with high-level
constructs to make the original code simpler in some sense.
However, it seems to have devolved into an attempt at optimization.
There have been problems with this in that the attempts have been
written in C, the timing code was inappropriate because it didn't
attempt to strain branch prediction and the attempts were all
slow. Therefore we attempt to remedy the situation:
module funcs
use ISO_C_BINDING
implicit none
contains
function ilog10_1(x) bind(C,name='ilog10_1') result(count)
integer(C_INT64_T), value :: x
integer(C_INT64_T) count
count = 1
if (x >= 10000000000000000_C_INT64_T) then
count = count + 16
x = x / 100000000
end if
if (x >= 100000000) then
count = count + 8
x = x / 100000000
end if
if (x >= 10000) then
count = count + 4
x = x / 10000
end if
if (x >= 100) then
count = count + 2
x = x / 100
end if
if (x >= 10) count = count + 1
end function ilog10_1
function ilog10_2(x) bind(C,name='ilog10_2')
integer(C_INT64_T), value :: x
integer(C_INT64_T) ilog10_2
ilog10_2 = 0
if(x < 10_C_INT64_T) then
ilog10_2 = 1
return
end if
if(x < 100_C_INT64_T) then
ilog10_2 = 2
return
end if
if(x < 1000_C_INT64_T) then
ilog10_2 = 3
return
end if
if(x < 10000_C_INT64_T) then
ilog10_2 = 4
return
end if
if(x < 100000_C_INT64_T) then
ilog10_2 = 5
return
end if
if(x < 1000000_C_INT64_T) then
ilog10_2 = 6
return
end if
if(x < 10000000_C_INT64_T) then
ilog10_2 = 7
return
end if
if(x < 100000000_C_INT64_T) then
ilog10_2 = 8
return
end if
if(x < 1000000000_C_INT64_T) then
ilog10_2 = 9
return
end if
if(x < 10000000000_C_INT64_T) then
ilog10_2 = 10
return
end if
if(x < 100000000000_C_INT64_T) then
ilog10_2 = 11
return
end if
if(x < 1000000000000_C_INT64_T) then
ilog10_2 = 12
return
end if
if(x < 10000000000000_C_INT64_T) then
ilog10_2 = 13
return
end if
if(x < 100000000000000_C_INT64_T) then
ilog10_2 = 14
return
end if
if(x < 1000000000000000_C_INT64_T) then
ilog10_2 = 15
return
end if
if(x < 10000000000000000_C_INT64_T) then
ilog10_2 = 16
return
end if
if(x < 100000000000000000_C_INT64_T) then
ilog10_2 = 17
return
end if
if(x < 1000000000000000000_C_INT64_T) then
ilog10_2 = 18
return
end if
ilog10_2 = 19
return
end function ilog10_2
function ilog10_3(x) bind(C,name='ilog10_3')
integer(C_INT64_T), value :: x
integer(C_INT64_T) ilog10_3
if (x < 100000000_C_INT64_T) then
if (x < 10000_C_INT64_T) then
if (x < 100_C_INT64_T) then
if (x < 10_C_INT64_T) then
ilog10_3 = 1
return
else
ilog10_3 = 2
return
end if
else if (x < 1000_C_INT64_T) then
ilog10_3 = 3
return
else
ilog10_3 = 4
return
end if
else if (x < 1000000_C_INT64_T) then
if (x < 100000_C_INT64_T) then
ilog10_3 = 5
return
else
ilog10_3 = 6
return
end if
else if (x < 10000000_C_INT64_T) then
ilog10_3 = 7
return
else
ilog10_3 = 8
return
end if
else if (x < 10000000000000000_C_INT64_T) then
if (x < 1000000000000_C_INT64_T) then
if (x < 10000000000_C_INT64_T) then
if (x < 1000000000_C_INT64_T) then
ilog10_3 = 9
return
else
ilog10_3 = 10
return
end if
else if (x < 100000000000_C_INT64_T) then
ilog10_3 = 11
return
else
ilog10_3 = 12
return
end if
else if (x < 100000000000000_C_INT64_T) then
if (x < 10000000000000_C_INT64_T) then
ilog10_3 = 13
return
else
ilog10_3 = 14
return
end if
else if (x < 1000000000000000_C_INT64_T) then
ilog10_3 = 15
return
else
ilog10_3 = 16
return
end if
else if (x < 1000000000000000000_C_INT64_T) then
if (x < 100000000000000000_C_INT64_T) then
ilog10_3 = 17
return
else
ilog10_3 = 18
return
end if
else
ilog10_3 = 19
return
end if
end function ilog10_3
function ilog10_4(x) bind(C,name='ilog10_4')
integer(C_INT64_T), value :: x
integer(C_INT64_T) ilog10_4
integer(C_INT64_T) alternate
integer lz2
integer(C_INT8_T), parameter :: a(-1:32) = [&
19,19,19,18,17,17,16,16,15,14,14,13,13,12, &
11,11,10,10,9,8,8,7,7,6,5,5,4,4,3,2,2,1,1,0]
integer(C_INT64_T), parameter :: b(0:32) = [&
1000000000000000000_C_INT64_T,1000000000000000000_C_INT64_T,&
1000000000000000000_C_INT64_T, 100000000000000000_C_INT64_T,&
10000000000000000_C_INT64_T, 10000000000000000_C_INT64_T,&
1000000000000000_C_INT64_T, 1000000000000000_C_INT64_T,&
100000000000000_C_INT64_T, 10000000000000_C_INT64_T,&
10000000000000_C_INT64_T, 1000000000000_C_INT64_T,&
1000000000000_C_INT64_T, 100000000000_C_INT64_T,&
10000000000_C_INT64_T, 10000000000_C_INT64_T,&
1000000000_C_INT64_T, 1000000000_C_INT64_T,&
100000000_C_INT64_T, 10000000_C_INT64_T,&
10000000_C_INT64_T, 1000000_C_INT64_T,&
1000000_C_INT64_T, 100000_C_INT64_T,&
10000_C_INT64_T, 10000_C_INT64_T,&
1000_C_INT64_T, 1000_C_INT64_T,&
100_C_INT64_T, 10_C_INT64_T,&
10_C_INT64_T, 1_C_INT64_T, 0_C_INT64_T]
lz2 = ishft(leadz(x),-1)
ilog10_4 = a(lz2)
alternate = a(lz2-1)
if(x >= b(lz2)) ilog10_4 = alternate
end function ilog10_4
subroutine sub(a,n)
integer n
integer(C_INT64_T) a(n)
a = cshift(a,1)
end subroutine sub
end module funcs
module utils
use ISO_C_BINDING
implicit none
private
public utils_init, rdtsc
integer(C_INT8_T) BAD_STUFF(32)
data BAD_STUFF / &
Z'31', Z'C0', Z'40', Z'75', Z'0A', Z'0F', Z'31', Z'48', &
Z'C1', Z'E2', Z'20', Z'48', Z'09', Z'D0', Z'C3', Z'0F', &
Z'31', Z'C3', Z'90', Z'90', Z'90', Z'90', Z'90', Z'90', &
Z'90', Z'90', Z'90', Z'90', Z'90', Z'90', Z'90', Z'90'/
abstract interface
function rdtsc_template() bind(C)
use ISO_C_BINDING
implicit none
integer(C_INT64_T) :: rdtsc_template
end function rdtsc_template
end interface
procedure(rdtsc_template), pointer :: rdtsc => NULL()
interface
function VirtualAlloc(lpAddress, dwSize, flAllocationType, &
flProtect) bind(C, name = 'VirtualAlloc')
use ISO_C_BINDING
implicit none
!GCC$ ATTRIBUTES STDCALL :: VirtualAlloc
type(C_PTR) VirtualAlloc
type(C_PTR), value :: lpAddress
integer(C_SIZE_T), value :: dwSize
integer(C_LONG), value :: flAllocationType
integer(C_LONG), value :: flProtect
end function VirtualAlloc
function GetLastError() bind(C,name='GetLastError')
use ISO_C_BINDING
implicit none
!GCC$ ATTRIBUTES STDCALL :: GetLastError
integer(C_LONG) GetLastError
end function GetLastError
end interface
contains
subroutine utils_init()
type(C_PTR) address
integer(C_INTPTR_T) temp
integer(C_LONG) error
integer(C_INT8_T), pointer :: temp_ptr(:)
type(C_FUNPTR) fun_address
logical :: first = .TRUE.
if(.NOT.first) return
first = .FALSE.
address = VirtualAlloc(C_NULL_PTR, &
size(BAD_STUFF,kind=C_SIZE_T),int(Z'1000',C_LONG), &
int(Z'40',C_LONG))
if(.NOT.C_ASSOCIATED(address)) then
error = GetLastError()
write(*,'(a,z0,a)') "Error Z'",error,"' allocating memory"
stop
end if
call C_F_POINTER(address, temp_ptr, shape(BAD_STUFF))
temp_ptr = BAD_STUFF
temp = transfer(address, temp)
fun_address = transfer(temp, fun_address)
call C_F_PROCPOINTER(fun_address, rdtsc)
end subroutine utils_init
end module utils
program main
use funcs
use ISO_C_BINDING
use utils
implicit none
integer, parameter :: nmethod = 4
type(C_FUNPTR) cfp(4)
integer(C_INT64_T) pow2, pow10, n
integer col
procedure(ilog10_1), pointer :: ilog10
integer out(nmethod)
character(80) fmt
integer(C_INT64_T) ts1,ts2
integer,parameter :: ntrial = 6
integer(C_INT64_T) times(ntrial,nmethod)
integer, parameter :: ndata = 500
integer(C_INT64_T) data(ndata)
integer row, trial
integer nseed
integer, allocatable :: seed(:)
real(C_DOUBLE) harvest(3)
integer(C_INT64_T) chk(ntrial,nmethod)
integer(C_INT64_T) time(ntrial,nmethod)
integer(C_INT64_T) ndigit,lowbits
cfp(1) = C_FUNLOC(ilog10_1)
cfp(2) = C_FUNLOC(ilog10_2)
cfp(3) = C_FUNLOC(ilog10_3)
cfp(4) = C_FUNLOC(ilog10_4)
pow2 = 1
pow10 = 1
n = 0
write(fmt,'(a,i0,a)') '(a19,',nmethod,'(1x,i4),a)'
write(*,fmt) 'input\method',(col,col=1,nmethod)
write(fmt,'(a,i0,a)') '(i19,',nmethod,'(1x,i4),a)'
do
do col = 1, nmethod
call C_F_PROCPOINTER(cfp(col),ilog10)
out(col) = ilog10(n)
end do
write(*,fmt) n,out, &
trim(merge(' *',' ',any(out /= minval(out,1))))
if(n == huge(n)) then
exit
else if(mod(n,2_C_INT64_T) == 1) then
n = n+1
else if(pow10 == range(n)) then
n = 2**pow2-1
pow2 = pow2+1
else if(2**pow2 < 10**pow10) then
n = 2**pow2-1
pow2 = pow2+1
else
n = 10**pow10-1
pow10 = pow10+1
end if
end do
call utils_init
call random_seed()
call random_seed(size=nseed)
allocate(seed(nseed))
call random_seed(get=seed)
ts1 = rdtsc()
seed = ieor(int(seed,C_INT64_T),ts1)
call random_seed(put=seed)
do trial = 1, ntrial
call random_number(harvest)
ndigit = 19*harvest(1)+1
ndigit = min(19,ndigit)
if(ndigit == 19) then
data(trial) = harvest(2)*(huge(ndigit)-10**(ndigit-1))
data(trial) = min(huge(ndigit)-10**(ndigit-1),data(trial))
data(trial) = 10**(ndigit-1)+data(trial)
lowbits = harvest(3)*2_C_INT64_T**30
data(trial) = ieor(data(trial),lowbits)
else
data(trial) = harvest(2)*(10**ndigit-10**(ndigit-1))
data(trial) = 10**(ndigit-1)+data(trial)
lowbits = harvest(3)*2_C_INT64_T**30
if(2_C_INT64_T**30 < data(trial)) then
data(trial) = ieor(data(trial),lowbits)
end if
data(trial) = min(data(trial),10**ndigit-1)
end if
end do
do row = 1, ntrial
do col = 1, nmethod
call C_F_PROCPOINTER(cfp(col),ilog10)
ts1 = rdtsc()
chk(row,col) = 0
do trial = 1, ndata
chk(row,col) = chk(row,col)+ilog10(data(trial))
end do
ts2 = rdtsc()
time(row,col) = ts2-ts1
end do
call sub(data,ndata)
end do
write(fmt,'(a,i0,a)') '((a12,',nmethod,'(1x,i6)))'
write(*,fmt) 'trial\method',(col,col=1,nmethod)
write(fmt,'(a,i0,a)') '((i12,',nmethod,'(1x,i6)))'
write(*,fmt) (row,(time(row,col),col=1,nmethod),row=1,ntrial)
write(*,'(a,i0)') 'checksum = ',sum(chk)
end program main
Output with
gfortran -O2 -fno-range-check ilog10.f90 -oilog10
input\method 1 2 3 4
0 1 1 1 1
1 1 1 1 1
2 1 1 1 1
3 1 1 1 1
4 1 1 1 1
7 1 1 1 1
8 1 1 1 1
9 1 1 1 1
10 2 2 2 2
15 2 2 2 2
16 2 2 2 2
31 2 2 2 2
32 2 2 2 2
63 2 2 2 2
64 2 2 2 2
99 2 2 2 2
100 3 3 3 3
127 3 3 3 3
128 3 3 3 3
255 3 3 3 3
256 3 3 3 3
511 3 3 3 3
512 3 3 3 3
999 3 3 3 3
1000 4 4 4 4
1023 4 4 4 4
1024 4 4 4 4
2047 4 4 4 4
2048 4 4 4 4
4095 4 4 4 4
4096 4 4 4 4
8191 4 4 4 4
8192 4 4 4 4
9999 4 4 4 4
10000 5 5 5 5
16383 5 5 5 5
16384 5 5 5 5
32767 5 5 5 5
32768 5 5 5 5
65535 5 5 5 5
65536 5 5 5 5
99999 5 5 5 5
100000 6 6 6 6
131071 6 6 6 6
131072 6 6 6 6
262143 6 6 6 6
262144 6 6 6 6
524287 6 6 6 6
524288 6 6 6 6
999999 6 6 6 6
1000000 7 7 7 7
1048575 7 7 7 7
1048576 7 7 7 7
2097151 7 7 7 7
2097152 7 7 7 7
4194303 7 7 7 7
4194304 7 7 7 7
8388607 7 7 7 7
8388608 7 7 7 7
9999999 7 7 7 7
10000000 8 8 8 8
16777215 8 8 8 8
16777216 8 8 8 8
33554431 8 8 8 8
33554432 8 8 8 8
67108863 8 8 8 8
67108864 8 8 8 8
99999999 8 8 8 8
100000000 9 9 9 9
134217727 9 9 9 9
134217728 9 9 9 9
268435455 9 9 9 9
268435456 9 9 9 9
536870911 9 9 9 9
536870912 9 9 9 9
999999999 9 9 9 9
1000000000 10 10 10 10
1073741823 10 10 10 10
1073741824 10 10 10 10
2147483647 10 10 10 10
2147483648 10 10 10 10
4294967295 10 10 10 10
4294967296 10 10 10 10
8589934591 10 10 10 10
8589934592 10 10 10 10
9999999999 10 10 10 10
10000000000 11 11 11 11
17179869183 11 11 11 11
17179869184 11 11 11 11
34359738367 11 11 11 11
34359738368 11 11 11 11
68719476735 11 11 11 11
68719476736 11 11 11 11
99999999999 11 11 11 11
100000000000 12 12 12 12
137438953471 12 12 12 12
137438953472 12 12 12 12
274877906943 12 12 12 12
274877906944 12 12 12 12
549755813887 12 12 12 12
549755813888 12 12 12 12
999999999999 12 12 12 12
1000000000000 13 13 13 13
1099511627775 13 13 13 13
1099511627776 13 13 13 13
2199023255551 13 13 13 13
2199023255552 13 13 13 13
4398046511103 13 13 13 13
4398046511104 13 13 13 13
8796093022207 13 13 13 13
8796093022208 13 13 13 13
9999999999999 13 13 13 13
10000000000000 14 14 14 14
17592186044415 14 14 14 14
17592186044416 14 14 14 14
35184372088831 14 14 14 14
35184372088832 14 14 14 14
70368744177663 14 14 14 14
70368744177664 14 14 14 14
99999999999999 14 14 14 14
100000000000000 15 15 15 15
140737488355327 15 15 15 15
140737488355328 15 15 15 15
281474976710655 15 15 15 15
281474976710656 15 15 15 15
562949953421311 15 15 15 15
562949953421312 15 15 15 15
999999999999999 15 15 15 15
1000000000000000 16 16 16 16
1125899906842623 16 16 16 16
1125899906842624 16 16 16 16
2251799813685247 16 16 16 16
2251799813685248 16 16 16 16
4503599627370495 16 16 16 16
4503599627370496 16 16 16 16
9007199254740991 16 16 16 16
9007199254740992 16 16 16 16
9999999999999999 16 16 16 16
10000000000000000 25 17 17 17 *
18014398509481983 25 17 17 17 *
18014398509481984 25 17 17 17 *
36028797018963967 25 17 17 17 *
36028797018963968 25 17 17 17 *
72057594037927935 25 17 17 17 *
72057594037927936 25 17 17 17 *
99999999999999999 25 17 17 17 *
100000000000000000 26 18 18 18 *
144115188075855871 26 18 18 18 *
144115188075855872 26 18 18 18 *
288230376151711743 26 18 18 18 *
288230376151711744 26 18 18 18 *
576460752303423487 26 18 18 18 *
576460752303423488 26 18 18 18 *
1152921504606846975 27 19 19 19 *
1152921504606846976 27 19 19 19 *
2305843009213693951 27 19 19 19 *
2305843009213693952 27 19 19 19 *
4611686018427387903 27 19 19 19 *
4611686018427387904 27 19 19 19 *
9223372036854775807 27 19 19 19 *
trial\method 1 2 3 4
1 17220 14520 12220 8520
2 28120 24250 19660 14240
3 27480 23370 19140 14120
4 27330 23370 19230 14120
5 27330 23210 19220 14130
6 27260 22860 19060 14130
checksum = 78432