bernard gingold <
ben...@gmail.com> schrieb:
FWIW, here is what a recent gfortran makes of this code,
with -Ofast -fverbose-asm -S -march=native on a Ryzen 7:
# Initialisation loop: writes one 16-byte element of a and one of b per pass.
#   %edx        = loop counter i
#   %rax        = byte offset into both arrays (ivtmp.58), +16 per pass
#   %xmm2/%xmm1 = the two 8-byte halves stored into b(i); they are loaded
#                 before this excerpt (presumably the real and imaginary
#                 parts of the constant -- confirm against the prologue)
# The starting values of %edx and %rax are also set outside this excerpt.
# a sits at 608(%rsp) and b at 1632(%rsp), i.e. 1024 bytes = 64 elements apart.
.L2:
# a.f90:11: a(i) = CMPLX(real(i,kind=8),real(i,kind=8))!<--- If
# this line of code
# Zero xmm0 first: vcvtsi2ss below merges into its destination register, so
# this removes the dependency on whatever xmm0 held from the previous pass.
vxorps %xmm0, %xmm0, %xmm0 # tmp105
# a.f90:15: b(i) = CMPLX(3.14_8,3.14_8)
# Low 8 bytes of b(i) from xmm2, high 8 bytes from xmm1.
vmovsd %xmm2, 1632(%rsp,%rax) # tmp128, MEM[symbol: b, index: ivtmp.58_34, offset: 0B]
vmovsd %xmm1, 1640(%rsp,%rax) # tmp127, MEM[symbol: b, index: ivtmp.58_34, offset: 0B]
# a.f90:11: a(i) = CMPLX(real(i,kind=8),real(i,kind=8))!<--- If
# this line of code
# i is converted integer -> single -> double, so the stored value only has
# single-precision accuracy.
# NOTE(review): presumably because CMPLX without a KIND argument yields a
# default-kind (single precision) complex -- confirm against the Fortran source.
vcvtsi2ss %edx, %xmm0, %xmm0 # i, tmp105, tmp105
vcvtss2sd %xmm0, %xmm0, %xmm0 # tmp105, _2
# a.f90:10: do i = 1_4, length
incl %edx # i
# a.f90:11: a(i) = CMPLX(real(i,kind=8),real(i,kind=8))!<--- If
# this line of code
# Duplicate the low double into both lanes, so both halves of a(i) get the
# same value, then store the whole element with one aligned 16-byte write.
vmovddup %xmm0, %xmm0 # _2, tmp107
vmovaps %xmm0, 608(%rsp,%rax) # tmp107, MEM[symbol: a, index: ivtmp.58_34, offset: 0B]
addq $16, %rax #, ivtmp.58
# a.f90:10: do i = 1_4, length
# Leave the loop once the incremented counter reaches 65.
cmpl $65, %edx #, i
jne .L2 #,
# Division loop (a.f90:18, c = a / b): one 16-byte complex element per pass.
#   %rax = byte offset (ivtmp.41), 0 up to 1024 in steps of 16 (64 elements)
#   %rbx = base address of c, set up outside this excerpt
# Writing a = ar + i*ai and b = br + i*bi, with the real part in the low lane
# and the imaginary part in the high lane, the loop evaluates
#   c = ((ar*br + ai*bi) + i*(ai*br - ar*bi)) / (br*br + bi*bi)
# directly, without any scaling of the operands against overflow/underflow.
# a.f90:15: b(i) = CMPLX(3.14_8,3.14_8)
# Reset the shared byte offset before entering the loop.
xorl %eax, %eax # ivtmp.41
# Align the loop head.
.p2align 4
.p2align 3
.L3:
# xmm0 = [ar, ai], xmm5 = [br, bi]   (written as [low lane, high lane])
vmovaps 608(%rsp,%rax), %xmm0 # MEM[symbol: a, index: ivtmp.41_33, offset: 0B], vect__46.12
vmovaps 1632(%rsp,%rax), %xmm5 # MEM[symbol: b, index: ivtmp.41_33, offset: 0B], vect__9.8
# xmm4 = [ai, ar]  (a with its lanes swapped)
vpermilpd $1, %xmm0, %xmm4 #, vect__46.12, vect__46.17
# xmm1 = [bi, bi]
vpermilpd $3, %xmm5, %xmm1 #,, vect__9.21
# xmm2 = [br, br]
vpermilpd $0, %xmm5, %xmm2 #,, vect__9.9
# Copy of b(i) written to the stack slot at (%rsp) on every pass; nothing in
# this excerpt reads it back.
vmovaps %xmm5, (%rsp) # vect__9.8, %sfp
# a.f90:18: c = a / b
# xmm3 = [bi*bi, bi*bi]
vmulpd %xmm1, %xmm1, %xmm3 # vect__9.21, vect__9.21, vect_powmult_47.25
# xmm1 = [ai*bi, ar*bi]
vmulpd %xmm4, %xmm1, %xmm1 # vect__46.17, vect__9.21, vect__45.22
# xmm4 = [ar, ai]  (fresh copy of a, because the FMA below overwrites it)
vmovaps %xmm0, %xmm4 # vect__46.12, vect__44.23
# xmm4 = xmm4*xmm2 + xmm1 = [ar*br + ai*bi, ai*br + ar*bi]
vfmadd132pd %xmm2, %xmm1, %xmm4 # vect__9.9, vect__45.22, vect__44.23
# xmm0 = xmm0*xmm2 - xmm1 = [ar*br - ai*bi, ai*br - ar*bi]
vfmsub132pd %xmm2, %xmm1, %xmm0 # vect__9.9, vect__45.22, vect__44.24
# xmm3 = xmm2*xmm2 + xmm3 = [br*br + bi*bi, br*br + bi*bi]  (denominator)
vfmadd231pd %xmm2, %xmm2, %xmm3 # vect__9.9, vect__9.9, vect__12.27
# Take the low lane from the "add" result and keep the high lane of the "sub"
# result: xmm0 = [ar*br + ai*bi, ai*br - ar*bi]  (numerator)
vmovsd %xmm4, %xmm0, %xmm0 # vect__44.23, vect__44.24, tmp115
# xmm0 = numerator / denominator, lane by lane
vdivpd %xmm3, %xmm0, %xmm0 # vect__12.27, tmp115, vect__54.28
vmovaps %xmm0, (%rbx,%rax) # vect__54.28, MEM[symbol: c, index: ivtmp.41_33, offset: 0B]
addq $16, %rax #, ivtmp.41
# Stop after 1024 bytes, i.e. 64 elements of 16 bytes each.
cmpq $1024, %rax #, ivtmp.41
jne .L3 #,
# a.f90:22: print*, c