function SUM(const Data: array of Double): Extended;
asm // IN:  EAX = ptr to Data, EDX = High(Data) = Count - 1
    // OUT: ST(0) = sum of all elements; 0 for an empty array
    //
    // Uses 4 accumulators to minimize read-after-write delays and loop
    // overhead.
    // Original author's estimate: 5 clocks per loop, 4 items per loop
    // = 1.25 clocks per item.
    //
    // The array is consumed from the end towards the start in groups of
    // four. The first (possibly partial) group is entered through a jump
    // table indexed by High(Data) mod 4, so no separate remainder loop is
    // needed.
        FLDZ                            // accumulator #1 (also the result if empty)
        TEST    EDX, EDX                // High(Data) < 0  <=>  empty array
        JS      @@Done                  // nothing to add: return the 0 already in ST(0)
        MOV     ECX, EDX
        FLD     ST(0)                   // accumulator #2
        AND     EDX, not 3              // EDX = index of first item of the last group
        FLD     ST(0)                   // accumulator #3
        AND     ECX, 3                  // ECX = High(Data) mod 4 = jump table index
        FLD     ST(0)                   // accumulator #4
        SHL     EDX, 3                  // index * SizeOf(Double) = byte offset
        JMP     @Vector.Pointer[ECX*4]  // enter the unrolled loop at the right item
@Vector:
        DD      @@1                     // High mod 4 = 0: 1 item  in the last group
        DD      @@2                     // High mod 4 = 1: 2 items in the last group
        DD      @@3                     // High mod 4 = 2: 3 items in the last group
        DD      @@4                     // High mod 4 = 3: 4 items in the last group
        // Each FADD is followed by an FXCH that brings a different
        // accumulator to ST(0), so back-to-back FADDs do not target the
        // same register. Trailing numbers are the author's clock counts.
@@4:    FADD    qword ptr [EAX+EDX+24]  // 1
        FXCH    ST(3)                   // 0
@@3:    FADD    qword ptr [EAX+EDX+16]  // 1
        FXCH    ST(2)                   // 0
@@2:    FADD    qword ptr [EAX+EDX+8]   // 1
        FXCH    ST(1)                   // 0
@@1:    FADD    qword ptr [EAX+EDX]     // 1
        FXCH    ST(2)                   // 0
        SUB     EDX, 32                 // step back one group (4 * 8 bytes)
        JNS     @@4                     // offset still >= 0: another full group
        // Fold the four partial sums into a single value in ST(0).
        FADDP   ST(3),ST                // ST(3) := ST + ST(3); Pop ST
        FADD                            // ST(1) := ST + ST(1); Pop ST
        FADD                            // ST(1) := ST + ST(1); Pop ST
@@Done:
        FWAIT                           // let any pending FPU exception fire here
end;
Thanks in advance.