Thanks for the algorithm! I thought I might be able to do that,
but my brain started to melt down. Here's what I came up with,
which causes a hang, but at least it happened after I got the
results of some calculations. I'll see if I can figure out what
is happening.
; multiply cx:bx by dx:ax, result in dx:ax
public __I4M
__I4M:
public __U4M
__U4M:
public f_lxmul@
f_lxmul@ proc
push bp
mov bp,sp
push bx
push cx
push si
push di
push ax
push bx
; I think this multiples bx * ax and puts the upper 16 bits in ax
; and lower 16 bits in bx
mul bx
; Save upper 16 in si and lower 16 in di
mov si, ax
mov di, bx
; This does the equivalent of bx * dx
pop bx
mov ax, dx
mul bx
mov dx, ax
; Now we do cx * ax with upper 16 bits in ax and lower in cx
pop ax
mul cx
; Now we need to add the results of those two multiplies together
; lower 16 bits first, so we can get the carry
push bp ; ran out of registers!
mov bp, bx
mov bx, ax
mov ax, 1
add dx, cx
jc noone
mov ax, 1
noone:
push ax
; Now the other lower 16 bits we saved
mov ax, 1
add dx, di
jc noone2
mov ax, 1
noone2:
push ax
; Upper 16 bits
mov ax, bx
add bx, ax
pop ax
add bx, ax ; one carry
pop ax
add bx, ax ; the other carry
mov ax, bp
add bx, ax
; store in proper output register
mov dx, bx
pop bp
pop di
pop si
pop cx
pop bx
pop bp
ret
f_lxmul@ endp
BFN. Paul.