Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Urgent help needed

0 views
Skip to first unread message

phi5...@yahoo.ca

unread,
Dec 8, 2008, 11:49:03 AM12/8/08
to
For reasons that I explain at comp.lang.functional and
comp.lang.scheme, I need to manipulate large unidimensional arrays (at
least 50000000 elements). I am working with Scheme (Bigloo and
Stalin). The results are satisfactory. In fact, Scheme code seems to
be as fast as C, sometimes faster (specially in the case of Stalin).
However, I think that OCAML has a nicer syntax, although not as nice
as I would like it to be; for instance, I do not like to write 2.0 +.
5.6, 4.0 *. 6.7, etc.

When I ported the Scheme programs to OCAML I stumbled into a
limitation of the Array library. When I tried to run the programs, I
would get the message:

D:\stalin\ocaml>testarray.exe
Fatal error: exception Invalid_argument("Array.make")

Then somebody told me that I should use the Bigarray library.
Unhappily, Bigarrays are not as nice to work with as Arrays, but one
can live with it. The problem, however, is that my programs in OCAML
are 20 times slower than in Stalin, or Bigloo. A program that takes
one hour in Bigloo, takes almost one day in OCAML! To tell the whole
truth, I did not wait for the results of the OCAML program. I wrote a
small benchmark to try to see what the problem is. The benchmark
followed suit: 20 times slower than Stalin, Bigloo or Gambit. Since
I heard that OCAML is much faster than Bigloo (from J. Harrop's
postings), I understand that I must have committed a very basic
blunder. You will find both the Bigloo and the OCAML version of the
benchmark program below. The benchmark is so simple that I cannot see
where I made a mistake. To run the OCAML version of the benchmark,
type:

matone.exe 5000

To run the Bigloo version, type:

matb.exe 5000


Here is the OCAML program:

(* ocamlopt bigarray.cmxa -unsafe -ccopt -O3 matone.ml -o matone.exe
*)
open Bigarray;;

let make_system r=
let c= r+1 in
let m= Array1.create float32 c_layout (r * c) in
let xx= ref 0.0 and s= ref 0.0 in
for i= 0 to r-1 do
s := 0.0;
for j=0 to r-1 do
xx := Random.float 100.0;
s := s.contents +. xx.contents;
Array1.set m (i*c+j) xx.contents
done;
Array1.set m (i*c+r) s.contents
done;
m
;;

let iii= ref 0;;

let swapit m c k l=
let t = ref 0.0 in
iii := iii.contents + 1;
for j=0 to c-1 do
t := Array1.get m (k*c+j);
Array1.set m (k*c+j) (Array1.get m (l*c+j));
Array1.set m (l*c+j) t.contents
done
;;

let find_max m c k idx=
let i= ref idx in
for l= k+1 to c-2 do
if (abs_float (Array1.get m (l*c+k))) >
(abs_float (Array1.get m (i.contents * c + k))) then
i := l
done;
if i.contents <> k then swapit m c k i.contents
;;

let solvit mm r c=
let rat = ref 0.0 and tx= ref 0.0 and mkk= ref 0.0 in
for k=0 to r-2 do
find_max mm c k k;
mkk := Array1.get mm (c*k + k);
for i= k+1 to r-1 do
rat := (Array1.get mm (i*c + k)) /. mkk.contents;
for j=k to c-1 do
Array1.set mm (i*c+j) ( (Array1.get mm (i*c+j)) -.
rat.contents *. (Array1.get mm
(k*c + j)))
done
done
done;
for i=r-1 downto 0 do
tx := 0.0;
for j=i+1 to r-1 do
tx := tx.contents -. (Array1.get mm (i*c + j)) *.
(Array1.get mm (j*c + r))
done;
Array1.set mm (i*c+r) ((( Array1.get mm (i*c + r)) +.
tx.contents) /.
(Array1.get mm (i*c + i)))
done
;;


let nels () =
try
int_of_string Sys.argv.(1)
with
_ -> 5000
;;

let r= nels () in
let c= (r+1) in
let mm= make_system r in
(
solvit mm r c;
for i= 0 to (min (r-1) 10) do
Printf.printf "%8.3f " (Array1.get mm (i*c+r))
done;
print_newline();
Printf.printf "Number of swaps= %d\n" iii.contents;
Printf.printf "\nOcaml Time= %f\n" (Sys.time())

);;


Here is the Bigloo version:

;; Compile: bigloo -Obench -farithmetic -O3 matb.scm -o matb.exe

(module example
(main main)
(option (set! *genericity* #f))
(extern
(macro printf::int (::string ::double) "printf")
(clock::int () "clock") ))

(define iii 0)

(define-macro ($ v c i j)
`(f64vector-ref ,v (+fx (*fx ,i ,c) ,j) ) )

(define-macro ($! v c i j val)
`(f64vector-set! ,v (+fx (*fx ,i ,c) ,j) ,val))

(define-macro (!= x y)
`(not (=fx ,x ,y)))

(define (prt m r c)
(do ( (i 0 (+fx i 1)) ) ( (>=fx i r) )
(newline)
(do ((j 0 (+fx j 1)) ) ( (>=fx j c) )
(printf " %4.3f " ($ m c i j) ) )))

(define (make-system r)
(let* ( (c (+fx r 1))
(m (make-f64vector (*fx r c) 0.0 ))
(xx 0.0)
(s 0.0) )

(do ( (i 0 (+fx i 1)) ) ( (>=fx i r) m)
(set! s 0.0)
(do ((j 0 (+fx j 1) ) ) ( (>=fx j r) ($! m c i j s) )
(set! xx (fixnum->flonum (random 3873)))
(set! s (+fl s xx))
($! m c i j xx )) ) ))

(define (swapit m c k l)
(let ((t 0.0))
(set! iii (+fx iii 1))
(do ( (j 0 (+fx j 1)) ) ( (>=fx j c) )
(set! t ($ m c k j ) )
($! m c k j ($ m c l j) )
($! m c l j t) ) ) )

(define (find-max m c k i)
(do ( (l (+fx k 1) (+fx l 1)) )
( (>=fx l (-fx c 1)) (when (!= i k) (swapit m c k i )))
(when (>fl (absfl ($ m c l k)) (absfl ($ m c i k)) )
(set! i l) ) ))

(define (solvit m r)
(let ( (c (+fx r 1))
(rat 0.0)
(mkk 0.0))

(do ( (k 0 (+fx k 1)) ) ( (>=fx k (-fx r 1)))
(find-max m c k k)
(set! mkk ($ m c k k) )

(do ( ( i (+fx k 1)(+fx i 1)) ) ( (>=fx i r))
(set! rat (/fl ($ m c i k) mkk ))
(do ( (j k (+fx j 1))) ( (>=fx j c) )
($! m c i j (-fl ($ m c i j)
(*fl rat ($ m c k j ) ) ) )
)
)
)

(do ( (i (-fx r 1) (-fx i 1) ) ) ((<fx i 0) m)
(do ( (j (+fx i 1) (+fx j 1))
(tx 0.0 (-fl tx (*fl ($ m c i j)
($ m c j r )) )) )
( (>=fx j r)
($! m c i r
(/fl (+fl ($ m c i r ) tx)
($ m c i i)) ) ) ))
)
)

(define (elms argv)
(cond ( (<fx (length argv) 2) 2000)
( (string->number (cadr argv)) (string->number (cadr argv)) )
(else 2000)))

(define (main argv)
(let* ( (r (elms argv)) (c (+fx r 1)) (m (solvit (make-system r)
r) ) )
(do ( (i 0 (+fx i 1))) ( (>=fx i (min r 10)) )
(printf " %4.3f " ($ m c i r) ) )
(newline) (display "Bigloo time= ") (display (clock))
(newline) (print "Number of swaps= " iii)) )

phi5...@yahoo.ca

unread,
Dec 8, 2008, 7:23:43 PM12/8/08
to
I received a lot of answers by private mail. People taught me a lot of
things about OCAML, and fixed my program in many ways. Now it is only
two times slower than the Bigloo counterpart. That is a great
improvement, since my version of the program was twenty times slower.

D:\stalin\ocaml>matone.exe 2000
1.000 1.000 1.000 1.000 1.000 1.000 1.000
1.000 1.000
1.000 1.000
Number of swaps= 1993

Ocaml Time= 48.765000

D:\stalin\ocaml>cd ..

D:\stalin>matb 2000
1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
Bigloo time= 14953
Number of swaps= 1995

The most important change was type declaration for Bigarrays. People
told me that type declaration helps the compiler to inline Array1.set
and Array1.let; I do not know what this means, but it works. In any
case, I still need a speed-up by a factor of three, in order to reach
Bigloo. Therefore, I am open to more suggestions. BTW, my
correspondents taught me a better notation for the array operations
too. By the way, Bigloo programmers made a few suggestions to improve
Scheme performance too, and I was able to cut a few seconds in Bigloo
time as well. Below you will find the modified OCAML program.


(* ocamlopt bigarray.cmxa -unsafe -ccopt -O3 matone.ml -o matone.exe
*)

open Bigarray;;

type farray = (float, float64_elt, c_layout) Array1.t

let make_system r=
let c= r+1 in

let m= Array1.create float64 c_layout (r * c) in
let s= ref 0.0 in


for i= 0 to r-1 do
s := 0.0;
for j=0 to r-1 do

let xx = Random.float 100.0 in
( s := s.contents +. xx;
m.{i*c+j} <- xx)
done;
m.{i*c+r} <- s.contents
done;
m
;;

let iii= ref 0;;

let swapit (m:farray) c k l=


let t = ref 0.0 in
iii := iii.contents + 1;
for j=0 to c-1 do
t := Array1.get m (k*c+j);

m.{k*c+j} <- m.{l*c+j};
m.{l*c+j} <- t.contents
done
;;

let find_max (m:farray) c k idx=


let i= ref idx in
for l= k+1 to c-2 do

if (abs_float m.{l*c+k}) >
(abs_float m.{i.contents * c + k}) then


i := l
done;
if i.contents <> k then swapit m c k i.contents
;;

let solvit (mm: farray) r c=


let rat = ref 0.0 and tx= ref 0.0 and mkk= ref 0.0 in
for k=0 to r-2 do
find_max mm c k k;

mkk := mm.{c*k + k};


for i= k+1 to r-1 do

rat := mm.{i*c + k} /. mkk.contents;


for j=k to c-1 do

mm.{i*c+j} <- ( mm.{i*c+j} -.
rat.contents *. mm.{k*c + j})


done
done
done;
for i=r-1 downto 0 do
tx := 0.0;
for j=i+1 to r-1 do

tx := tx.contents -. mm.{i*c + j} *. mm.{j*c + r}
done;
mm.{i*c+r} <- (mm.{i*c + r} +. tx.contents) /.
mm.{i*c + i}
done
;;


let nels () =
try
int_of_string Sys.argv.(1)
with
_ -> 5000
;;

let r= nels () in
let c= (r+1) in
let mm= make_system r in
(
solvit mm r c;
for i= 0 to (min (r-1) 10) do

Printf.printf "%8.3f " mm.{i*c+r}

0 new messages