http://ompf.org/ray/sphereflake/
I always wondered how the performance of my OCaml compared to the optimised
C++ but was never able to compare them because Thierry's code didn't do the
same thing. So I just spent a couple of hours translating it back into
OCaml and the results are very interesting.
The resulting program is 108 LOC (2,557 non-whitespace bytes) of OCaml and
takes 6.12s to rendering a 1024x1024 image of a scene containing 66430
spheres with 2x2 supersampling compared to 3.83s for the C++:
Spheres C++ OCaml
10 1.1s 2.6s
91 2.0s 3.8s
820 2.7s 4.7s
7,381 3.4s 5.5s
66,430 3.9s 6.1s
This OCaml is really medium performance and medium brevity. It could be made
more concise by removing the specialised shadow ray intersection,
the "exists" function implementation (e.g. using the built-in List.exists
instead) and so on. Performance could be improved by inlining the vector
arithmetic in ray_sphere and reducing unboxing to alleviate the GC.
let pi = 4. *. atan 1. and delta = sqrt epsilon_float
type vec = {x: float; y: float; z: float}
let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}
let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z
let unitise r = 1. /. sqrt(dot r r) *| r
let cross u v = { x = u.y *. v.z -. u.z *. v.y;
y = u.z *. v.x -. u.x *. v.z;
z = u.x *. v.y -. u.y *. v.x }
let ray_sphere dir v radius =
let b = dot v dir in
let disc = b *. b -. dot v v +. radius *. radius in
if disc < 0. then infinity else
let disc = sqrt disc in
let t2 = b +. disc in
if t2 < 0. then infinity else
let t1 = b -. disc in
if t1 > 0. then t1 else t2
let ray_sphere' orig dir center radius =
let v = center -| orig in
let b = dot v dir in
let disc = b *. b -. dot v v +. radius *. radius in
if disc < 0. then false else b +. sqrt disc >= 0.
let intersect dir =
let rec aux (l, _ as first) (bc, br, sc, sr, scenes) =
let l' = ray_sphere dir bc br in
if l' >= l then first else
let l' = ray_sphere dir sc sr in
let first = if l' >= l then first else l', unitise(l' *| dir -| sc) in
Array.fold_left aux first scenes in
aux (infinity, {x=0.; y=0.; z=0.})
let exists f a =
try
for i=0 to Array.length a - 1 do
if f a.(i) then raise Exit
done;
false
with Exit -> true
let intersect' orig dir =
let rec aux (bc, br, sc, sr, scenes) =
ray_sphere' orig dir bc br &&
(ray_sphere' orig dir sc sr || exists aux scenes) in
aux
let rec ray_trace light dir scene =
let lambda, normal = intersect dir scene in
if lambda = infinity then 0. else
let g = dot normal light in
if g >= 0. then 0. else
let p = lambda *| dir +| delta *| normal in
if intersect' p (-1. *| light) scene then 0. else -. g
let basis v =
let n = unitise v in
let b1 =
if n.x *. n.x <> 1. && n.y *. n.y <> 1. && n.z *. n.z <> 1. then
if n.y *. n.y > n.x *. n.x then
if n.y *. n.y > n.z *. n.z then
{n with y = -. n.y}
else
{n with z = -. n.z}
else
if n.z *. n.z > n.x *. n.x then
{n with z = -. n.z}
else
{n with x = -. n.x}
else
{x=n.z; y=n.x; z=n.y} in
let b2 = cross n b1 in
n, cross n b2, b2
let rec create level c d r =
let n = c, 2. *. r, c, r, [||] in
if level <= 1 then n else
let up, b1, b2 = basis d in
let nr = r /. 3. and daL = 2. *. pi /. 6. and daU = 2. *. pi /. 3. in
let a = ref 0. and cs = Array.make 9 n in
for i=0 to 5 do
let ndir = unitise(-0.2 *| d +| sin !a *| b1 +| cos !a *| b2) in
cs.(i) <- create (level - 1) (c +| (r +. nr) *| ndir) ndir nr;
a := !a +. daL;
done;
a := !a -. daL /. 3.;
for i=0 to 2 do
let ndir = unitise(0.6 *| d +| sin !a *| b1 +| cos !a *| b2) in
cs.(6+i) <- create (level - 1) (c +| (r +. nr) *| ndir) ndir nr;
a := !a +. daU;
done;
c, 2. *. r, c, r, cs
let level, n = match Sys.argv with
[|_; level; n|] -> int_of_string level, int_of_string n
| _ -> 6, 1024
let light = unitise {x = -0.5; y = -0.65; z = 0.9} and ss = 2
let scene =
create level {x=0.; y=0.; z=4.5} (unitise{x=0.25; y=1.; z= -0.5}) 1.;;
Printf.printf "P5\n%d %d\n255\n" n n;;
for y = n - 1 downto 0 do
for x = 0 to n - 1 do
let g = ref 0. in
for dx = 0 to ss - 1 do
for dy = 0 to ss - 1 do
let aux x d = float x -. float n /. 2. +. float d /. float ss in
let dir = unitise {x = aux x dx; y = aux y dy; z = float n} in
g := !g +. ray_trace light dir scene
done;
done;
let g = 0.5 +. 255. *. !g /. float (ss*ss) in
Printf.printf "%c" (char_of_int (int_of_float g))
done;
done
--
Dr Jon D Harrop, Flying Frog Consultancy
Objective CAML for Scientists
http://www.ffconsultancy.com/products/ocaml_for_scientists
This faster OCaml implementation is still much more concise than TBP's C++
and is now only 20% slower:
let pi = 4. *. atan 1. and delta = sqrt epsilon_float
type vec = {x: float; y: float; z: float}
let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}
let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z
let unitise r = 1. /. sqrt(dot r r) *| r
let cross u v = { x = u.y *. v.z -. u.z *. v.y;
y = u.z *. v.x -. u.x *. v.z;
z = u.x *. v.y -. u.y *. v.x }
let ray_sphere d v r =
let disc = v.x *. v.x +. v.y *. v.y +. v.z *. v.z -. r *. r in
if disc < 0. then infinity else
let b = v.x *. d.x +. v.y *. d.y +. v.z *. d.z in
let b2 = b *. b in
if b2 < disc then infinity else
let disc = sqrt(b2 -. disc) in
let t1 = b -. disc in
if t1 > 0. then t1 else b +. disc
let ray_sphere' o d c r =
let vx = c.x -. o.x and vy = c.y -. o.y and vz = c.z -. o.z in
let vv = vx *. vx +. vy *. vy +. vz *. vz in
let b = vx *. d.x +. vy *. d.y +. vz *. d.z in
let disc = b *. b -. vv +. r *. r in
disc >= 0. && b +. sqrt disc >= 0.
let rec intersect dir (l, _ as first) (bc, br, sc, sr, scenes) =
let l' = ray_sphere dir bc br in
if l' >= l then first else
let l' = ray_sphere dir sc sr in
let first =
if l' >= l then first else l', lazy(unitise(l' *| dir -| sc)) in
intersects dir first scenes
and intersects dir first = function
| [] -> first
| h::t -> intersects dir (intersect dir first h) t
let rec intersect' orig dir (bc, br, sc, sr, scenes) =
ray_sphere' orig dir bc br &&
(ray_sphere' orig dir sc sr || intersects' orig dir scenes)
and intersects' orig dir = function
| [] -> false
| h::t -> intersects' orig dir t || intersect' orig dir h
let rec ray_trace light dir scene =
let lambda, normal =
intersect dir (infinity, lazy {x=0.; y=0.; z=0.}) scene in
if lambda = infinity then 0. else
let normal = Lazy.force normal in
let a = ref 0. and cs = ref [] in
for i=0 to 5 do
let ndir = unitise(-0.2 *| d +| sin !a *| b1 +| cos !a *| b2) in
cs := create (level - 1) (c +| (r +. nr) *| ndir) ndir nr :: !cs;
a := !a +. daL;
done;
a := !a -. daL /. 3.;
for i=0 to 2 do
let ndir = unitise(0.6 *| d +| sin !a *| b1 +| cos !a *| b2) in
cs := create (level - 1) (c +| (r +. nr) *| ndir) ndir nr :: !cs;
a := !a +. daU;
done;
c, 2. *. r, c, r, !cs
let level, n = match Sys.argv with
[|_; level; n|] -> int_of_string level, int_of_string n
| _ -> 6, 1024
let light = unitise {x = -0.5; y = -0.65; z = 0.9} and ss = 2
let scene =
create level {x=0.; y=0.; z=4.5} (unitise{x=0.25; y=1.; z= -0.5}) 1.;;
Printf.printf "P5\n%d %d\n255\n" n n;;
for y = n - 1 downto 0 do
for x = 0 to n - 1 do
let g = ref 0. in
for dx = 0 to ss - 1 do
for dy = 0 to ss - 1 do
let aux x d = float x -. float n /. 2. +. float d /. float ss in
let dir = unitise {x = aux x dx; y = aux y dy; z = float n} in
g := !g +. ray_trace light dir scene
done;
done;
let g = 0.5 +. 255. *. !g /. float (ss*ss) in
Printf.printf "%c" (char_of_int (int_of_float g))
done;
done
--
Dr Jon D Harrop, Flying Frog Consultancy
Objective CAML for Scientists
http://www.ffconsultancy.com/products/ocaml_for_scientists/index.html?usenet
The OCaml implementation is now outperforming the C++ implementation
compiled with g++ 4.2.3:
http://www.ffconsultancy.com/languages/ray_tracer/results.html
Albeit, because the g++ compiled code is getting slower with newer versions
of g++...
Looking back, Thierry Berger-Perrin claimed he could write a much faster C++
implementation. How is that coming along? ;-)
--
Dr Jon D Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/products/?u
> Jon Harrop wrote:
> > This faster OCaml implementation is still much more concise than
> > TBP's C++ and is now only 20% slower:
>
> The OCaml implementation is now outperforming the C++ implementation
> compiled with g++ 4.2.3:
>
> http://www.ffconsultancy.com/languages/ray_tracer/results.html
>
> Albeit, because the g++ compiled code is getting slower with newer
> versions of g++...
>
> Looking back, Thierry Berger-Perrin claimed he could write a much
> faster C++ implementation. How is that coming along? ;-)
>
Hey,
I was unable to write faster OCaml implementation. ;)
I was supposed to write C++ raytracer for studies. Just to learn how
to write raytracers I've written at first a version in OCaml (code +
desc. here: http://bla.thera.be/archives/17). It took < 900 lines of
code to make it working while C++ version had 4000 lines when it started
rendering anything interesting (code here:
http://repo.or.cz/w/blaRAY.git, some doc., no pictures:
http://temp.thera.be/blaRAY/refman.html.d/).
But still C++ was faster. ;s This programs are too big to serve as
benchmarks, but someone might be interested.
--
Tomasz bla Fortuna
jid: bla(at)af.gliwice.pl
pgp: 0x90746E79 @ pgp.mit.edu
www: http://bla.thera.be
Are these implementations close enough to even compare meaningfully?
[It seems like about 95% of the cpu time in raytracing goes to searching
for ray-surface intersections -- acceleration structure searching plus
primitive intersection tests -- so I suppose one could easily make a
fairly small test case that tested only those things, and make sure to
use the same algorithms for both languages, etc.]
-Miles
--
Everywhere is walking distance if you have the time. -- Steven Wright
As much as possible, yes. They all implement the same algorithm, with
ray-sphere intersections only and the same hierarchical spherical bounds
for fast search.
The different versions introduce different algorithmic optimizations but
these are kept the same across all languages. The final version is a
free-for-all.
> [It seems like about 95% of the cpu time in raytracing goes to searching
> for ray-surface intersections -- acceleration structure searching plus
> primitive intersection tests -- so I suppose one could easily make a
> fairly small test case that tested only those things, and make sure to
> use the same algorithms for both languages, etc.]
Yes. There is theoretically more potential for low-level optimization of
those data structures in languages like C++ but, in practice, nobody has
been able to make C++ significantly faster on this particular benchmark. I
believe the reason is largely that this benchmark includes the
precomputation of the search data structure, which is often very slow for
other algorithms because they are designed for heavy reuse, e.g. over many
frames of animation.
In 2005, Thierry Berger-Perrin made a lot of noise about my "unbelievably
inefficient code" in C++:
http://groups.google.co.uk/group/comp.graphics.rendering.raytracing/msg/91d5cba802bdb0e3
and claimed that it should render "in the blink of an eye". Apparently
Theirry doesn't like the idea of using a high-level language but he failed
to write a faster implementation in a low-level language:
http://ompf.org/ray/sphereflake/
Rerunning those old benchmarks now, I get:
C++: 4.852s g++ -O3 -ffast-math
OCaml: 4.854s ocamlopt -rectypes -inline 1000
As you can see, Thierry's C++ is not faster at all. For anyone interested in
writing ray tracers, particularly as an educational exercise, I'd highly
recommend using a modern high-level language like OCaml rather than C++. It
makes life a lot easier and you actually get to learn about ray tracing
rather than debugging...
Here's my old OCaml equivalent:
let pi = 4. *. atan 1. and delta = sqrt epsilon_float
type vec = {x: float; y: float; z: float}
let ( *| ) s r = {x = s *. r.x; y = s *. r.y; z = s *. r.z}
let ( +| ) a b = {x = a.x +. b.x; y = a.y +. b.y; z = a.z +. b.z}
let ( -| ) a b = {x = a.x -. b.x; y = a.y -. b.y; z = a.z -. b.z}
let dot a b = a.x *. b.x +. a.y *. b.y +. a.z *. b.z
let unitise r = 1. /. sqrt(dot r r) *| r
let length r = sqrt(dot r r)
let cross u v = { x = u.y *. v.z -. u.z *. v.y;
y = u.z *. v.x -. u.x *. v.z;
z = u.x *. v.y -. u.y *. v.x }
let ray_sphere d v r =
let disc = v.x *. v.x +. v.y *. v.y +. v.z *. v.z -. r *. r in
if disc < 0. then infinity else
let b = v.x *. d.x +. v.y *. d.y +. v.z *. d.z in
let b2 = b *. b in
if b2 < disc then infinity else
let disc = sqrt(b2 -. disc) in
let t1 = b -. disc in
if t1 > 0. then t1 else b +. disc
let ray_sphere' o d c r =
let vx = c.x -. o.x and vy = c.y -. o.y and vz = c.z -. o.z in
let vv = vx *. vx +. vy *. vy +. vz *. vz in
let b = vx *. d.x +. vy *. d.y +. vz *. d.z in
let disc = b *. b -. vv +. r *. r in
disc >= 0. && b +. sqrt disc >= 0.
let rec intersect dir (l, _ as first) (bc, br, sc, sr, scenes) =
let l' = ray_sphere dir bc br in
if l' >= l then first else
if Array.length scenes=0 then l', lazy(unitise(l' *| dir -| sc)) else
let l' = ray_sphere dir sc sr in
let first =
ref(if l' >= l then first else l', lazy(unitise(l' *| dir -| sc))) in
for i=0 to Array.length scenes-1 do
first := intersect dir !first scenes.(i)
done;
!first
let rec intersect' orig dir (bc, br, sc, sr, scenes) =
ray_sphere' orig dir bc br &&
(ray_sphere' orig dir sc sr ||
try
for i=0 to Array.length scenes-1 do
if intersect' orig dir scenes.(i) then raise Exit
done;
false
with Exit -> true)
let rec ray_trace light dir scene =
let lambda, normal =
intersect dir (infinity, lazy {x=0.; y=0.; z=0.}) scene in
if lambda = infinity then 0. else
let normal = Lazy.force normal in
let g = dot normal light in
if g >= 0. then 0. else
let p = lambda *| dir +| delta *| normal in
if intersect' p (-1. *| light) scene then 0. else -. g
let basis v =
let n = unitise v in
let b1 =
let nx2, ny2, nz2 = n.x *. n.x, n.y *. n.y, n.z *. n.z in
match ny2>nx2, ny2>nz2, nz2>nx2 with
| true, true, _ -> {n with y = -. n.y}
| true, false, _ | false, _, true -> {n with z = -. n.z}
| false, _, false -> {n with x = -. n.x} in
let b2 = cross n b1 in
n, cross n b2, b2
let rec bound (c, r) (_, _, c', r', cs) =
Array.fold_left bound (c, max r (length (c -| c') +. r')) cs
let rec create level c d r =
let n = c, r, c, r, [||] in
if level <= 1 then n else
let up, b1, b2 = basis d in
let nr = r /. 3. and daL = 2. *. pi /. 6. and daU = 2. *. pi /. 3. in
let a = ref 0. and cs = ref [] in
for i=0 to 5 do
let ndir = unitise(-0.2 *| d +| sin !a *| b1 +| cos !a *| b2) in
cs := create (level - 1) (c +| (r +. nr) *| ndir) ndir nr :: !cs;
a := !a +. daL;
done;
a := !a -. daL /. 3.;
for i=0 to 2 do
let ndir = unitise(0.6 *| d +| sin !a *| b1 +| cos !a *| b2) in
cs := create (level - 1) (c +| (r +. nr) *| ndir) ndir nr :: !cs;
a := !a +. daU;
done;
let c', r' = List.fold_left bound (c, r) !cs in
c', r', c, r, Array.of_list !cs
let level, n = match Sys.argv with
[|_; level; n|] -> int_of_string level, int_of_string n
| _ -> 6, 1024
let grid = [|-1., -1./.3.; 1./.3., -1.; -1./.3., 1.; 1., 1./.3.|]
let light = unitise {x = -0.5; y = -0.65; z = 0.9} and ss = 2
let scene =
create level {x=0.; y=0.; z=4.5} (unitise{x=0.25; y=1.; z= -0.5}) 1.;;
Printf.printf "P5\n%d %d\n255\n" n n;;
for y = n - 1 downto 0 do
for x = 0 to n - 1 do
let aux g (dx, dy) =
let aux x d = float x -. float n /. 2. +. d /. float ss in
let dir = unitise {x = aux x dx; y = aux y dy; z = float n} in
g +. ray_trace light dir scene in
let g = 0.5 +. 255. *. (Array.fold_left aux 0. grid) /. float (ss*ss) in
Printf.printf "%c" (char_of_int (int_of_float g))
done
done
--