Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Three implementations of sets with bits.

25 views
Skip to first unread message

Pascal Bourguignon

unread,
Feb 29, 2004, 12:08:25 AM2/29/04
to

I compared three ways of implementating sets with bits, using
integers, bit vectors, or arrays of small integers.


-----------------------------------------------------------
loops x #bits
CLISP CLISP SBCL SBCL programming
time(s) space(B) time(s) space(B)
100x65536
integer(func) 1.79 8480k 0.30 1195k ultra simple
bvector(func) 1.64 0k 1.36 45892k simple
bvector(proc) 1.36 360k 1.31 13854k simple
bset(proc) 0.25 440k 0.03 0k complex
1000x256
integer(func) 1.24 6344k 0.31 7685k
bvector(func) 1.27 0k 1.18 35096k
bvector(proc) 1.02 120k 0.87 8617k
bset(proc) 0.15 128k 0.21 7850k
10000x32
integer(func) 30.52 154891k 7.62 194816k
bvector(func) 32.51 0k 27.94 892479k
bvector(proc) 26.31 2460k 21.38 212498k
bset(proc) 3.70 2460k 5.26 198094k
-----------------------------------------------------------


Normalized (by run and by cl implementation):
-----------------------------------------------------------
loops x #bits
CLISP CLISP SBCL SBCL
time space time space
100x65536
integer(func) 1.00 1.00 0.22 0.03
bvector(func) 0.92 0.00 1.00 1.00
bvector(proc) 0.76 0.04 0.96 0.30
bset(proc) 0.14 0.05 0.02 0.00
1000x256
integer(func) 0.98 1.00 0.26 0.22
bvector(func) 1.00 0.00 1.00 1.00
bvector(proc) 0.80 0.02 0.74 0.25
bset(proc) 0.12 0.02 0.18 0.22
10000x32
integer(func) 0.94 1.00 0.27 0.22
bvector(func) 1.00 0.00 1.00 1.00
bvector(proc) 0.81 0.02 0.77 0.24
bset(proc) 0.11 0.02 0.19 0.22
-----------------------------------------------------------


I find it somewhat depressing that the ultra-simple and elegant
formulation be also the slowest and greediest (on clisp, but the
second nicest implementation is worse on sbcl).

That integer representation of sets could be much more efficient (both
in space and time) if it was possible to modify an integer value.
Must all integer values absolutely be immutable objects?


In the case of the bit vector representation, it could probably be as
efficient as the most efficient if we could manipulate several bits at
once. Is there a way to extract an integer from a bit vector? Or
"displace" bits to an integer?

In any case, I'd suggest the implementations to make a special case in
map and map-into when the function is identity, or one of the log*
family, and when the arguments are bit vectors, to process them words
by words. The speed gain on 32bit architecture would be good enough,
on 64bit it would be impressive!

integer(functional):
--------------------
(defun integer-intersection (p q) (logand p q))
(defun integer-union (p q) (logior p q))
(defun integer-difference (p q) (logandc2 p q))
(defun integer-contains (s e) (logbitp e s))
(defun integer-singleton (e) (dpb 1 (byte 1 e) 0))
(defun integer-include (s e) (dpb 1 (byte 1 e) s))
(defun integer-exclude (s e) (dpb 0 (byte 1 e) s))
(defun integer-cardinal (s) (logcount s))


bitvector(functional):
----------------------
(defun bit-vector-intersection (p q)
(map '(array bit (*)) (function logand) p q))

(defun bit-vector-union (p q)
(map '(array bit (*)) (function logior) p q))

(defun bit-vector-difference (p q)
(map '(array bit (*)) (function logandc2) p q))

(defun bit-vector-contains (v e) (not (zerop (aref v e))))
(defun bit-vector-include (v e) (setf (aref v e) 1))
(defun bit-vector-exclude (v e) (setf (aref v e) 0))


bitvector(procedural):
----------------------
(defun bit-vector-assign-2 (p q) (map-into p (function identity) q))
(defun bit-vector-intersection-2 (p q) (map-into p (function logand) p q))
(defun bit-vector-union-2 (p q) (map-into p (function logior) p q))
(defun bit-vector-difference-2 (p q) (map-into p (function logandc2) p q))


bset(procedural):
-----------------

(defconstant +bit-per-bitset+ 32)
(deftype bitset () `(unsigned-byte ,+bit-per-bitset+))

(defstruct bset
(bitsets (make-array :type (array bitset *))
(cardinal nil :type (or null (integer 0)))
(first-element 0 :type (integer 0)) ;; approximate
(last-element 0 :type (integer 0)) ;; approximate
;; (for all i (==> (< i (bset-first-element bset)) (not (is-element i bset))))
;; (for all i (==> (> i (bset-last-element bset)) (not (is-element i bset))))
);;bset

(proclaim '(inline elem-to-bit))
(defun elem-to-bit (element) (mod element +bit-per-bitset+))
(proclaim '(inline bitset-to-elem))
(defun bitset-to-elem (index) (* +bit-per-bitset+ (1+ index)))

(defun intersection (set1 set2)
"
DO: set1 := set1 inter set2 inter
Accumulate in set1 the intersection of set1 and set2
(elements in set1 and in set2).
RETURN: SET1
"
(let ((bits1 (bset-bitsets set1))
(bits2 (bset-bitsets set2)))
(for (i
(elem-to-bitset (max (bset-first-element set1)
(bset-first-element set2)))
(elem-to-bitset (min (bset-last-element set1)
(bset-last-element set2))))
(setf (bsref bits1 i) (logand (bsref bits1 i) (bsref bits2 i)))))
(setf (bset-cardinal set1) nil
(bset-first-element set1) (max (bset-first-element set1)
(bset-first-element set2))
(bset-last-element set1) (min (bset-last-element set1)
(bset-last-element set2)))
set1)

(defun include (bset element)
"
PRE: (<= 0 element (size bset))
POST: (is-element element bset)
RETURN: BSET
"
(declare (type (integer 0) element))
(let ((bits (bset-bitsets bset)))
(setf (bsref bits (elem-to-bitset element))
(dpb 1 (byte 1 (elem-to-bit element))
(bsref bits (elem-to-bitset element)))))
(setf (bset-cardinal bset) nil
(bset-first-element bset) (cond
((is-element 0 bset) 0)
((zerop (bset-first-element bset)) element)
(t (min element (bset-first-element bset))))
(bset-last-element bset) (max element (bset-last-element bset)))
bset)


--
__Pascal_Bourguignon__ http://www.informatimago.com/
There is no worse tyranny than to force a man to pay for what he doesn't
want merely because you think it would be good for him.--Robert Heinlein
http://www.theadvocates.org/

Raymond Toy

unread,
Feb 29, 2004, 2:42:55 AM2/29/04
to
Pascal Bourguignon wrote:
> bitvector(functional):
> ----------------------
> (defun bit-vector-intersection (p q)
> (map '(array bit (*)) (function logand) p q))
>
> (defun bit-vector-union (p q)
> (map '(array bit (*)) (function logior) p q))
>
> (defun bit-vector-difference (p q)
> (map '(array bit (*)) (function logandc2) p q))

Why not use bit-and, bit-ior and bit-andc2 for these? A good
implementation will probably make these operate on words at a time
instead of a bit at a time, as you wanted.

> (defstruct bset
> (bitsets (make-array :type (array bitset *))

Some kind of typo here. I guess you wanted to some array with element
type bitset?

Ray

Pascal Bourguignon

unread,
Feb 29, 2004, 10:21:28 PM2/29/04
to
Raymond Toy <rt...@earthlink.net> writes:

> Pascal Bourguignon wrote:
> > bitvector(functional):
> > ----------------------
> > (defun bit-vector-intersection (p q) (map '(array bit (*))
> > (function logand) p q))
> > (defun bit-vector-union (p q)
> > (map '(array bit (*)) (function logior) p q))
> > (defun bit-vector-difference (p q)
> > (map '(array bit (*)) (function logandc2) p q))
>
> Why not use bit-and, bit-ior and bit-andc2 for these? A good
> implementation will probably make these operate on words at a time
> instead of a bit at a time, as you wanted.

I overlooked it. Thank you for recalling it to me.

Here are the results using implementations based on bit-* functions.


> > (defstruct bset
> > (bitsets (make-array :type (array bitset *))
>
> Some kind of typo here. I guess you wanted to some array with element
> type bitset?


Yes, I did not cut enough, I wanted to avoid you the burden of the
initial array creation.

(defstruct bset
(bitsets :type (array bitset *))
;;...
)

Well, using (bit-and vector-1 vector-2 t) [bvector-bit(proc)] looks
like the most efficient. The only gain we could have with an array of
(byte 32), is that we could keep information about ranges (at least
min-elem/max-elem) to avoid processing the whole array when the
elements are not spread over the whole range (imagine: [0-9a-z]).


CLISP CLISP SBCL SBCL
time space time space
100x65536

integer(func) 1.68 0k 1.41 45900k
bvector-bit(func) 0.68 0k 0.66 10898k
bvector-map(func) 1.39 360k 1.32 13859k
bvector-bit(proc) 0.22 360k 0.25 719k
bvector-map(proc) 0.21 440k 0.29 11291k
1000x256
integer(func) 1.31 0k 1.10 35100k
bvector-bit(func) 0.48 0k 0.36 8261k
bvector-map(func) 1.03 120k 0.88 8638k
bvector-bit(proc) 0.17 120k 0.08 138k
bvector-map(proc) 0.13 128k 0.24 8775k
10000x32
integer(func) 32.90 0k 28.00 892483k
bvector-bit(func) 13.09 0k 9.12 209989k
bvector-map(func) 26.94 2460k 21.79 212497k
bvector-bit(proc) 4.32 2460k 1.51 2459k
bvector-map(proc) 3.42 2460k 5.12 197241k

------------------------------------------------------------------------
For the interested here is my evaluation code:


(DEFUN MAKE-LIST-OF-RANDOM-NUMBERS (LENGTH &key (modulo MOST-POSITIVE-FIXNUM))
"
RETURN: A list of length `length' filled with random numbers
MODULO: The argument to RANDOM.
"
(LOOP WHILE (< 0 LENGTH)
COLLECT (RANDOM MODULO) INTO RESULT
DO (SETQ LENGTH (1- LENGTH))
FINALLY (RETURN RESULT))
);;MAKE-LIST-OF-RANDOM-NUMBERS


(defun make-random-bit-vector (size)
(let ((result (make-array (list size) :element-type 'bit)))
(map nil (lambda (element) (setf (aref result element) 1))
(MAKE-LIST-OF-RANDOM-NUMBERS (/ size 2) :modulo size))
result));;make-random-bit-vector


(defun integer-intersection (p q) (logand p q))
(defun integer-union (p q) (logior p q))
(defun integer-difference (p q) (logandc2 p q))
(defun integer-contains (s e) (logbitp e s))
(defun integer-singleton (e) (dpb 1 (byte 1 e) 0))
(defun integer-include (s e) (dpb 1 (byte 1 e) s))
(defun integer-exclude (s e) (dpb 0 (byte 1 e) s))
(defun integer-cardinal (s) (logcount s))

(DEFUN COPY-BIT-VECTOR-TO-integer (BV)
(let ((integer-set 0))
(DOTIMES (ELEMENT (LENGTH BV))
(unless (ZEROP (AREF BV ELEMENT))
(setf integer-set (integer-include integer-set element))))
integer-set));;COPY-BIT-VECTOR-TO-integer


;; bvector-bit(func)
(defun bit-vector-intersection (p q) (bit-and p q))
(defun bit-vector-union (p q) (bit-ior p q))
(defun bit-vector-difference (p q) (bit-andc2 p q))

;; bvector-map(func)
(defun bit-vector-intersection-m(p q)(map'(array bit (*))(function logand) p q))
(defun bit-vector-union-m (p q)(map'(array bit (*))(function logior) p q))
(defun bit-vector-difference-m(p q)(map'(array bit (*))(function logandc2) p q))

;; bvector-bit(proc)
(defun bit-vector-assign-b2 (p q) (map-into p (function identity) q))
(defun bit-vector-intersection-b2 (p q) (bit-and p q t))
(defun bit-vector-union-b2 (p q) (bit-ior p q t))
(defun bit-vector-difference-b2 (p q) (bit-andc2 p q t))

;; bvector-map(proc)
(defun bit-vector-assign-m2 (p q) (map-into p (function identity) q))
(defun bit-vector-intersection-m2 (p q) (map-into p (function logand) p q))
(defun bit-vector-union-m2 (p q) (map-into p (function logior) p q))
(defun bit-vector-difference-m2 (p q) (map-into p (function logandc2) p q))

(defun bit-vector-contains (v e) (not (zerop (aref v e))))
(defun bit-vector-include (v e) (setf (aref v e) 1))
(defun bit-vector-exclude (v e) (setf (aref v e) 0))


(defun bit-vector-cardinal (v)
(let ((cardinal 0))
(DOTIMES (ELEMENT (LENGTH v))
(UNLESS (ZEROP (AREF v ELEMENT)) (incf cardinal)))
cardinal));;bit-vector-cardinal

#+bset
(DEFUN COPY-BIT-VECTOR-TO-BSET (BV BS)
(BSET:ASSIGN-EMPTY BS)
(DOTIMES (ELEMENT (LENGTH BV))
(UNLESS (ZEROP (AREF BV ELEMENT)) (BSET:INCLUDE BS ELEMENT)))
bs);;COPY-BIT-VECTOR-TO-BSET


#+clisp (ext:gc)
#+sbcl (sb-ext:gc)

(defparameter stat-text
(with-output-to-string (*trace-output*)
(dolist (ls '((100 65536) (1000 256) (10000 32)))
(let ((loops (first ls))(size (second ls)))
(defparameter p (make-random-bit-vector size))
(defparameter q (make-random-bit-vector size))
(defparameter r (make-random-bit-vector size))
(defparameter s (make-random-bit-vector size))
#+bset
(progn
(DEFPARAMETER BP (BSET:MAKE-BSET size))
(DEFPARAMETER BQ (BSET:MAKE-BSET size))
(DEFPARAMETER BR (BSET:MAKE-BSET size))
(DEFPARAMETER BS (BSET:MAKE-BSET size))
(COPY-BIT-VECTOR-TO-BSET P BP)
(COPY-BIT-VECTOR-TO-BSET Q BQ))
(defparameter ip (copy-bit-vector-to-integer p))
(defparameter iq (copy-bit-vector-to-integer q))
(format *trace-output* "~&:try (:loops ~D :size ~D)~%" loops size)
(format *trace-output* "~&:kind (integer func)~%")
(finish-output *trace-output*)
(let ((c 0))
(time (dotimes (i loops)
(let ((s (integer-difference
(integer-union ip iq)
(integer-intersection ip iq))))
(dotimes (e size)
(when (integer-contains s e) (incf c)))))))
(format *trace-output* "~&:kind (bit-vector-bit func)~%")
(finish-output *trace-output*)
(let ((c 0))
(time (dotimes (i loops)
(let ((s (bit-vector-difference
(bit-vector-union p q)
(bit-vector-intersection p q))))
(dotimes (e size)
(when (bit-vector-contains s e) (incf c)))))))
(format *trace-output* "~&:kind (bit-vector-map func)~%")
(finish-output *trace-output*)
(let ((c 0))
(time (dotimes (i loops)
(let ((s (bit-vector-difference-m
(bit-vector-union-m p q)
(bit-vector-intersection-m p q))))
(dotimes (e size)
(when (bit-vector-contains s e) (incf c)))))))
(format *trace-output* "~&:kind (bit-vector-bit proc)~%")
(finish-output *trace-output*)
(let ((c 0))
(time (dotimes (i loops)
(bit-vector-assign-b2 r p)
(bit-vector-intersection-b2 r q)
(bit-vector-assign-b2 s p)
(bit-vector-union-b2 s q)
(bit-vector-difference-b2 s r)
(dotimes (e size)
(when (bit-vector-contains s e) (incf c))))))
(format *trace-output* "~&:kind (bit-vector-map proc)~%")
(finish-output *trace-output*)
(let ((c 0))
(time (dotimes (i loops)
(bit-vector-assign-m2 r p)
(bit-vector-intersection-m2 r q)
(bit-vector-assign-m2 s p)
(bit-vector-union-m2 s q)
(bit-vector-difference-m2 s r)
(dotimes (e size)
(when (bit-vector-contains s e) (incf c))))))
#+bset
(progn
(format *trace-output* "~&:kind (bset proc)~%")
(finish-output *trace-output*)
(let ((c 0))
(time (dotimes (i loops)
(bset:assign br bp)
(bset:intersection br bq)
(bset:assign bs bp)
(bset:union bs bq)
(bset:difference bs br)
(dotimes (e size)
(when (bset:is-element e bs) (incf c))))))
(finish-output *trace-output*))
)))) ;;stat-text


(defparameter stat-sexp
(block :post
(with-input-from-string (*standard-input* stat-text)
(do ((line (read-line *standard-input* nil nil)
(read-line *standard-input* nil nil))
(result '())
(word))
((null line) (nreverse result))
(block :got-value
(let* ((eof (gensym)))
(macrolet ((read-word
()
`(let ((val (read *standard-input* nil eof)))
(if (eq eof val) (return-from :got-value) val))))
(with-input-from-string
(*standard-input* (substitute (character " ")
(character ":") line))
(setf word (read-word))
(case word
((kind try) (push (list(intern (string word)
(find-package "KEYWORD"))
(read-word)) result))
((real gc evaluation))
((run) (read-word) (push (list :time (read-word)) result))
((space) (push (list :space (read-word)) result))
(otherwise
(when (numberp word)
(read-word)
(cond
((eq (read-word) 'consed.)
(push (list :space word) result))
((eq (read-word) 'user)
(push (list :time word) result))))))))))))));;stat-sexp

;; (format t "~&~S~%" stat-sexp)

(defparameter stats
(read-from-string
(format nil "#4A~S"
(list
(let ((result (list)))
(dolist (item stat-sexp)
(case (car item)
((:try) (push (list) result))
((:kind) (push (list) (car result)))
((:time) (push (cadr item) (caar result)))
((:space) (push (cadr item) (cdaar result)))))
result)))));;stats

;;(format t "~&~S~%" stats)


(defun normalize-stats (stats)
(let ((normalized (make-array (array-dimensions stats))))
(dotimes (try (array-dimension stats 1))
(dotimes (cl (array-dimension stats 0))
(dotimes (kind (array-dimension stats 3))
(let ((max (apply
(function max)
(loop for imp
from 0 to (1- (array-dimension stats 2))
collect (aref stats cl try imp kind)
into res
return res) )))
(dotimes (imp (array-dimension stats 2))
(setf (aref normalized cl try imp kind)
(coerce
(if (= 0 max)
0 (/ (aref stats cl try imp kind) max)) 'float))))
)))
normalized));;normalize-stats


(defun print-stats (titles stats &key normalized order)
;;order: '(0 1 2 3) '(1 2 0 3)
(let ((ivar (make-array '(4)))
(iorder (map 'vector (function identity) order)))
(format t "~&~20A " "")
(dotimes (k (length (elt titles (aref iorder 2))))
(dotimes (l (length (elt titles (aref iorder 3))))
(format t "~8A " (elt (elt titles (aref iorder 2)) k))))
(format t "~&~20A " "")
(dotimes (k (length (elt titles (aref iorder 2))))
(dotimes (l (length (elt titles (aref iorder 3))))
(format t "~8A " (elt (elt titles (aref iorder 3)) l))))
(dotimes (i (length (elt titles (aref iorder 0))))
(setf (aref ivar (aref iorder 0)) i)
(format t "~&~20A "(elt (elt titles (aref iorder 0)) i))
(dotimes (j (length (elt titles (aref iorder 1))))
(setf (aref ivar (aref iorder 1)) j)
(format t "~& ~18A "(elt (elt titles (aref iorder 1)) j))
(dotimes (k (length (elt titles (aref iorder 2))))
(setf (aref ivar (aref iorder 2)) k)
(dotimes (l (length (elt titles (aref iorder 3))))
(setf (aref ivar (aref iorder 3)) l)
(let ((value (aref stats (aref ivar 0)(aref ivar 1)(aref ivar 2)(aref ivar 3))))
(cond
(normalized (format t "~8,2f " value))
((integerp value) (format t "~8Dk " (truncate value 1000)))
(t (format t "~8,2f " value))))))))));;print-stats


(defparameter titles
'((#+clisp clisp #+sbcl sbcl)
("100x65536" "1000x256" "10000x32")
("integer(func)"
"bvector-bit(func)" "bvector-map(func)"
"bvector-bit(proc)" "bvector-map(proc)"
#+bset "bset(proc)")
("time" "space")));;titles


(defparameter normalized (normalize-stats stats))


(progn (print-stats titles stats :order '(1 2 0 3))
(print-stats titles normalized :order '(1 2 0 3)))

;;;; bv-test.lisp -- 2004-03-01 04:09:45 -- pascal ;;;;

0 new messages