Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

Mutex, Compare and swap

19 views
Skip to first unread message

Johan Ur Riise

unread,
Mar 3, 2009, 11:11:31 PM3/3/09
to
;; Here is a fun experiment. It demonstrates the effect of two strategies
;; for handling concurrent update in sbcl, mutexes and compare-and-swap,
;; plus the effect of mindlessly updating.

;; All experiments run a number of threads, each thread wants to
;; increment a variable a number of times, then decrement the
;; variable the same number of times. The variable is shared between
;; the threads. Since the threads doesnt do anything else, the
;; contention is high.

;; First the functions and macros that make up my test-harness.

;; Just to save some keystrokes:
(defun thread-name ()
(sb-thread:thread-name sb-thread:*current-thread*))

;; Traditional locking for print to avoid cut off lines etc

(defparameter *say-mutex* (sb-thread:make-mutex))

;; stdout is a closed over variable in the thread closures. It contains
;; *standard-output* from the repl.
(defmacro say (&rest forms)
"Needed to print from threads in slime"
`(sb-thread:with-mutex (*say-mutex*)
(funcall 'format stdout ,@forms)
(fresh-line stdout)
(finish-output stdout)))

;; Saving more keystrokes
(defmacro elapsed ()
`(- (get-internal-real-time) start))

;; Creates a thread function (closure) with the supplied body,
;; starts a number of threads and waits for them to complete.

(defmacro with-threads ((thread-count)
&body body)
(let ((threadno (gensym)))
`(mapcar #'sb-thread:join-thread
(loop for ,threadno below ,thread-count collect
(sb-thread:make-thread
(lambda ()
,@body)
:name (format nil "Thread-~d" ,threadno))))))

;; Binding some variables
(defmacro with-mumble (&body body)
`(let ((start (get-internal-real-time))
(stdout *standard-output*))
,@body))

;; The data we are going to work on
(defparameter *data* 0)

;; Reports the data
(defmacro conclude (text data)
`(say "~a rest-value: ~d (~a), operations: ~d time used: ~d ms"
,text
,data
(if (= 0 ,data) "OK" "WRONG")
(* 2 iteration-count thread-count)
(elapsed)))

;;The first test increments and decrements a lexical variable.
;; With only 100 iterations, it seems to work mostly ok
;;CL-USER> (t0 100 4)
;;Thread-0 done
;;Thread-1 done
;;Thread-2 done
;;Thread-3 done
;;Unprotected, lexical, rest-value: 0 (OK), operations: 800 time used: 2 ms

;; but with about 3000 iterations it fails sometimes, and with 10000 iterations
;; it fails each time

;;CL-USER> (t0 10000 4)
;;Thread-2 done
;;Thread-1 done
;;Thread-0 done
;;Thread-3 done
;;Unprotected, lexical, rest-value: -3334 (WRONG), operations: 80000 time used: 5 ms
;;Here is the test function
(defun t0 (iteration-count thread-count)
(with-mumble
(let ((data 0))
(with-threads (thread-count)
(loop for x below iteration-count do (incf data))
(loop for x below iteration-count do (decf data))
(say ";;~a done" (thread-name)))
(conclude ";;Unprotected, lexical," data))))

;; The next test works on a special variable, the result is
;; just like previous example
;; CL-USER> (t1 10000 6)
;;Thread-0 done
;;Thread-1 done
;;Thread-2 done
;;Thread-4 done
;;Thread-3 done
;;Thread-5 done
;;Unprotected, special, rest-value: 3880 (WRONG), operations: 120000 time used: 9 ms
;;Here is the test function
(defun t1 (iteration-count thread-count)
(with-mumble
(setf *data* 0)
(with-threads (thread-count)
(loop for x below iteration-count do (incf *data*))
(loop for x below iteration-count do (decf *data*))
(say ";;~a done" (thread-name)))
(conclude ";;Unprotected, special," *data*)))

;; Another one, incrementing with discrete instructions, don't know if these
;; are optimized to be like incf, anyway the result is the same
(defun t1% (iteration-count thread-count)
(with-mumble
(setf *data* 0)
(with-threads (thread-count)
(loop for x below iteration-count do (let ((new (1+ *data*))) (setf *data* new)))
(loop for x below iteration-count do (let ((new (1- *data*))) (setf *data* new)))
(say ";;~a done" (thread-name)))
(conclude ";;Unprotected, special," *data*)))

;;We have to protect the data from concurrent update from multiple threads, this
;;test uses traditional locks
;;Here we get the correct result each time.
;;CL-USER> (t2 10000 4)
;;Thread-0 done
;;Thread-2 done
;;Thread-1 done
;;Thread-3 done
;;Mutex, rest-value: 0 (OK), operations: 80000 time used: 91 ms

;;CL-USER> (t2 100000 12)
;;Thread-9 done
;;Thread-0 done
;;Thread-5 done
;;Thread-3 done
;;Thread-11 done
;;Thread-6 done
;;Thread-8 done
;;Thread-7 done
;;Thread-1 done
;;Thread-10 done
;;Thread-4 done
;;Thread-2 done
;;Mutex, rest-value: 0 (OK), operations: 2400000 time used: 5209 ms

;;the test-function with locking:
(defun t2 (iteration-count thread-count)
(with-mumble
(let ((mutex (sb-thread:make-mutex)))
(setf *data* 0)
(with-threads (thread-count)
(loop for x below iteration-count do (sb-thread:with-mutex (mutex) (incf *data*)))
(loop for x below iteration-count do (sb-thread:with-mutex (mutex) (decf *data*)))
(say ";;~a done" (thread-name)))
(conclude ";;Mutex," *data*))))


;; Now to the mysterious compare-and-swap. This function takes a place, a new value
;; and an old value. It tries to set the place to the new value in an atomic operation,
;; but only if the existing value of place is as specified in old. If the return value
;; is something different from what we specified as old value, the swap is not
;; performed. The reason for this would be that another thread has changed the value
;; in the meantime.

;; The operation does not use locks, but since we might not succeed, we have to retry
;; it until we do. This macro applies function to place using compare-and-swap,
;; retrying until it succeeds. Each time it does not succeed, it increments a
;; counter that has to be bound in advance.

(defmacro protect (place function)
`(let (old new)
(loop
do
(setf old ,place)
(setf new (funcall ,function old))
until (eql old (compare-and-swap ,place old new))
do (incf unsuccessful-cas))))

;; Here is the test, doing the same increment and decrement of the common
;; variable.

(defun t3 (iteration-count thread-count)
(with-mumble
(setf *data* 0)
(with-threads (thread-count)
(let ((unsuccessful-cas 0))
(loop for x below iteration-count do
(protect (symbol-value '*data*) #'1+))
(loop for x below iteration-count do
(protect (symbol-value '*data*) #'1-))
(say ";;~a unsuccessful-cas ~d" (thread-name) unsuccessful-cas)))
(conclude ";;CAS," *data*)))

;;The first run has only 1000 inc's and dec's, and all cas operations
;;succeeds
;;CL-USER> (t3 1000 4)
;;Thread-0 unsuccessful-cas 0
;;Thread-1 unsuccessful-cas 0
;;Thread-2 unsuccessful-cas 0
;;Thread-3 unsuccessful-cas 0
;;CAS, rest-value: 0 (OK), operations: 8000 time used: 3 ms

;; Next we increase the count to 10000. Note the high
;; number of retries:
;;CL-USER> (t3 10000 4)
;;Thread-0 unsuccessful-cas 5952
;;Thread-1 unsuccessful-cas 5937
;;Thread-2 unsuccessful-cas 15762
;;Thread-3 unsuccessful-cas 17472
;;CAS, rest-value: 0 (OK), operations: 80000 time used: 17 ms

;; Let's compare with mutexes, these are the same number of
;; iterations and threads as the largest mutex-example:
;;CL-USER> (t3 100000 12)
;;Thread-0 unsuccessful-cas 220289
;;Thread-3 unsuccessful-cas 213210
;;Thread-1 unsuccessful-cas 297819
;;Thread-6 unsuccessful-cas 243704
;;Thread-7 unsuccessful-cas 267340
;;Thread-2 unsuccessful-cas 312256
;;Thread-5 unsuccessful-cas 236505
;;Thread-11 unsuccessful-cas 201204
;;Thread-10 unsuccessful-cas 234383
;;Thread-4 unsuccessful-cas 342688
;;Thread-8 unsuccessful-cas 320228
;;Thread-9 unsuccessful-cas 251330
;;CAS, rest-value: 0 (OK), operations: 2400000 time used: 952 ms

;;As you see, the number of retries is about the same as iterations
;; in total, still the time used is quite lower than the mutex
;;example, 952 ms in stead of 5209 with mutex.


;; insurance against printing the big array
(setf *print-length* 40)

;; Here I use a big array of conses to operate on. The idea is
;; that there should be less unsuccessful cas's when the
;; updating is spread around. I can not use just an array of
;; values, because CAS can not operate on arrays. See
;; (describe 'compare-and-swap)

;; There are still a lot of unsuccessful cas, I don't understand
;; that.

;; In these tests, the CAS function runs about 8 times faster
;; than the mutex function

(defun t4 (iteration-count thread-count)
(let ((array (make-array 1000000)))
(loop for index below (length array) do (setf (aref array index) (cons 0 nil)))
(with-mumble
(let ()
(with-threads (thread-count)
(let ((unsuccessful-cas 0)
(*random-state* (make-random-state t)))
(loop for x below iteration-count do
(let ((cons (aref array (random (length array)))))
(protect (car cons) #'1+)))
(loop for x below iteration-count do
(let ((cons (aref array (random (length array)))))
(protect (car cons) #'1-)))
(say ";;~a unsuccessful-cas ~d" (thread-name) unsuccessful-cas)))
(let ((rest (loop for x across array sum (car x))))
(say ";;Array CAS rest-value: ~d (~a), operations: ~d time used: ~d ms"
rest
(if (= 0 rest) "OK" "WRONG")
(* 2 iteration-count thread-count)
(elapsed)
))))))
;;CL-USER> (t4 100000 12)
;;Thread-10 unsuccessful-cas 3638
;;Thread-4 unsuccessful-cas 5831
;;Thread-5 unsuccessful-cas 13015
;;Thread-0 unsuccessful-cas 6305
;;Thread-11 unsuccessful-cas 5650
;;Thread-1 unsuccessful-cas 15040
;;Thread-9 unsuccessful-cas 3079
;;Thread-2 unsuccessful-cas 20865
;;Thread-3 unsuccessful-cas 14628
;;Thread-6 unsuccessful-cas 15771
;;Thread-8 unsuccessful-cas 28106
;;Thread-7 unsuccessful-cas 14053
;;Array CAS rest-value: 0 (OK), operations: 2400000 time used: 866 ms

(defun t5 (iteration-count thread-count)
(let ((array (make-array 1000000)))
(loop for index below (length array) do (setf (aref array index) (cons 0 nil)))
(with-mumble
(let ((mutex (sb-thread:make-mutex)))
(with-threads (thread-count)
(let ((*random-state* (make-random-state t)))
(loop for x below iteration-count do
(let ((cons (aref array (random (length array)))))
(sb-thread:with-mutex (mutex) (incf (car cons)))))
(loop for x below iteration-count do
(let ((cons (aref array (random (length array)))))
(sb-thread:with-mutex (mutex) (decf (car cons)))))
(say ";;~a finished" (thread-name))))
(let ((rest (loop for x across array sum (car x))))
(say ";;Array mutex rest-value: ~d (~a), operations: ~d time used: ~d ms"
rest
(if (= 0 rest) "OK" "WRONG")
(* 2 iteration-count thread-count)
(elapsed)
))))))

;;CL-USER> (t5 100000 12)
;;Thread-2 finished
;;Thread-6 finished
;;Thread-8 finished
;;Thread-1 finished
;;Thread-7 finished
;;Thread-0 finished
;;Thread-3 finished
;;Thread-4 finished
;;Thread-10 finished
;;Thread-5 finished
;;Thread-9 finished
;;Thread-11 finished
;;Array mutex rest-value: 0 (OK), operations: 2400000 time used: 6853 ms

;; The hardware used in the test are a dual processor Xeon
;; with hyperthreads from a few years back.

;;CL-USER> (time (t4 100000 12))
;;Thread-1 unsuccessful-cas 30462
;;Thread-0 unsuccessful-cas 29751
;;Thread-2 unsuccessful-cas 16
;;Thread-7 unsuccessful-cas 2
;;Thread-3 unsuccessful-cas 23
;;Thread-4 unsuccessful-cas 3
;;Thread-6 unsuccessful-cas 20613
;;Thread-8 unsuccessful-cas 0
;;Thread-9 unsuccessful-cas 1
;;Thread-5 unsuccessful-cas 20965
;;Thread-10 unsuccessful-cas 2
;;Thread-11 unsuccessful-cas 0
;;Array CAS rest-value: 0 (OK), operations: 2400000 time used: 607 ms
;; Evaluation took:
;; 0.631 seconds of real time
;; 1.964122 seconds of total run time (1.948121 user, 0.016001 system)
;; 311.25% CPU
;; 1,511,523,688 processor cycles
;; 12,171,832 bytes consed

;;cat /proc/cpuinfo lists 4 cpus

;;model name : Intel(R) Xeon(TM) CPU 2.40GHz
;;cpu MHz : 2396.048
;;cache size : 512 KB

0 new messages