This is experimental for now, but the code should work just fine:
/* Signed machine word used for every field of the mutex. */
typedef signed int ac_i686_intword_t;

/*
 * State block for the two-party mutex driven by the assembly routines.
 *
 * The assembly addresses the fields by fixed byte offsets (0, 4 and 8),
 * so the three words must stay adjacent and in this order; the whole
 * object is aligned on a 128-byte boundary.
 */
typedef struct __attribute__( (packed, aligned( 128 )) ) ac_i686_mutex_
{
  ac_i686_intword_t m1; /* offset 0: set by lock_1, cleared by unlock_1 */
  ac_i686_intword_t m2; /* offset 4: set by lock_2, cleared by unlock_2 */
  ac_i686_intword_t t;  /* offset 8: turn word (lock_1 stores 2, lock_2 stores 1) */
} ac_i686_mutex_t;
/*
 * Put a mutex into its initial, unlocked state: both request flags
 * (m1, m2) cleared and the turn word t set to 1.
 *
 * ac_macro_this: pointer to the mutex to initialize.  The argument is
 * expanded three times, so it must not have side effects.
 *
 * The body is wrapped in do { ... } while ( 0 ) so the macro expands to
 * exactly one statement; without the wrapper, using it as the body of an
 * unbraced if/else/for would guard only the first assignment.
 */
#define ac_i686_mutex_init( ac_macro_this ) \
  do { \
    (ac_macro_this)->m1 = 0; \
    (ac_macro_this)->m2 = 0; \
    (ac_macro_this)->t = 1; \
  } while ( 0 )
/*
 * Lock/unlock entry points, implemented in assembly.
 *
 * The "_1" pair sets/clears m1 and spins on m2; the "_2" pair sets/clears
 * m2 and spins on m1.  Both lock routines also write the turn word t.
 * NOTE(review): presumably exactly one thread uses the "_1" pair and one
 * other thread uses the "_2" pair on a given mutex -- confirm with callers.
 *
 * The implementations read the mutex pointer from [esp + 4] and return
 * with a plain ret, i.e. a stack-passed argument that the caller cleans
 * up (presumably cdecl -- confirm against the build's default convention).
 */
extern void ac_i686_mutex_lock_1( ac_i686_mutex_t* );
extern void ac_i686_mutex_unlock_1( ac_i686_mutex_t* );
extern void ac_i686_mutex_lock_2( ac_i686_mutex_t* );
extern void ac_i686_mutex_unlock_2( ac_i686_mutex_t* );
align 16
;-----------------------------------------------------------------------
; void ac_i686_mutex_lock_1( ac_i686_mutex_t* mutex )
;
; Acquire path for side 1 of the two-party mutex (Peterson-style:
; own flag m1 at +0, peer flag m2 at +4, turn word t at +8).
;
; In:     [esp + 4] = mutex
; Out:    nothing
; Writes: eax, flags
;-----------------------------------------------------------------------
ac_i686_mutex_lock_1 PROC
        mov     eax, [esp + 4]          ; eax = mutex
        mov     dword ptr [eax], 1      ; m1 = 1: side 1 requests the lock
        mov     dword ptr [eax + 8], 2  ; t = 2: hand the turn to side 2
        mfence                          ; order the two stores before the loads below
ac_i686_mutex_lock_1_spin:
        cmp     dword ptr [eax + 4], 0  ; m2 == 0: side 2 is not requesting
        je      ac_i686_mutex_lock_1_owned
        pause
        cmp     dword ptr [eax + 8], 1  ; t == 1: side 2 has since given the turn to us
        je      ac_i686_mutex_lock_1_owned
        pause
        jmp     ac_i686_mutex_lock_1_spin
ac_i686_mutex_lock_1_owned:
        ret
ac_i686_mutex_lock_1 ENDP
align 16
;-----------------------------------------------------------------------
; void ac_i686_mutex_unlock_1( ac_i686_mutex_t* mutex )
;
; Release path for side 1: clears the m1 request flag (offset 0) with a
; single plain store; no fence is issued.
;
; In:     [esp + 4] = mutex
; Out:    nothing
; Writes: eax
;-----------------------------------------------------------------------
ac_i686_mutex_unlock_1 PROC
        mov     eax, [esp + 4]          ; eax = mutex
        mov     dword ptr [eax], 0      ; m1 = 0: side 1 no longer requests the lock
        ret
ac_i686_mutex_unlock_1 ENDP
align 16
;-----------------------------------------------------------------------
; void ac_i686_mutex_lock_2( ac_i686_mutex_t* mutex )
;
; Acquire path for side 2 of the two-party mutex (Peterson-style:
; own flag m2 at +4, peer flag m1 at +0, turn word t at +8).
;
; In:     [esp + 4] = mutex
; Out:    nothing
; Writes: eax, flags
;-----------------------------------------------------------------------
ac_i686_mutex_lock_2 PROC
        mov     eax, [esp + 4]          ; eax = mutex
        mov     dword ptr [eax + 4], 1  ; m2 = 1: side 2 requests the lock
        mov     dword ptr [eax + 8], 1  ; t = 1: hand the turn to side 1
        mfence                          ; order the two stores before the loads below
ac_i686_mutex_lock_2_spin:
        cmp     dword ptr [eax], 0      ; m1 == 0: side 1 is not requesting
        je      ac_i686_mutex_lock_2_owned
        pause
        cmp     dword ptr [eax + 8], 2  ; t == 2: side 1 has since given the turn to us
        je      ac_i686_mutex_lock_2_owned
        pause
        jmp     ac_i686_mutex_lock_2_spin
ac_i686_mutex_lock_2_owned:
        ret
ac_i686_mutex_lock_2 ENDP
align 16
;-----------------------------------------------------------------------
; void ac_i686_mutex_unlock_2( ac_i686_mutex_t* mutex )
;
; Release path for side 2: clears the m2 request flag (offset 4) with a
; single plain store; no fence is issued.
;
; In:     [esp + 4] = mutex
; Out:    nothing
; Writes: eax
;-----------------------------------------------------------------------
ac_i686_mutex_unlock_2 PROC
        mov     eax, [esp + 4]          ; eax = mutex
        mov     dword ptr [eax + 4], 0  ; m2 = 0: side 2 no longer requests the lock
        ret
ac_i686_mutex_unlock_2 ENDP
--
http://appcore.home.comcast.net/
(portable lock-free data-structures)
This is redundant; you can remove it.