C++ native-win32 waitset class for eventcount...

139 views
Skip to first unread message

Chris Thomasson

unread,
Nov 4, 2006, 3:02:16 PM11/4/06
to
Here is the C++ for the win32 waitset; it should compile:


<-------------------->
#include <windows.h>
#include <limits.h>


namespace win32 {
class waitset {
enum prv_flags_e {
HANDLE_DEPTH = 2,
HANDLE_SIGNAL = 0,
HANDLE_MUTEX = 1
};

HANDLE m_handles[HANDLE_DEPTH];
LONG m_waiters;


public:
waitset() : m_waiters(0) {
m_handles[HANDLE_SIGNAL] = ::CreateSemaphore(NULL, 0, LONG_MAX, NULL);
m_handles[HANDLE_MUTEX] = ::CreateMutex(NULL, FALSE, NULL);
}

~waitset() {
::CloseHandle(m_handles[HANDLE_MUTEX]);
::CloseHandle(m_handles[HANDLE_SIGNAL]);
}


private:
bool handle_wait(HANDLE *handles, DWORD depth, DWORD tout) {
DWORD ret = ::WaitForMultipleObjects(depth, handles, TRUE, tout);
if (ret == WAIT_TIMEOUT) { return false; }
else if (ret == WAIT_FAILED) { throw; }
return true;
}


public:
bool timedlock(DWORD tout) {
return handle_wait(&m_handles[HANDLE_MUTEX], 1, tout);
}

bool trylock() { return timedlock(0); }
void lock() { timedlock(INFINITE); }
void unlock() { ::ReleaseMutex(m_handles[HANDLE_MUTEX]); }


public:
bool timedwait(DWORD tout) {
++m_waiters;
unlock();
if (! handle_wait(m_handles, HANDLE_DEPTH, tout)) {
lock();
if (! handle_wait(&m_handles[HANDLE_SIGNAL], 1, 0)) {
--m_waiters;
return false;
}
}
return true;
}

bool trywait() { return timedwait(0); }
void wait() { timedwait(INFINITE); }


public:
void signal_and_unlock(int count = 1) {
if (m_waiters) {
if (m_waiters > count) {
m_waiters -= count;
} else {
count = m_waiters;
m_waiters = 0;
}
::ReleaseSemaphore(m_handles[HANDLE_SIGNAL], count, 0);
}
unlock();
}

void broadcast_and_unlock() { signal_and_unlock(m_waiters); }
};

} // namespace win32 {

</--------------------------->


Here is a tweaked pseudo-code version of my eventcount that makes use of the
win32 waitset class interface:


class eventcount
{
typedef win32::waitset waitset_t;
enum flags_e { WAITER_FLAG = 0x80000000, WAITER_MASK = 0x7FFFFFFF };


public:
typedef int count_t;


private:
mutable count_t volatile m_count;
waitset_t m_wset;


public:
eventcount() : m_count( 0 ) {}


private:
bool inc(count_t local) {
if (local & WAITER_FLAG) {
m_wset.lock();

while (! ATOMIC_CAS(&m_count, local, (local + 1) & WAITER_MASK)) {
local = ATOMIC_LOAD(&m_count);
}

return true;
}
return false;
}


public:
count_t get() const throw() {
count_t local = ATOMIC_LOAD(&m_count);
membar #StoreLoad | #LoadLoad;
ATOMIC_OR(&m_count, WAITER_FLAG);
return local & WAITER_MASK;
}


public:
void signal() {
membar #StoreLoad | #StoreStore;
if (inc(ATOMIC_LOAD(&m_count))) {
m_wset.signal_and_unlock();
}
}

void broadcast() {
membar #StoreLoad | #StoreStore;
if (inc(ATOMIC_LOAD(&m_count))) {
m_wset.broadcast_and_unlock();
}
}


public:
bool trywait(count_t cmp) {
bool ret = true;
int local = ATOMIC_LOAD(&m_count) & WAITER_MASK;
membar #LoadStore | #LoadLoad;
if (local == cmp) {
ret = m_wset.trylock();
if (ret) {
local = m_count & WAITER_MASK;
while(ret && local == cmp) {
ret = m_wset.trywait();
local = m_count & WAITER_MASK;
}
m_wset.unlock();
}
}
return (ret || (local != cmp));
}

bool timedwait(count_t cmp, const struct timespec *tout) {
bool ret = true;
int local = ATOMIC_LOAD(&m_count) & WAITER_MASK;
membar #LoadStore | #LoadLoad;
if (local == cmp) {
ret = m_wset.timedlock(tout);
if (ret) {
local = m_count & WAITER_MASK;
while(ret && local == cmp) {
if (tout) {
ret = m_wset.timedwait(TIMESPEC_TO_MILLISECONDS(tout));
} else {
m_wset.wait();
}
local = m_count & WAITER_MASK;
}
m_wset.unlock();
}
}
return (ret || (local != cmp));
}

void wait(count_t cmp) { (void)timedwait(cmp, 0); }
};


Now my eventcount algorithm for windows doesn't have to depend on
pthread-win32!

;^)


Any thoughts?


Chris Thomasson

unread,
Nov 4, 2006, 8:22:31 PM11/4/06
to
Just though of something... You can do a futex emulation with the win32
waitset thing I posted... Something like this:

Add the following function to win32::waitset


public:
int get_waiters_sys() const throw() {
return m_waiters;
}

int get_signal_sys(int count) {


if (m_waiters) {
if (m_waiters < count) {

count = m_waiters;
}
return count;
}
return 0;
}

And add the following namespace somewhere:

// this should compile fine...
namespace futex {
enum flags_e {
WAITERS_FLAG = 0x80000000,
WAITERS_MASK = 0x7FFFFFFF
};

typedef LONG type_t;
typedef win32::waitset waitset_t;


namespace sys {
type_t load_and_adjust_waiters(type_t volatile *pfutex, int waiters) {
type_t tmp, cmp = *pfutex;
do {
tmp = cmp;
cmp = ::InterlockedCompareExchange(
pfutex, (! waiters) ? cmp & WAITERS_MASK : cmp | WAITERS_FLAG,
cmp);
} while(cmp != tmp);
return cmp;
}

bool validate_and_adjust_waiters(type_t volatile *pfutex, type_t cmp,
int waiters) {
return (load_and_adjust_waiters(pfutex, waiters) & WAITERS_MASK != cmp
& WAITERS_MASK);
}

bool validate(type_t volatile *pfutex, type_t cmp) {
return (*pfutex & WAITERS_MASK != cmp & WAITERS_MASK);
}
}


bool timedwait(type_t volatile *pfutex, type_t cmp, DWORD tout, waitset_t
&wset) {

// non-atomic waiters get; harmless here.
if (sys::validate_and_adjust_waiters(pfutex, cmp,
wset.get_waiters_sys())) {
return true;
}

wset.lock();

bool ret = true;

// atomic waiters get; double-check the non-atomic get.
bool valid = sys::validate_and_adjust_waiters(pfutex, cmp,
wset.get_waiters_sys());
while(ret && ! valid) {
sys::load_and_adjust_waiters(pfutex, 1);
ret = wset.timedwait(tout);
valid = sys::validate_and_adjust_waiters(pfutex, cmp,
wset.get_waiters_sys());
}
wset.unlock();
return valid;
}


bool wait(type_t volatile *pfutex, type_t cmp, waitset_t &wset) {
return timedwait(pfutex, cmp, INFINITE, wset);
}


int signal(type_t volatile *pfutex, int signals, waitset_t &wset) {
wset.lock();
signals = wset.get_signal_sys(signals);
sys::load_and_adjust_waiters(pfutex, wset.get_waiters_sys() - signals);
wset.signal_and_unlock(signals);
return signals;
}


} // namespace futex {


// sample usage...
static futex::waitset_t g_futex_wset;
static futex::type_t g_futex = 0;


void mutex_lock() {
while(! ATOMIC_BTS(&g_futex, 0x1)) {
futex::wait(&g_futex, 0x1, g_futex_wset);
}
}


void mutex_unlock() {
if (ATOMIC_SWAP(&g_futex, 0) & futex::WAITERS_FLAG) {
futex::signal(&g_futex, 1, g_futex_wset);
}
}


Cool! Futexs on Windows!


lol...

;^)


Joe Seigh

unread,
Nov 5, 2006, 8:17:31 AM11/5/06
to
Chris Thomasson wrote:
> Just though of something... You can do a futex emulation with the win32
> waitset thing I posted... Something like this:
>
>
>
>
>
> Cool! Futexs on Windows!
>
>

I was expecting a SignalObjectAndWait in the waitset stuff somethere.
But Semaphores would have the same problem that Events have with
kernel APC calls.


--
Joe Seigh

When you get lemons, you make lemonade.
When you get hardware, you make software.

Chris Thomasson

unread,
Nov 5, 2006, 10:50:05 AM11/5/06
to
"Joe Seigh" <jsei...@xemaps.com> wrote in message
news:CfOdnSV8V7pgftDY...@comcast.com...

> Chris Thomasson wrote:
>> Just though of something... You can do a futex emulation with the win32
>> waitset thing I posted... Something like this:

[...]

>> Cool! Futexs on Windows!

[...]

> I was expecting a SignalObjectAndWait in the waitset stuff somethere.
> But Semaphores would have the same problem that Events have with
> kernel APC calls.


I guess I could add this to win32::waitset for the APC stuff:


private:
bool handle_wait_ex(HANDLE *handles, DWORD depth, DWORD tout) {
DWORD tout_start = GetTickCount();
DWORD ret;

for(;;) {
ret = ::WaitForMultipleObjectsEx(depth, handles, TRUE, tout, TRUE);


if (ret == WAIT_TIMEOUT) { return false; }

else if (ret == WAIT_IO_COMPLETION) {
DWORD tout_elapse = GetTickCount() - tout_start;
if (tout_elapse >= tout) { return false; }
tout -= tout_elapse;
tout_start = GetTickCount();
continue;
}

else if (ret != WAIT_FAILED) { break; }
throw;
};

return true;
}

I that what you were getting at?


> I was expecting a SignalObjectAndWait in the waitset stuff somethere.

Luckily, this waitset is specialized for an eventcount, and happens to work
well with a futex emulation thing... That means I don't have to emulate
strict condvar behavior (e.g., Alex Terekhovs win32 condvar algorithm). The
eventcount logic automatically takes care of all that stuff... In fact, why
not just use the eventcount for win32 to implement a speedy, and correct
win32 condvar? I think pthread-win32 algorithm has to use 3 semaphores...
Well, my "special" waitset has one less kernel object. Also, the eventcount
can heavily fast-path basically any condvar logic I throw on top of it... I
don't think that pthread-win32 condvar has a fast-path?


Joe Seigh

unread,
Nov 5, 2006, 12:56:18 PM11/5/06
to
Chris Thomasson wrote:
> "Joe Seigh" <jsei...@xemaps.com> wrote in message
> news:CfOdnSV8V7pgftDY...@comcast.com...
>
>>Chris Thomasson wrote:
>>
>>>Just though of something... You can do a futex emulation with the win32
>>>waitset thing I posted... Something like this:
>
>
> [...]
>
>
>>>Cool! Futexs on Windows!
>
>
> [...]
>
>
>>I was expecting a SignalObjectAndWait in the waitset stuff somethere.
>>But Semaphores would have the same problem that Events have with
>>kernel APC calls.
>
>
[...]

>
>
> I that what you were getting at?
>
>
>
>
>
>>I was expecting a SignalObjectAndWait in the waitset stuff somethere.
>
>
> Luckily, this waitset is specialized for an eventcount, and happens to work
> well with a futex emulation thing... That means I don't have to emulate
> strict condvar behavior (e.g., Alex Terekhovs win32 condvar algorithm). The
> eventcount logic automatically takes care of all that stuff... In fact, why
> not just use the eventcount for win32 to implement a speedy, and correct
> win32 condvar? I think pthread-win32 algorithm has to use 3 semaphores...
> Well, my "special" waitset has one less kernel object. Also, the eventcount
> can heavily fast-path basically any condvar logic I throw on top of it... I
> don't think that pthread-win32 condvar has a fast-path?
>
>

The problem with waitsets is keeping later arrivals from stealing signals
meant for threads already in the wait set. The problem with win32 kernel
APC calls is they take threads out of waiting on a synchronization object
and put it back on later so the thread can miss a signal.

Chris Thomasson

unread,
Nov 5, 2006, 2:48:12 PM11/5/06
to
> The problem with waitsets is keeping later arrivals from stealing signals
> meant for threads already in the wait set.

Well, you can always use Alex's trick and include a bin-sema to force new
waiters to wait until a broadcast to a wait-generation is completed.
However, this senerio is addressed through the functioality of the
eventcount. It has the proper generation logic, so the underlying waitset
should be able to harmlessly steal its own private signals.


On the other hand, you simply cannot use my waitset code as a condition
variable directly because of the exact problem you described... However, I
believe that you can indeed use it for an eventcount. Then you can use the
eventcount to build the condition variable...

Any thoughts?


> The problem with win32 kernel
> APC calls is they take threads out of waiting on a synchronization object
> and put it back on later so the thread can miss a signal.

Yeah, no good for condition variables directly... But, I think it works with
an eventcount because its monotonic version counter double-checked compare
logic should keep the waiter generations bound to their previous gets...
Basically, if they stick to your usage pattern, everything should be fine.
For example, here is "extreme pseudo-code" on how my eventcount interacts
with the win32 waitset:

int get() {
G1: load eventcount and set waiters-bit
G2: mask waiters-bit and return
}


void wait(int cmp) {
W1: load eventcount and mask waiters-bit
W2: compare count to the previous get 'cmp'
W3: if not equal return, otherwise lock the win32::waitset
W4: reload eventcount and mask waiters-bit
W5: compare count to the previous get 'cmp'
W6: if not equal unlock and return, otherwise wait on win32::waitset
W7: goto W4
}


void signal() {
S1: load eventcount
S2: check count for waiters-bit
S3: if not set return, otherwise lock the win32::waitset
S4: inc eventcount and clear waiters-bit
S5: signal_and_unlock the win32::waitset
}


If you couple that with your usage pattern and there should be no problems;
what do you think here?


Chris Thomasson

unread,
Nov 6, 2006, 12:29:25 AM11/6/06
to
"Joe Seigh" <jsei...@xemaps.com> wrote in message
news:CfKdnREEVYnOuNPY...@comcast.com...

> Chris Thomasson wrote:
>> "Joe Seigh" <jsei...@xemaps.com> wrote in message
>> news:CfOdnSV8V7pgftDY...@comcast.com...
>>>Chris Thomasson wrote:

[...]

>>>>Just though of something... You can do a futex emulation with the win32
>>>>waitset thing I posted... Something like this:

[...]

>>>>Cool! Futexs on Windows!


>>>I was expecting a SignalObjectAndWait in the waitset stuff somethere.

>> I that what you were getting at?

[...]

> The problem with waitsets is keeping later arrivals from stealing signals
> meant for threads already in the wait set. The problem with win32 kernel
> APC calls is they take threads out of waiting on a synchronization object
> and put it back on later so the thread can miss a signal.

As you know, this is all solvable by making broadcasts swap out the waitset
and defer it. That way, a waitset can be completely isolated from any "later
arrivals" from stealing a signal from it:


broadcast(old-waitset**) {
B1: pop a new-waitset* from a cache, or create one;
B2: atomically swap old-waitset** with new-waitset*;
B3: broadcast to the swapped waitset; swap-waitset*;
B4: defer swap-waitset*;
}

wait(cur-waitset**) {
W1: atomically acquire local-waitset* from cur-waitset**;
W2: wait on local-waitset
W3: atomically release local-waitset
}


of course this implies atomic reference counting or PDR, but it does work. I
have heavily experimented with this technique while exploring possible
solutions' to the race-condition in POSIX:


http://groups.google.com/group/comp.programming.threads/browse_frm/thread/8b855eab7dc55ad9/14dc31d4327d12f0?lnk=gst&q=and+the+EBUSY+&rnum=2#14dc31d4327d12f0

http://groups.google.com/group/comp.programming.threads/browse_frm/thread/38d68d1289352a53/7e52b3c8552fdae9?lnk=gst&q=and+the+EBUSY+&rnum=1#7e52b3c8552fdae9


However, I think my win32 waitset and eventcount is possibly a better
overall solution. You don't have to manage the memory that makes up the
waitsets and you don't have to create/cache waitsets objects on the fly...
Humm...


Reply all
Reply to author
Forward
0 new messages