Fwiw, here is a simple example of how one can use C++ atomics and
membars to create a fairly efficient semaphore. Its based on the lowest
common denominator, a semaphore created with std C++ mutex and condvar
relationship. If you are interested and happen to have some free time to
kill, go ahead and take a look at the code in here:
http://pastebin.com/raw/Q7p6e7Xc
(no ads in this link to pastebin)
Notice the cpp_sem object:
______________________________
// Basic, bare bones C++ Semaphore
struct cpp_sem
{
int m_count;
std::mutex m_mutex;
std::condition_variable m_cond;
cpp_sem(int count = 0) : m_count(count)
{
assert(count > -1);
}
void dec() // wait
{
std::unique_lock<std::mutex> lock(m_mutex);
while (m_count == 0) m_cond.wait(lock);
--m_count;
}
void inc() // post
{
{
std::unique_lock<std::mutex> lock(m_mutex);
++m_count;
}
m_cond.notify_one();
}
};
______________________________
now, this is not optimized at all, wrt trying to avoid the mutex. Take a
look at the following thin layer on top of this, in std C++ and can
eliminate many calls to the "slow" underlying blocking nature. Btw, the
original algo was invented by Joe Seigh. If interested, take a look at
the fsem object:
______________________________
// A faster semaphore using a little spice wrt
// "bolting on" std C++ atomics and membars
struct fsem
{
std::atomic<int> m_count; // atomic sema counter
cpp_sem m_cpp_sem;
fsem(int count = 0) : m_count(count) {}
void dec() // wait
{
int count = m_count.fetch_sub(1, mb_relaxed);
if (count < 1) m_cpp_sem.dec(); // conditional wait
// we acquired a signal, acquire membar
mb_fence(mb_acquire);
}
void inc() // post
{
// we are going to release a signal, release membar
mb_fence(mb_release);
int count = m_count.fetch_add(1, mb_relaxed);
if (count < 0) m_cpp_sem.inc();
}
};
______________________________
Imvho, this is fairly straightforward logic.
Take note of the calls to m_count.fetch_add/sub(), and the calls to the
mb_fence macro, defined as:
______________________________
#define mb_relaxed std::memory_order_relaxed
#define mb_consume std::memory_order_consume
#define mb_acquire std::memory_order_acquire
#define mb_release std::memory_order_release
#define mb_acq_rel std::memory_order_acq_rel
#define mb_seq_cst std::memory_order_seq_cst
#define mb_fence(mb) std::atomic_thread_fence(mb)
______________________________
Here is all the code:
______________________________
// Fast-Semaphore by Joe Seigh
// C++ Implmentation by Chris Thomasson
// Modeled as a semaphore-as-a-mutex test...
#include <cstdio>
#include <deque>
#include <condition_variable>
#include <mutex>
#include <memory>
#include <thread>
#include <atomic>
#include <algorithm>
#include <cassert>
#define mb_relaxed std::memory_order_relaxed
#define mb_consume std::memory_order_consume
#define mb_acquire std::memory_order_acquire
#define mb_release std::memory_order_release
#define mb_acq_rel std::memory_order_acq_rel
#define mb_seq_cst std::memory_order_seq_cst
#define mb_fence(mb) std::atomic_thread_fence(mb)
#define THREADS 7
#define N 1000000
static std::mutex g_std_out_mutex;
// Basic, bare bones C++ Semaphore
struct cpp_sem
{
int m_count;
std::mutex m_mutex;
std::condition_variable m_cond;
cpp_sem(int count = 0) : m_count(count)
{
assert(count > -1);
}
void dec() // wait
{
std::unique_lock<std::mutex> lock(m_mutex);
while (m_count == 0) m_cond.wait(lock);
--m_count;
}
void inc() // post
{
{
std::unique_lock<std::mutex> lock(m_mutex);
++m_count;
}
m_cond.notify_one();
}
};
// A faster semaphore using a little spice wrt
// "bolting on" C++ atomics and membars
struct fsem
{
std::atomic<int> m_count; // atomic sema counter
cpp_sem m_cpp_sem;
fsem(int count = 0) : m_count(count) {}
void dec() // wait
{
int count = m_count.fetch_sub(1, mb_relaxed);
if (count < 1) m_cpp_sem.dec(); // conditional wait
// we acquired a signal, acquire membar
mb_fence(mb_acquire);
}
void inc() // post
{
// we are going to release a signal, release membar
mb_fence(mb_release);
int count = m_count.fetch_add(1, mb_relaxed);
if (count < 0) m_cpp_sem.inc();
}
};
// Some generic user state
struct shared_user_state
{
fsem m_sema; // acts as mutex sema with init count of 1
int m_user_state;
shared_user_state() : m_sema(1), m_user_state(0) {}
};
void producer_thread(
unsigned int id,
shared_user_state& sustate
) {
{
std::unique_lock<std::mutex> lock(g_std_out_mutex);
std::printf("producer_thread(%u)::sustate(%p) - Entry\n", id,
(void*)&sustate);
}
for (unsigned int i = 0; i < N; ++i)
{
// atomic state mutation
sustate.m_sema.dec();
int user_state = ++sustate.m_user_state;
sustate.m_sema.inc();
if (!(i % 1003))
{
{
std::unique_lock<std::mutex> lock(g_std_out_mutex);
std::printf("producer_thread(%u)::sustate(%p)::user_state(%d)\n",
id, (void*)&sustate, user_state);
}
}
}
{
std::unique_lock<std::mutex> lock(g_std_out_mutex);
std::printf("producer_thread(%u)::sustate(%p) - Exit\n", id,
(void*)&sustate);
}
}
void consumer_thread(
unsigned int id,
shared_user_state& sustate
) {
{
std::unique_lock<std::mutex> lock(g_std_out_mutex);
std::printf("consumer_thread(%u)::sustate(%p) - Entry\n", id,
(void*)&sustate);
}
for (unsigned int i = 0; i < N; ++i)
{
// atomic state mutation
sustate.m_sema.dec();
int user_state = --sustate.m_user_state;
sustate.m_sema.inc();
if (!(i % 1003))
{
{
std::unique_lock<std::mutex> lock(g_std_out_mutex);
std::printf("consumer_thread(%u)::sustate(%p)::user_state(%d)\n",
id, (void*)&sustate, user_state);
}
}
}
{
std::unique_lock<std::mutex> lock(g_std_out_mutex);
std::printf("consumer_thread(%u)::sustate(%p) - Exit\n", id,
(void*)&sustate);
}
}
int main(void)
{
{
shared_user_state sustate;
std::thread consumers[THREADS];
std::thread producers[THREADS];
for (unsigned int i = 0; i < THREADS; ++i)
{
consumers[i] = std::thread(
consumer_thread,
i + 0,
std::ref(sustate)
);
producers[i] = std::thread(
producer_thread,
i + 1,
std::ref(sustate)
);
}
for (unsigned int i = 0; i < THREADS; ++i)
{
producers[i].join();
consumers[i].join();
}
std::printf("sustate(%p)::m_user_state(%d)\n",
(void*)&sustate, sustate.m_user_state);
assert(sustate.m_user_state == 0);
}
std::printf("\nComplete, hit <ENTER> to exit...\n");
std::fflush(stdout);
std::getchar();
return 0;
}
______________________________
Can anybody run this stuff?
;^o
I will happily answer any questions, and would love comments,
criticisms, all of it.