Fwiw, I wrote a crude new benchmark that measures how many reads and
writes can be performed in a given amount of time: my algorithm vs
std::shared_mutex. So, we are _primarily_ looking at how many reads can
be performed in this test over 60 seconds. The number of threads is
variable: it is std::thread::hardware_concurrency() multiplied by
THREADS, which is set to 8 in this test. So on my system the setup is:
___________________________________
cpu_threads_n = 4
threads_n = 32
writers = 16
readers = 16
test duration = 60 seconds
___________________________________
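For reference, those numbers fall straight out of these lines in main()
further down:

std::size_t cpu_threads_n = std::thread::hardware_concurrency(); // 4 here
std::size_t threads_n = cpu_threads_n * THREADS;                 // 4 * 8 = 32
std::size_t writers = threads_n / 2;                             // 16
std::size_t readers = threads_n - writers;                       // 16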
This is going to be different for each system. Readers iterate a shared
linked list with 1000000 nodes in this test. Writers pop and push items,
and never use new or delete. Well, so far, the timings I am getting on
my end using MSVC 2017 are:
Testing: Chris M. Thomasson's Experimental Read/Write Mutex
___________________________________
cpu_threads_n = 4
threads_n = 32
writers = 16
readers = 16
test duration = 60 seconds
___________________________________
___________________________________
Raw Reads: 18046464
Raw Writes: 347498
reads_per_tick = 300693
writes_per_tick = 5790
Ticks = 60.0161
___________________________________
Testing: std::shared_mutex
___________________________________
cpu_threads_n = 4
threads_n = 32
writers = 16
readers = 16
test duration = 60 seconds
___________________________________
___________________________________
Raw Reads: 650006
Raw Writes: 168741
reads_per_tick = 10829
writes_per_tick = 2811
Ticks = 60.0193
___________________________________
As you can see, my algorithm is performing many more reads than
std::shared_mutex: roughly 28x the reads (18046464 vs 650006), and
about 2x the writes as well. Anyway, here is my code. When you get some
time, can you please give it a go? I want to see timings on other
systems: the good, the bad, and the ugly. ;^) Thanks.
https://pastebin.com/raw/1QtPCGhV
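It is a single translation unit and needs a C++17 compiler
(std::shared_mutex is C++17). For example, on GCC or Clang, assuming
you saved it as ct_rwmutex_bench.cpp (the file name is just a
placeholder):

g++ -std=c++17 -O2 -pthread ct_rwmutex_bench.cpp -o ct_rwmutex_bench

To get the std::shared_mutex numbers, comment out the
CT_TEST_FAST_MUTEX define near the top and rebuild.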
___________________________________
/* Crude Read/Write Mutex Benchmark
This tests my algorithm ct_rwmutex vs. std::shared_mutex.
It shows how many reads and writes can be performed within
a fixed amount of time.
by: Chris M. Thomasson
__________________________________________*/
#include <thread>
#include <atomic>
#include <mutex>
#include <shared_mutex>
#include <condition_variable>
#include <iostream>
#include <functional>
#include <chrono>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <climits>
#include <cstdint>
#include <vector>
// undefine to test std::shared_mutex
#define CT_TEST_FAST_MUTEX 1
#define THREADS 8 // multiplied by std::hardware_concurrency
#define NODE_PRIME 1000000 // number of nodes
#define CRUDE_CACHE_PAD 256 // crude cache pad
#define TEST_DURATION_SEC 60 // number of seconds for the test
// bare bones mutex/condvar based semaphore
struct ct_slow_semaphore
{
    unsigned long m_state;
    std::mutex m_mutex;
    std::condition_variable m_cond;

    ct_slow_semaphore(unsigned long state) : m_state(state) {}

    void inc()
    {
        m_mutex.lock();
        ++m_state;
        m_mutex.unlock();
        m_cond.notify_one();
    }

    void add(unsigned long addend)
    {
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_state += addend;
        }
        m_cond.notify_all();
    }

    void dec()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        while (m_state == 0) m_cond.wait(lock);
        --m_state;
    }
};
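// Note on how the rwmutex below uses this: readers block on one
// instance via dec() while a writer is in, and the writer's unlock
// wakes all of them in one shot via add() / notify_all(); the writer
// blocks on another instance until the last reader inc()'s it.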
// bin-sema
struct ct_auto_reset_event
{
    bool m_state;
    std::mutex m_mutex;
    std::condition_variable m_cond;

    ct_auto_reset_event() : m_state(false) {}

    void signal()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_state = true;
        m_cond.notify_one();
    }

    void wait()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        while (m_state == false) m_cond.wait(lock);
        m_state = false; // auto-reset
    }
};
// just a layer over an auto-reset event;
// m_state: 0 = unlocked, 1 = locked, 2 = locked with possible waiters
struct ct_fast_mutex
{
    std::atomic<unsigned int> m_state;
    ct_auto_reset_event m_waitset;

    ct_fast_mutex() : m_state(0) {}

    void lock()
    {
        if (m_state.exchange(1, std::memory_order_acquire))
        {
            while (m_state.exchange(2, std::memory_order_acquire))
            {
                m_waitset.wait();
            }
        }
    }

    void unlock()
    {
        if (m_state.exchange(0, std::memory_order_release) == 2)
        {
            m_waitset.signal();
        }
    }
};
// Chris M. Thomasson's Experimental Read/Write Mutex
// Yeah, it is pretty damn fat wrt the state, however
// it has some interesting properties...
// The state can be compressed a bit...
// btw, it has no loops...
// Take a look at the lock_shared and unlock_shared functions.
// The number of readers _must_ never exceed LONG_MAX!
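// Roughly, the roles of the state, as the code below uses it:
//   m_count  - starts at RWMUTEX_COUNT_MAX; each reader takes 1 and a
//              writer takes RWMUTEX_COUNT_MAX, so it only dips below 1
//              when a writer is in place.
//   m_rdwake - how many active readers the current writer still has
//              to wait out.
//   m_rdwset - where readers block while a writer holds the lock.
//   m_wrwset - where the writer blocks until the last reader leaves.
//   m_wrlock - serializes writers against each other.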
#define RWMUTEX_COUNT_MAX LONG_MAX
struct ct_rwmutex
{
    unsigned char m_crude_cache_pad_0[CRUDE_CACHE_PAD];

    // shared state
    std::atomic<long> m_count;
    unsigned char m_crude_cache_pad_1[CRUDE_CACHE_PAD];

    std::atomic<long> m_rdwake;
    unsigned char m_crude_cache_pad_2[CRUDE_CACHE_PAD];

    ct_slow_semaphore m_rdwset;
    unsigned char m_crude_cache_pad_3[CRUDE_CACHE_PAD];

    ct_slow_semaphore m_wrwset;
    unsigned char m_crude_cache_pad_4[CRUDE_CACHE_PAD];

    ct_fast_mutex m_wrlock;
    unsigned char m_crude_cache_pad_5[CRUDE_CACHE_PAD];

    ct_rwmutex() :
        m_count(RWMUTEX_COUNT_MAX),
        m_rdwake(0),
        m_rdwset(0),
        m_wrwset(0) {
    }
    // READ, pretty slim...
    void lock_shared()
    {
        if (m_count.fetch_add(-1, std::memory_order_acquire) < 1)
        {
            // We need to wait for a writer.
            m_rdwset.dec();
        }
    }

    void unlock_shared()
    {
        if (m_count.fetch_add(1, std::memory_order_release) < 0)
        {
            // There is a writer.
            if (m_rdwake.fetch_add(-1, std::memory_order_acq_rel) == 1)
            {
                // We need to wake the writer up.
                m_wrwset.inc();
            }
        }
    }
    // WRITE, more hefty
    void lock()
    {
        // Acquire exclusive access.
        m_wrlock.lock();

        // We are the only writer in here now.
        // Gain write access wrt m_count.
        long count = m_count.fetch_add(-RWMUTEX_COUNT_MAX,
            std::memory_order_acquire);

        // count can never be negative here.
        if (count < RWMUTEX_COUNT_MAX)
        {
            // We detected readers; there are exactly
            // RWMUTEX_COUNT_MAX - count of them.
            long rdwake = m_rdwake.fetch_add(RWMUTEX_COUNT_MAX - count,
                std::memory_order_acquire);

            if (rdwake + RWMUTEX_COUNT_MAX - count)
            {
                // Non-zero means some of them are still active,
                // so we need to wait for the last reader to signal us.
                m_wrwset.dec();
            }
        }
    }
    // write unlock
    void unlock()
    {
        // Release write access wrt m_count.
        long count = m_count.fetch_add(RWMUTEX_COUNT_MAX,
            std::memory_order_release);

        if (count < 0)
        {
            // We need to wake -count readers.
            m_rdwset.add(-count);
        }

        // Release exclusive access.
        m_wrlock.unlock();
    }
};
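// Hypothetical usage sketch (mine; the benchmark below does not call
// it): since ct_rwmutex exposes lock()/unlock() and
// lock_shared()/unlock_shared(), the standard RAII wrappers work for
// plain scoped locking. The wrappers only instantiate the members
// they actually use, so the missing try_lock* functions do not matter.
inline void ct_rwmutex_usage_sketch(ct_rwmutex& m)
{
    {
        std::shared_lock<ct_rwmutex> read_guard(m); // shared access
        // ... iterate the shared structure ...
    }
    {
        std::unique_lock<ct_rwmutex> write_guard(m); // exclusive access
        // ... mutate the shared structure ...
    }
}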
// A simple intrusive stack. It is not thread-safe on its own; the
// benchmark threads protect it with the rwmutex under test.
struct ct_simple_stack
{
    struct node
    {
        node* m_next;
        unsigned int m_tid;

        node(unsigned int tid) : m_tid(tid) {}
    };

    node* m_head;

    ct_simple_stack() : m_head(nullptr) {}

    ~ct_simple_stack()
    {
        ct_simple_stack::node* n = flush();

        while (n)
        {
            ct_simple_stack::node* next = n->m_next;
            delete n;
            n = next;
        }
    }

    void push(node* n)
    {
        n->m_next = m_head;
        m_head = n;
    }

    node* pop()
    {
        node* n = m_head;

        if (n)
        {
            m_head = n->m_next;
        }

        return n;
    }

    node* flush()
    {
        node* n = m_head;
        m_head = nullptr;
        return n;
    }
};
struct ct_shared
{
    std::atomic<bool> m_run;

    // protected by m_std_rwmutex:
    std::uint64_t m_reads;
    std::uint64_t m_writes;

    ct_simple_stack m_stack;
    ct_simple_stack m_stack_dtor; // not used in this run

#if defined (CT_TEST_FAST_MUTEX)
    ct_rwmutex m_std_rwmutex;
#else
    std::shared_mutex m_std_rwmutex;
#endif

    ct_shared() : m_run(true), m_reads(0), m_writes(0)
    {
        // prime m_stack
        for (unsigned int i = 0; i < NODE_PRIME; ++i)
        {
            m_stack.push(new ct_simple_stack::node(i));
        }
    }
};
void ct_thread_reader(ct_shared& shared, std::size_t tidx)
{
    std::uint64_t reads = 0;

    while (shared.m_run.load(std::memory_order_relaxed))
    {
        shared.m_std_rwmutex.lock_shared();

        // Walk the whole list while holding read access.
        ct_simple_stack::node* n = shared.m_stack.m_head;

        while (n)
        {
            ct_simple_stack::node* next = n->m_next;
            n = next;
        }

        std::this_thread::yield();

        shared.m_std_rwmutex.unlock_shared();

        ++reads;
    }

    // Publish our tally under the write lock.
    shared.m_std_rwmutex.lock();
    shared.m_reads += reads;
    shared.m_std_rwmutex.unlock();
}
void ct_thread_writer(ct_shared& shared, std::size_t tidx)
{
    std::uint64_t writes = 0;

    while (shared.m_run.load(std::memory_order_relaxed))
    {
        // Pop a node under the write lock...
        shared.m_std_rwmutex.lock();
        ct_simple_stack::node* n = shared.m_stack.pop();
        shared.m_std_rwmutex.unlock();

        std::this_thread::yield();

        // ...then push it back under the write lock.
        shared.m_std_rwmutex.lock();

        if (n)
        {
            shared.m_stack.push(n);
        }

        shared.m_std_rwmutex.unlock();

        std::this_thread::yield();

        ++writes;
    }

    // Publish our tally under the write lock.
    shared.m_std_rwmutex.lock();
    shared.m_writes += writes;
    shared.m_std_rwmutex.unlock();
}
int main()
{
    ct_shared shared;

    std::cout << "Testing: ";

#if defined (CT_TEST_FAST_MUTEX)
    std::cout << "Chris M. Thomasson's Experimental Read/Write Mutex\n\n";
#else
    std::cout << "std::shared_mutex\n\n";
#endif

    std::cout.flush();

    {
        // Set up our threads.
        std::size_t cpu_threads_n = std::thread::hardware_concurrency();
        std::size_t threads_n = cpu_threads_n * THREADS;
        std::vector<std::thread> threads(threads_n);

        std::size_t writers = threads_n / 2;
        std::size_t readers = threads_n - writers;

        std::cout << "___________________________________\n";
        std::cout << "cpu_threads_n = " << cpu_threads_n << "\n";
        std::cout << "threads_n = " << threads_n << "\n";
        std::cout << "writers = " << writers << "\n";
        std::cout << "readers = " << readers << "\n";
        std::cout << "test duration = " << TEST_DURATION_SEC << " seconds\n";
        std::cout << "___________________________________\n\n\n";

        auto time_start = std::chrono::high_resolution_clock::now();

        // Create threads: writers first, then readers.
        for (std::size_t i = 0; i < threads_n; ++i)
        {
            if (i < writers)
            {
                threads[i] = std::thread(ct_thread_writer,
                    std::ref(shared), i);
            }
            else
            {
                threads[i] = std::thread(ct_thread_reader,
                    std::ref(shared), i);
            }
        }

        // Give the test some time to run...
        std::chrono::seconds time_duration(TEST_DURATION_SEC);
        std::this_thread::sleep_for(time_duration);
        shared.m_run.store(false, std::memory_order_relaxed);

        // Join threads.
        for (std::size_t i = 0; i < threads_n; ++i)
        {
            threads[i].join();
        }

        // Grab our timing.
        auto time_stop = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> time_diff = time_stop - time_start;

        std::uint64_t raw_reads = shared.m_reads;
        std::uint64_t raw_writes = shared.m_writes;

        std::cout << "___________________________________\n";
        std::cout << "Raw Reads: " << raw_reads << "\n";
        std::cout << "Raw Writes: " << raw_writes << "\n";

        std::uint64_t reads_per_tick = raw_reads / time_diff.count();
        std::uint64_t writes_per_tick = raw_writes / time_diff.count();

        std::cout << "reads_per_tick = " << reads_per_tick << "\n";
        std::cout << "writes_per_tick = " << writes_per_tick << "\n";
        std::cout << "Ticks = " << time_diff.count() << "\n";
        std::cout << "___________________________________\n\n";
    }

    std::cout << "\n\nFin!\n";

    return 0;
}
___________________________________
:^)