On Thursday, February 14, 2019 at 2:03:43 AM UTC-5, Chris M. Thomasson wrote:
> Fwiw, here is an older read/write mutex of mine. I created a little
> benchmark for it vs c++17's std::shared_mutex. On MSVC 2017, my
> algorithm beats std::shared_mutex pretty badly. It takes my algorithm
> around 34 seconds to complete. MSVC's std::shared_mutex takes around 127
> seconds. Wow! What a difference.
>
> Here is the code. I still have to try it out on GCC in C++17 mode.
>
> Can anybody run it?
>
>
https://pastebin.com/raw/xCBHY9qd
> __________________________________
> /* Simple, crude read/write mutex test
> by: Chris M. Thomasson
> __________________________________________*/
>
>
>
> #include <atomic>
> #include <cassert>
> #include <chrono>
> #include <climits>
> #include <condition_variable>
> #include <cstdlib>
> #include <ctime>
> #include <functional>
> #include <iostream>
> #include <mutex>
> #include <shared_mutex>
> #include <thread>
>
>
> #define THREADS 16UL
> #define ITERS 10000000UL
> #define COUNT (THREADS * ITERS)
>
>
> // undefine to test std::shared_mutex
> #define CT_TEST_FAST_MUTEX 1
>
>
> // Counting semaphore implemented with a mutex and a condition variable.
> // m_state is the number of permits currently available.
> struct ct_slow_semaphore
> {
>     unsigned long m_state;
>     std::mutex m_mutex;
>     std::condition_variable m_cond;
>
>     ct_slow_semaphore(unsigned long state) : m_state{state} {}
>
>     // Add a single permit, then wake one waiter after the mutex is released.
>     void inc()
>     {
>         {
>             const std::lock_guard<std::mutex> guard(m_mutex);
>             m_state += 1;
>         }
>
>         m_cond.notify_one();
>     }
>
>     // Add `addend` permits, then wake every waiter after the mutex is released.
>     void add(unsigned long addend)
>     {
>         {
>             const std::lock_guard<std::mutex> guard(m_mutex);
>             m_state = m_state + addend;
>         }
>
>         m_cond.notify_all();
>     }
>
>     // Block until at least one permit is available, then consume it.
>     void dec()
>     {
>         std::unique_lock<std::mutex> guard(m_mutex);
>         m_cond.wait(guard, [this] { return m_state != 0; });
>         m_state -= 1;
>     }
> };
>
>
>
>
> // Binary semaphore (auto-reset event): signal() sets the flag, and the
> // waiter that observes it clears the flag again before returning.
> struct ct_auto_reset_event
> {
>     bool m_state;
>     std::mutex m_mutex;
>     std::condition_variable m_cond;
>
>     ct_auto_reset_event() : m_state{false} {}
>
>     // Set the flag and wake one waiter; the notification is issued while
>     // the mutex is still held.
>     void signal()
>     {
>         const std::lock_guard<std::mutex> guard(m_mutex);
>         m_state = true;
>         m_cond.notify_one();
>     }
>
>     // Block until the flag is set, then reset it.
>     void wait()
>     {
>         std::unique_lock<std::mutex> guard(m_mutex);
>         m_cond.wait(guard, [this] { return m_state; });
>         m_state = false; // auto-reset
>     }
> };
>
>
> /* Mutex that is just a layer over an auto-reset event.
>  * m_state takes three values in this code: 0 (stored by unlock), 1 (stored by
>  * the first exchange in lock) and 2 (stored by lock after that first exchange
>  * found the mutex already taken). unlock() signals m_waitset only when the
>  * value it replaces is 2. */
> struct ct_fast_mutex
> {
> std::atomic<unsigned int> m_state;
> ct_auto_reset_event m_waitset; // lock() blocks here; unlock() signals it
>
> ct_fast_mutex() : m_state(0) {}
>
> void lock()
> {
> if (m_state.exchange(1, std::memory_order_acquire)) // previous value 0: acquired on the fast path, skip the rest
> {
> while (m_state.exchange(2, std::memory_order_acquire)) // store 2 so the owner's unlock() will signal; previous value 0 means we now own it
> {
> m_waitset.wait(); // sleep until an unlock() signals, then retry the exchange
> }
> }
> }
>
> void unlock()
> {
> if (m_state.exchange(0, std::memory_order_release) == 2) // release; a replaced 2 means some lock() took the slow path
> {
> m_waitset.signal(); // wake one thread blocked in lock()
> }
> }
> };
>
>
>
> // Chris M. Thomasson's Experimental Read/Write Mutex
> // Yeah, it is pretty fat wrt the state (three atomics, two semaphores and
> // a mutex), however it has some interesting properties...
> // The state can be compressed a bit...
> // btw, none of the four lock/unlock functions below contains a loop of its own;
> // take a look at the lock_shared and unlock_shared functions.
>
> #define RWMUTEX_COUNT_MAX LONG_MAX // idle value of m_count; LONG_MAX is declared in <climits>
>
> struct ct_rwmutex
> {
> // shared state
> std::atomic<long> m_wrstate; // set to 1 by the constructor; no member function shown here reads or writes it
> std::atomic<long> m_count; // starts at RWMUTEX_COUNT_MAX; lock_shared subtracts 1, lock subtracts RWMUTEX_COUNT_MAX, the unlocks add them back
> std::atomic<long> m_rdwake; // decremented by unlock_shared, increased by lock(); the reader that sees a previous value of 1 wakes the writer
>
> ct_slow_semaphore m_rdwset; // readers block here in lock_shared; unlock() releases them
> ct_slow_semaphore m_wrwset; // the writer blocks here in lock(); unlock_shared releases it
> ct_fast_mutex m_wrlock; // held from lock() to unlock(), so only one writer runs the protocol at a time
>
>
> ct_rwmutex() :
> m_wrstate(1),
> m_count(RWMUTEX_COUNT_MAX),
> m_rdwake(0),
> m_rdwset(0),
> m_wrwset(0) {
> }
>
>
> // READ, pretty slim: a single fetch_add, plus a semaphore wait only when a writer is present
> void lock_shared()
> {
> if (m_count.fetch_add(-1, std::memory_order_acquire) < 1) // previous value below 1: lock() has already subtracted RWMUTEX_COUNT_MAX
> {
> m_rdwset.dec(); // wait until the writer's unlock() adds permits
> }
> }
>
> void unlock_shared() // read unlock: one fetch_add, plus the writer hand-off below when a writer is pending
> {
> if (m_count.fetch_add(1, std::memory_order_release) < 0) // negative previous value: a writer's subtraction is in effect
> {
> if (m_rdwake.fetch_add(-1, std::memory_order_acq_rel) == 1) // previous value 1: this is the last reader the writer is waiting for
> {
> m_wrwset.inc(); // wake the writer blocked in lock()
> }
> }
> }
>
>
> // WRITE, more hefty: takes m_wrlock, then waits for the readers that were already inside
> void lock()
> {
> m_wrlock.lock(); // exclude other writers
>
> long count = m_count.fetch_add(-RWMUTEX_COUNT_MAX,
> std::memory_order_acquire);
>
> if (count < RWMUTEX_COUNT_MAX) // RWMUTEX_COUNT_MAX - count readers had decremented m_count before us
> {
> long rdwake = m_rdwake.fetch_add(RWMUTEX_COUNT_MAX - count,
> std::memory_order_acquire); // publish how many readers must leave; rdwake is the value before the add
>
> if (rdwake + RWMUTEX_COUNT_MAX - count) // non-zero: not all of those readers have gone through unlock_shared yet
> {
> m_wrwset.dec(); // sleep until the last of them calls m_wrwset.inc()
> }
> }
> }
>
> // write unlock: restore m_count, release the readers that queued up meanwhile, then drop m_wrlock
> void unlock()
> {
> long count = m_count.fetch_add(RWMUTEX_COUNT_MAX,
> std::memory_order_release);
>
> if (count < 0) // -count lock_shared calls decremented m_count while the writer's subtraction was in effect
> {
> m_rdwset.add(-count); // hand out one permit per such reader
> }
>
> m_wrlock.unlock(); // let the next writer in
> }
> };
>
>
> // State shared by every benchmark thread: a counter plus the read/write
> // mutex under test, which is chosen at compile time via CT_TEST_FAST_MUTEX.
> struct ct_shared
> {
>     std::atomic<unsigned long> m_state;
>
> #ifndef CT_TEST_FAST_MUTEX
>     std::shared_mutex m_std_rwmutex;
> #else
>     ct_rwmutex m_std_rwmutex;
> #endif
>
>     ct_shared() : m_state{0UL} {}
> };
>
>
> // Benchmark worker: runs ITERS rounds, each made of one exclusive section
> // that increments shared.m_state and one shared section that only yields
> // now and then. The index argument is accepted but not used.
> void ct_thread(ct_shared& shared, std::size_t index)
> {
>     auto& rwlock = shared.m_std_rwmutex;
>
>     unsigned int round = 0;
>     while (round < ITERS)
>     {
>         // writer pass
>         rwlock.lock();
>         if ((round % 256) == 0)
>         {
>             std::this_thread::yield();
>         }
>         shared.m_state.fetch_add(1);
>         rwlock.unlock();
>
>         // reader pass (does not touch shared.m_state)
>         rwlock.lock_shared();
>         if ((round % 512) == 0)
>         {
>             std::this_thread::yield();
>         }
>         rwlock.unlock_shared();
>
>         ++round;
>     }
> }
>
>
> // Benchmark driver: starts THREADS workers on one ct_shared instance, joins
> // them, prints the elapsed wall-clock time in milliseconds and finally
> // checks that the counter equals COUNT (one increment per worker round).
> int main()
> {
>     ct_shared shared;
>
>     {
>         std::thread threads[THREADS];
>
>         // Timed with steady_clock rather than std::clock(): std::clock()
>         // reports processor time used by the process, which is not the
>         // elapsed time when many threads run (or sleep) concurrently.
>         const auto start = std::chrono::steady_clock::now();
>
>         for (std::size_t i = 0; i < THREADS; ++i)
>         {
>             threads[i] = std::thread(ct_thread, std::ref(shared), i);
>         }
>
>         for (std::size_t i = 0; i < THREADS; ++i)
>         {
>             threads[i].join();
>         }
>
>         const auto elapsed = std::chrono::steady_clock::now() - start;
>
>         const auto msec =
>             std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
>
>         std::cout << "msec = " << msec << "\n";
>     }
>
>     std::cout << "shared.m_state = " << shared.m_state << "\n";
>     std::cout << "\n\nFin!\n";
>
>     // Every worker increments the counter once per round.
>     assert(shared.m_state == COUNT);
>
>     return 0;
> }
> __________________________________
>
> I will explain the algorithm in further detail when I get some more
> time. Probably tomorrow.
Tested your latest code on a 2018 i5 MacBook Pro with 3 different compilers:
*************************************
* GCC -Ofast -march=native -lstdc++ *
*************************************
Testing: Chris M. Thomasson's Experimental Read/Write Mutex
msec = 46171
shared.m_state = 160000000
Fin!
******************************************
* Apple CLANG -Ofast -march=native -lc++ *
******************************************
Testing: Chris M. Thomasson's Experimental Read/Write Mutex
msec = 40027
shared.m_state = 160000000
Fin!
*****************************************
* LLVM CLANG -Ofast -march=native -lc++ *
*****************************************
Testing: Chris M. Thomasson's Experimental Read/Write Mutex
msec = 37518
shared.m_state = 160000000
Fin!
Versus std::shared_mutex:
Ran for 15 minutes, then I stopped it. It's not even close.