> You make assumptions on my code without reading it.
Here it is again, now with some POSIX compatibility. Unfortunately
I'm not familiar with how to write inline assembly for g++, so I've just
written a spendCycles routine which should spend 8 clock cycles for
each iteration on my CPU. On other CPUs this might be different, so
maybe someone here could supply some inline assembly to make the
code of spendCycles timing-equivalent for Li
#if defined(_MSC_VER)
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#elif defined(__unix__)
#include <semaphore.h>
#endif
#include <atomic>
#include <cerrno>
#include <chrono>
#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <random>
#include <thread>
#include <vector>
#pragma warning(disable: 6031)
#pragma warning(disable: 6387)
#pragma warning(disable: 26495)
using namespace std;
// Thin wrapper around the native counting semaphore of the platform:
// a Win32 semaphore object when built with MSVC, a POSIX unnamed semaphore
// (sem_t) on __unix__ targets.
//
// The object owns the underlying OS resource: it is released again in the
// destructor, and copying is disabled because a copy would alias the same
// HANDLE / sem_t and release it twice.
struct Semaphore
{
    // Creates the semaphore (defined out of line).
    Semaphore();

    // Gives the OS resource acquired by the constructor back to the system.
    ~Semaphore()
    {
#if defined(_MSC_VER)
        // CreateSemaphore reports failure with a null handle; nothing to close then.
        if( hSem != nullptr )
            CloseHandle( hSem );
#elif defined(__unix__)
        sem_destroy( &sem );
#endif
    }

    // Non-copyable: exactly one owner per OS semaphore.
    Semaphore( Semaphore const & ) = delete;
    Semaphore &operator =( Semaphore const & ) = delete;

    // Increments the semaphore count by one.
    void release();
    // Blocks until the count is positive, then decrements it.
    void wait();
private:
#if defined(_MSC_VER)
    HANDLE hSem;
#elif defined(__unix__)
    sem_t sem;
#endif
};
// Creates the native semaphore with an initial count of zero.
// Win32: unnamed semaphore, default security, maximum count 0x7FFFFFFF.
// POSIX: unnamed semaphore with pshared == 0 and initial value 0.
// The result of the creation call is not checked.
inline
Semaphore::Semaphore()
{
#if defined(_MSC_VER)
    hSem = CreateSemaphore( nullptr, 0, 0x7FFFFFFF, nullptr );
#elif defined(__unix__)
    sem_init( &sem, 0, 0 );
#endif
}
// Raises the semaphore count by exactly one
// (ReleaseSemaphore with a release count of 1 / sem_post).
// The result of the native call is not checked.
inline
void Semaphore::release()
{
#if defined(_MSC_VER)
    LONG const increment = 1;
    ReleaseSemaphore( hSem, increment, nullptr );
#elif defined(__unix__)
    sem_t *const native = &sem;
    sem_post( native );
#endif
}
// Blocks the calling thread until the semaphore count is positive and then
// decrements it (WaitForSingleObject with INFINITE timeout / sem_wait).
inline
void Semaphore::wait()
{
#if defined(_MSC_VER)
    WaitForSingleObject( hSem, INFINITE );
#elif defined(__unix__)
    // sem_wait() may return -1 with errno == EINTR when a signal handler
    // interrupts it (this program installs a SIGINT handler). Returning in
    // that case would let the caller proceed without having acquired the
    // semaphore, so retry until the wait ends for any other reason.
    while( sem_wait( &sem ) == -1 && errno == EINTR )
    {
    }
#endif
}
// Mutex that first spins on an atomic counter for a configurable number of
// attempts and then falls back to blocking on a semaphore. It also keeps
// statistics about how often spinning did or did not acquire the lock.
struct SpinMutex
{
    // spinCount: maximum number of lock attempts made by lock() before it
    // falls back to the semaphore; 0 disables spinning.
    // explicit: a plain integer must not silently convert into a mutex.
    explicit SpinMutex( uint32_t spinCount );
    // Acquires the mutex (spin phase first, then blocking).
    void lock();
    // Releases the mutex and wakes a blocked thread if one is registered.
    void unlock();
    // Return the respective statistics counter and reset it to zero.
    uint64_t getAndResetSpinSuceeds();
    uint64_t getAndResetSpinFails();
private:
    // Number of spin attempts per lock() call.
    uint32_t spinCount;
    // 0 when the mutex is free; otherwise the owner plus the threads that
    // registered themselves in lock() via fetch_add.
    atomic<uint32_t> lockCounter;
    // Threads that could not get the mutex by spinning block here.
    Semaphore sem;
    // Statistics, updated with relaxed ordering in lock().
    atomic<uint64_t> spinSucceeds,
        spinFails;
    // Atomically reads `value` and sets it to zero; returns the value read.
    uint64_t getAndZero( atomic<uint64_t> &value );
};
SpinMutex::SpinMutex( uint32_t spinCount ) :
spinCount( spinCount ),
lockCounter( 0 ),
spinSucceeds( 0 ),
spinFails( 0 )
{
}
// Acquires the mutex.
//
// Phase 1 (spinning): up to spinCount attempts to change lockCounter from
// 0 to 1 with a weak compare-exchange. A success on any attempt but the very
// first one is counted in spinSucceeds.
// Phase 2 (blocking): if spinning did not get the lock, spinFails is
// incremented (only when spinning is enabled), the thread registers itself
// by incrementing lockCounter and, if the mutex was not free at that moment,
// blocks on the semaphore until unlock() releases it.
void SpinMutex::lock()
{
    for( uint32_t attemptsLeft = spinCount; attemptsLeft != 0; --attemptsLeft )
    {
        uint32_t expected = 0;
        if( lockCounter.compare_exchange_weak( expected, 1, memory_order_acquire,
                                               memory_order_relaxed ) )
        {
            // Only count if it is not the first attempt to lock the mutex.
            // The first attempt is the iteration in which attemptsLeft still
            // equals spinCount; the loop counter itself must not be modified here.
            if( attemptsLeft != spinCount )
                spinSucceeds.fetch_add( 1, memory_order_relaxed );
            return;
        }
    }
    // Only count fails if there is a spin-count at all.
    if( spinCount != 0 )
        spinFails.fetch_add( 1, memory_order_relaxed );
    // Register as owner/waiter. A previous value of 0 means the mutex was
    // free and is now ours; otherwise wait to be woken by unlock().
    if( lockCounter.fetch_add( 1, memory_order_acquire ) != 0 )
        sem.wait();
}
// Releases the mutex. lockCounter is decremented with release ordering; if
// its previous value was not 1, at least one other thread has registered
// itself in lock(), so one semaphore count is released for it.
void SpinMutex::unlock()
{
    uint32_t const previous = lockCounter.fetch_sub( 1, memory_order_release );
    if( previous == 1 )
        return;
    sem.release();
}
// Returns the number of successful spin acquisitions counted so far and
// resets that counter to zero.
inline
uint64_t SpinMutex::getAndResetSpinSuceeds()
{
    uint64_t const succeeded = getAndZero( spinSucceeds );
    return succeeded;
}
// Returns the number of lock() calls counted so far in which spinning did not
// acquire the mutex, and resets that counter to zero.
inline
uint64_t SpinMutex::getAndResetSpinFails()
{
    uint64_t const failed = getAndZero( spinFails );
    return failed;
}
// Atomically replaces `value` with zero and returns the value it held
// immediately before. A single atomic exchange does this in one step, so no
// compare-exchange retry loop is needed.
inline
uint64_t SpinMutex::getAndZero( atomic<uint64_t> &value )
{
    return value.exchange( 0 );
}
// Busy-wait helper used to burn CPU time outside and inside the lock.
// `cycles` is a loop iteration count, not a guaranteed number of clock
// cycles: according to the author's note, one iteration should take about
// 8 clock cycles on the author's CPU and may differ on other CPUs.
void spendCycles( uint64_t cycles );
#if defined(__unix__)
// Definition for __unix__ builds.
// NOTE(review): no definition is visible here for other platforms —
// presumably it is supplied elsewhere (e.g. a separate assembly module); confirm.
void spendCycles( uint64_t cycles )
{
// volatile forces a real load and store of the counter on every iteration,
// so the compiler cannot remove the otherwise empty loop.
uint64_t volatile c = cycles;
// Post-decrement: keeps looping until the value read from c was 0.
while( c-- );
}
#endif
// Global shutdown flag: set to true by the SIGINT handler installed in main()
// and polled by the worker threads and by main's reporting loop.
atomic<bool> stop;
int main( int argc, char **argv )
{
char const *errStr = "1. number of threads, 2. min non locked cycles,
3. max non locked cycles, 4. min locked cycles, 5. max locked cycles, 6.
spin-count";
if( argc < 7 )
{
cout << errStr << endl;
return EXIT_FAILURE;
}
unsigned nThreads;
unsigned long long minNonLockedCycles, maxNonLockedCycles;
unsigned long long minLockedCycles, maxLockedCycles;
unsigned spinCount;
sscanf( argv[1], "%u", &nThreads );
sscanf( argv[2], "%llu", &minNonLockedCycles );
sscanf( argv[3], "%llu", &maxNonLockedCycles );
sscanf( argv[4], "%llu", &minLockedCycles );
sscanf( argv[5], "%llu", &maxLockedCycles );
sscanf( argv[6], "%u", &spinCount );
if( nThreads == 0 || minNonLockedCycles > maxNonLockedCycles ||
minLockedCycles > maxLockedCycles )
{
cout << errStr << endl;
return EXIT_FAILURE;
}
auto sigHandler = []( int sig ) -> void
{
::stop = true;
signal( SIGINT, SIG_IGN );
};
::stop = false;
signal( SIGINT, sigHandler );
SpinMutex sm( spinCount );
auto thr = [&]()
{
random_device rd;
minstd_rand mr( rd() );
uniform_int_distribution<uint64_t> uidNonLocked( minNonLockedCycles,
maxNonLockedCycles );
uniform_int_distribution<uint64_t> uidLocked( minLockedCycles,
maxLockedCycles );
while( !stop.load( memory_order_relaxed ) )
spendCycles( uidNonLocked( mr ) ),
sm.lock(),
spendCycles( uidLocked( mr ) ),
sm.unlock();
};
vector<thread> threads;
for( unsigned t = 0; t != nThreads; ++t )
threads.emplace_back( thr );
while( !stop.load( memory_order_relaxed ) )
{
this_thread::sleep_for( 1s );
cout << "succeeds: " << sm.getAndResetSpinSuceeds() << endl;
cout << "fails: " << sm.getAndResetSpinFails() << endl;
}
for( thread &t : threads )
t.join();
}