> MSVC's C++20 semaphore class spins to acquire the semaphore;
> g++'s / libstdc++'s doesn't spin. I wrote a little test program
> to test how long it takes to flip to another thread and back
> with a binary_semaphore (always a counting_semaphore<1>).
> Here it is:
>
> #if defined(_WIN32)
> #include <Windows.h>
> #elif defined(__unix__)
> #include <sys/resource.h>
> #endif
> #include <iostream>
> #include <thread>
> #include <semaphore>
> #include <atomic>
> #include <utility>
> #include <vector>
>
> using namespace std;
>
> int main()
> {
> binary_semaphore
> semA( 1 ),
> semB( 0 );
> auto getTimes = []() -> pair<int64_t, int64_t>
> {
> #if defined(_WIN32)
> FILETIME ftDummy, ftKernel, ftUser;
> GetThreadTimes( GetCurrentThread(), &ftDummy, &ftDummy, &ftKernel,
> &ftUser );
> auto compose = []( FILETIME &ft ) { return
> ((uint64_t)ft.dwHighDateTime << 32 | ft.dwLowDateTime) * 100; };
> return { compose( ftKernel ), compose( ftUser ) };
> #elif defined(__unix__)
> rusage ru;
> getrusage( RUSAGE_THREAD, &ru );
> auto compose = []( timeval &tv ) -> int64_t { return
> (uint64_t)tv.tv_sec * 1'000'000'000u + (uint32_t)tv.tv_usec * 1'000u; };
> return { compose( ru.ru_stime ), compose( ru.ru_utime ) };
> #endif
> };
> atomic_int64_t
> aFlips( 0 ),
> aKernel( 0 ),
> aUser( 0 );
> atomic_bool stop( false );
> auto thr = [&]( binary_semaphore &semMe, binary_semaphore &semYou )
> {
> auto tBegin = getTimes();
> uint64_t f = 0;
> for( ; !stop.load( memory_order_relaxed ); ++f )
> semMe.acquire(),
> semYou.release();
> auto tEnd = getTimes();
> aFlips += f;
> aKernel += tEnd.first - tBegin.first;
> aUser += tEnd.second + tBegin.second;
> };
> vector<jthread> threads;
> threads.emplace_back( thr, ref( semA ), ref( semB ) );
> threads.emplace_back( thr, ref( semB ), ref( semA ) );
> this_thread::sleep_for( 1s );
> stop = true;
> threads.resize( 0 );
> double
> kernel = (double)aKernel,
> user = (double)aUser,
> total = kernel + user,
> flips = (double)aFlips;
> cout << user / total * 100.0 << "%" << endl;
> cout << total / flips << "ns" << endl;
> }
>
> Results under Windows 11 on an AMD Ryzen 7950X (Zen4):
> 100%
> 89.3523ns
> So 100% of the CPU time is spent in user space and a flip back
> and forth takes 90ns.
> Results under Ubuntu 20.04 on an AMD Threadripper 3990X (Zen2):
> 6.56209%
> 476.213ns
> So under Linux most of the time is spent in kernel space and I think
> about 0.5us, that's about 2000 clock cycles with full boost, isn't bad
> for an explicit kernel call.
> I think this userland spinning isn't really necessary because
> normally a plain mutex and a condition_variable cover everything
> you need. An explicit semaphore is rather needed if you build your
> own synchronization primitives on top of the semaphore. And if you do
> that you usually have your own spinning with that and you don't need
> further spinning if you want to go into kernel mode. But this additional
> spinning actually doesn't really hurt since the kernel call itself
> costs a lot more.
>
maxa@Branimirs-MacBook-Air News % g++ -O2 run.cpp -o run -std=c++20
run.cpp:37:2: warning: non-void lambda does not return a value [-Wreturn-type]
};
^
1 warning generated.
bmaxa@Branimirs-MacBook-Air News % ./run
libc++abi: terminating
zsh: abort ./run
--
7-77-777, Evil Sinner!
https://www.linkedin.com/in/branimir-maksimovic-6762bbaa/