I modified the implementation a little bit:
1. Create as many threads as the local hardware actually supports
(its number of hardware threads). This makes the program run much faster on
my dual-core, 4-thread CPU.
2. Measure the time elapsed to calculate how many bits are generated per
second.
______________________________________________________________________
#include <time.h>
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>
#ifdef __MACH__
#include <sys/time.h>
/* clock_gettime is not implemented on OS X before 10.12, so a minimal
replacement built on gettimeofday is provided here. See
http://stackoverflow.com/questions/5167269/clock-gettime-alternative-in-mac-os-x
*/
#define CLOCK_REALTIME 0
// Minimal stand-in for POSIX clock_gettime built on gettimeofday().
// The clock id argument is ignored. On success the current time is written
// to *t (microseconds scaled up to nanoseconds) and 0 is returned; on
// failure the non-zero result of gettimeofday() is returned and *t is left
// untouched.
int clock_gettime(int /*clk_id*/, timespec* t)
{
    timeval wall;
    const int status = gettimeofday(&wall, nullptr);
    if (status == 0)
    {
        t->tv_sec = wall.tv_sec;
        // gettimeofday reports microseconds; timespec wants nanoseconds.
        t->tv_nsec = wall.tv_usec * 1000;
    }
    return status;
}
#endif
// Number of racer threads started for every generated bit. Assigned in
// main() and read by get_rand_bit().
unsigned int threads_count;
// Counter shared by all racer threads. racer() bumps it with a separate load
// and store rather than one atomic read-modify-write, so concurrent updates
// can be lost. It is never reset between bits; get_rand_bit() returns the
// lowest bit of the running total.
static std::atomic<unsigned int> g_racer(0);
// Thread body: performs n increments of the shared counter g_racer.
// Each increment is intentionally split into a relaxed load followed by a
// relaxed store (NOT fetch_add), so increments from concurrently running
// threads may overwrite each other. Those lost updates are the source of
// unpredictability.
void racer(unsigned int n)
{
    for (unsigned int remaining = n; remaining != 0; --remaining)
    {
        // Racy on purpose: another thread may store between these two lines.
        const unsigned int seen = g_racer.load(std::memory_order_relaxed);
        g_racer.store(seen + 1, std::memory_order_relaxed);
    }
}
// Produces one bit by letting threads_count threads race on g_racer.
//
// Every thread runs racer(n), i.e. n deliberately racy increments. After all
// threads have been joined, the lowest bit of the shared counter is returned.
//
// n: number of increments each thread performs.
// Returns 0 or 1.
// Throws whatever std::thread's constructor throws (e.g. std::system_error)
// if a thread cannot be started; threads that were already started are
// joined before the exception propagates.
unsigned int get_rand_bit(unsigned int n)
{
    // The vector owns the thread objects, so nothing has to be freed by hand.
    std::vector<std::thread> workers;
    workers.reserve(threads_count);
    try
    {
        for (unsigned int i = 0; i < threads_count; ++i)
            workers.emplace_back(racer, n);
    }
    catch (...)
    {
        // Destroying a joinable std::thread calls std::terminate, so the
        // threads that did start must be joined before unwinding.
        for (std::thread& w : workers)
            w.join();
        throw;
    }
    for (std::thread& w : workers)
        w.join();
    return g_racer.load(std::memory_order_relaxed) & 1u;
}
// Generates n bits with get_rand_bit(), prints them to std::cout in rows of
// 64, and reports the average generation rate in bits per second.
//
// n: number of bits to generate and print.
void display_rand_bits(unsigned int n)
{
    // steady_clock measures elapsed wall time and never jumps backwards,
    // unlike a real-time clock (std::clock() would give CPU time instead).
    using wall_clock = std::chrono::steady_clock;
    const char* const rule = "____________________________________________";

    std::cout << n << " Race-Condition Bits" << std::endl;
    std::cout << rule << std::endl;

    const wall_clock::time_point start = wall_clock::now();
    for (unsigned int i = 0; i < n; ++i)
    {
        std::cout << get_rand_bit(1000000);
        // Break the line (and flush) after every 64th bit.
        if (! ((i + 1) % 64)) std::cout << std::endl;
        std::this_thread::yield();
    }
    const wall_clock::time_point finish = wall_clock::now();

    // Terminate a partially filled last row so the rule starts on its own line.
    if (n % 64) std::cout << std::endl;
    std::cout << rule << std::endl;

    const double elapsed =
        std::chrono::duration<double>(finish - start).count();
    // Guard against a zero-length interval (e.g. n == 0).
    const double rate = elapsed > 0.0 ? n / elapsed : 0.0;
    std::cout << "averagely " << rate << " bits per second" << std::endl;
}
// Entry point: picks the number of racer threads from the hardware and
// prints 1024 race-condition bits together with the generation rate.
int main()
{
    threads_count = std::thread::hardware_concurrency();
    // hardware_concurrency() returns 0 when the value cannot be determined,
    // and a single thread has nobody to race against (every bit would then
    // be 0), so always use at least two threads.
    if (threads_count < 2)
        threads_count = 2;
    std::cout << "Using " << threads_count << " threads" << std::endl;
    display_rand_bits(1024);
    return 0;
}
____________________________________________________________
My result:
Using 4 threads
1024 Race-Condition Bits
____________________________________________
0101010000111100110100001000111001110100111000100011110001000110
1100111100001111000100011011110100011100000101011100010111010011
1011010001100110111110111110111111000101011000000110101000010010
0110010000011000101000011010111100111100010010010011111110010110
0001010001110101101001000110101001010011100011100001011111010000
1110000010110011100000100010000111101111011001101011000101101011
0110110011111101100000001111010001010101100001101101000111010011
0100110110011110111101001100000000011101011110101011110001001000
1001101111111100010111000101101010100101011101111100100110100111
0110001101000101001101100001110110001011000010110000010100001011
0010010101001011101101101000100011110011000011110000011100111001
1000111110000010110011110011010111100101110110011011001110100101
1111001011001000110111010101000110000001100111110101100000111010
1001101101001001001111101100010000000001000011100110100110100000
0100100110110000010100010001000101001111111001010110110111000110
0111110110110101111101000101000001101010111111000000000001101101
____________________________________________
averagely 153.529 bits per second