original post in comp.programming.threads:
http://groups.google.ru/group/comp.programming.threads/browse_frm/thread/7f5f07401d3bbd91
atomic-free spsc-lifo-stack version 2
I've made benchmark against my previous algorithm:
http://groups.google.ru/group/comp.programming.threads/browse_frm/thread/fb8159324d34aeba
And here are the results:
atomic-free spsc-lifo-stack:
1 processor: 44 cycles
dual core: 141 cycles (scaling 0.31)
2 processors: 432 cycles (scaling 0.10)
atomic-free spsc-lifo-stack 2:
1 processor: 31 cycles
dual core: 25 cycles (scaling 1.24)
2 processors: 39 cycles (scaling 0.79)
linear scaling - 2
1 iteration: 2 push operations and 2 pop operations
Here is benchmark code:
stack_t g_stack1;
stack_t g_stack2;
unsigned __stdcall thread_func_stack(void* p)
{
int const n = (int)p; // 0 or 1
int const init_count = 100000;
int const iter_count = 100000000;
if (0 == n)
{
for (int i = 0 ; i != init_count; ++i)
g_stack1.push(new stack_t::node, i);
}
// execution barrier
// start timer
if (n)
{
for (int i = 0; i != iter_count; ++i)
{
int v = 0;
stack_t::node* n = 0;
while (false == g_stack1.pop(n, v))
SwitchToThread();
g_stack2.push(n, v);
}
}
else
{
for (int i = 0; i != iter_count; ++i)
{
int v = 0;
stack_t::node* n = 0;
while (false == g_stack2.pop(n, v))
SwitchToThread();
g_stack1.push(n, v);
}
}
// execution barrier
// end timer
return 0;
}
Here is implementation:
template<typename T>
class spsc_lifo_stack2
{
public:
struct node
{
node* link_;
T data_;
};
spsc_lifo_stack2()
{
head_ = new node();
head_->link_ = 0;
tail_ = head_;
prev_ = 0;
}
~spsc_lifo_stack2()
{
ASSERT(0 == head_->link_);
ASSERT(head_ == tail_);
ASSERT(0 == prev_);
delete head_;
}
void push(node* n, T const& v)
{
n->link_ = 0;
head_->data_ = v;
_ReadWriteBarrier();
head_->link_ = n;
_ReadWriteBarrier();
head_ = n;
}
bool pop(node*& n, T& v)
{
if (node* next = tail_->link_)
{
node* cur = tail_;
node* prev = prev_;
do
{
cur->link_ = prev;
prev = cur;
cur = next;
next = cur->link_;
}
while (next);
tail_ = cur;
prev_ = prev;
}
if (0 == prev_)
return false;
n = prev_;
v = n->data_;
prev_ = n->link_;
return true;
}
private:
char pad1_ [128];
node* head_;
char pad2_ [128];
node* tail_;
node* prev_;
char pad3_ [128];
};
Dmitriy V'jukov