Re: atomically thread-safe static initialization algorithm...

Chris Thomasson

unread,

Nov 4, 2006, 4:09:12 PM11/4/06

to

I am providing some experimental and compliable C++ code that uses my
AppCore library, which can be found and downloaded here:

http://appcore.home.comcast.net/

It's all basically static initialization of POD data-structures which are
accessed via. special smart pointer... My experimental "atomic once"
algorithm fully supports a thread-safety level of strong throughout all of
its operations. I think this particular algorithm will work fine. What say
you?

:^)

Here is my algorithm:

<once-experimental.hpp>

#if ! defined(ONCE_HPP)
#define ONCE_HPP

#include <cassert>
#include <cstdio>
#include <appcore.h>

// User API Decl
namespace atomic {
template<typename T> class once_ptr;

} // namespace atomic

// System API Decl
namespace atomic {
namespace sys {
typedef ac_intword_t refs_t;
template<typename T> struct once_POD;
template<typename T> struct once_def_POD;
static bool once_inc(refs_t*) throw();
static bool once_dec(refs_t*) throw();
static void dbg_allocs_inc() throw();
static void dbg_allocs_dec() throw();

}} // namespace atomic::sys

// System API Def
namespace atomic {
namespace sys {

// Debug counter
static ac_intword_t dbg_allocs = 0;

// Inc the debug counter
void dbg_allocs_inc() throw() {
ac_intword_t allocs = ac_atomic_inc_acquire(&dbg_allocs);
std::printf("atomic::sys::dbg_allocs_inc - %i\n", allocs);
}

// Inc the debug counter
void dbg_allocs_dec() throw() {
ac_intword_t allocs = ac_atomic_dec_acquire(&dbg_allocs);
std::printf("atomic::sys::dbg_allocs_dec - %i\n", allocs);
}

// Inc the refcount if its >= 0.
// returns true if the refcount was inc'd,
// otherwise false
bool once_inc(refs_t *_this) throw() {
refs_t local, cmp;
do {
local = ::ac_mb_load_naked(_this);
if (local < 0) { return false; }
cmp = local;
local = ac_atomic_cas_acquire(_this, local, local + 1);
} while(cmp != local);
std::printf("atomic::sys::once_inc(); - %i\n", local + 1);

return true;
}

// Dec the refcount if its >= 0.
// returns true for the last ref,
// otherwise false
bool once_dec(refs_t *_this) throw() {
refs_t local, cmp, xchg;
do {
local = ::ac_mb_load_naked(_this);
if (local < 0) { return false; }
xchg = (local == 1) ? -1 : local - 1;
cmp = local;
local = ac_atomic_cas_release(_this, local, xchg);
} while(cmp != local);
std::printf("atomic::sys::once_dec(); - %i\n", local - 1);

return (local == 1);
}

// once is a POD
#define ATOMIC_ONCE_SYS_STATICINIT() {0, 0}
template<typename T>
struct once_POD {
typedef T type_t;
typedef once_def_POD<once_POD> define_POD_t;
refs_t m_refs;
type_t *m_state;
};

// define is a POD that holds a once POD
template<typename T>
struct once_def_POD {
typedef typename T::type_t type_t;
static T s_this;

// Acquires a reference and calls ctor for first ref.
// returns pointer to ref,
// otherwise NULL
type_t* acquire() {
type_t *local = 0, *cmp;

do {
if (local) { ac_atomic_dec_release(&s_this.m_refs); }
local = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
if (! local) {
// hashed_mutex::guard_t lock(&_this);
local = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
if (! once_inc(&s_this.m_refs)) { return 0; }
if (! local) {
try { local = new type_t; }
catch(...) {
(void)once_dec(&s_this.m_refs);
throw;
}
dbg_allocs_inc();
std::printf("\n(%p)once_def_POD::acquire(); - %p\n",
(void*)this, (void*)local);
ac_mb_storeptr_release(&s_this.m_state, local);
return local;
}
} else if(! once_inc(&s_this.m_refs)) {
return 0;
}

cmp = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
} while(local != cmp);

return local;
}

// Releases a reference and calls dtor if last ref.
// returns nothing,
// otherwise false
void release() {
type_t *local = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
if (once_dec(&s_this.m_refs)) {
ac_mb_storeptr_fence(&s_this.m_state, 0);
ac_mb_store_fence(&s_this.m_refs, 0);
if (local) {
delete local;
std::printf("(%p)once_def_POD::release(); - %p\n\n", (void*)this,
(void*)local);
dbg_allocs_dec();
}
}
}

// static POD init
}; template<typename T>
T once_def_POD<T>::s_this = ATOMIC_ONCE_SYS_STATICINIT();

}} // namespace atomic::sys

// User API Def
namespace atomic {

// Holds an acquired reference.
template<typename T>
class once_ptr {
public:
typedef typename sys::once_POD<T>::define_POD_t define_POD_t;

private:
define_POD_t *m_once;
T *m_state;

private:
T* acquire() {
return (m_once) ? m_once->acquire() : 0;
}

void release() {
if (m_once) { assert(m_state); m_once->release(); }
}

public:
once_ptr() throw() : m_once(0), m_state(0) {}

once_ptr(define_POD_t &_once)
: m_once(&_once), m_state(_once.acquire()) {}

~once_ptr() { release(); }

public:
once_ptr(once_ptr const &rhs)
: m_once(rhs.m_once), m_state(rhs.acquire()) {}

once_ptr const& operator =(once_ptr &rhs) {
define_POD_t *old = m_once;
if (old != rhs.m_once) {
define_POD_t *_once = rhs.m_once;
T *_state = rhs.acquire();
release();
m_once = _once;
m_state = _state;
}
return *this;
}

once_ptr const& operator =(define_POD_t &rhs) {
define_POD_t *old = m_once;
if (old != &rhs) {
define_POD_t *_once = &rhs;
T *_state = rhs.acquire();
release();
m_once = _once;
m_state = _state;
}
return *this;
}

public:
T* load() const throw() { return m_state; }

public:
T* operator ->() { return load(); }
T& operator *() { return *load(); }

public:
operator bool() throw() { return (m_state != 0); }
bool operator !() throw() { return (m_state == 0); }
};

} // namespace atomic

#endif

</once-experimental.hpp>

Here is a quick sample usage:

class foo1 {
int m_state;
mutex_t m_mtx;

public:
typedef atomic::once_ptr<foo1> once_ptr_t;

foo1() : m_state(0) {
// we are the only thread; for sure.
std::printf("(%p)foo1::foo1()\n", (void*)this);
}

~foo1() {
// we are the only thread; for sure.
std::printf("(%p)foo1::~foo1()\n", (void*)this);
}

public:
int do_something(int state) {
mutex_t::guard_t lock(m_mtx);
// we are the only thread; for sure.
int old = m_state;
m_state = state;
std::printf("(%p)foo1::do_something(%i) - %i\n", (void*)this, m_state,
old);
return old;
}
};

class foo2 {
int m_state;

public:

foo2(int state) : m_state(state) {
static foo1::once_ptr_t::define_POD_t s_foo1;

foo1::once_ptr_t myfoo1(s_foo1);
if (myfoo1) {
do_something(myfoo1->do_something(m_state));
}

std::printf("(%p)foo2::foo2()\n", (void*)this);
}

~foo2() {
std::printf("(%p)foo2::~foo2()\n", (void*)this);
}

public:
void do_something(int state) {
std::printf("(%p)foo2::do_something(%i) - %i\n", (void*)this, state,
m_state);
m_state = state;
}

void do_something_else(int state) {
static foo1::once_ptr_t::define_POD_t s_foo1;

foo1::once_ptr_t myfoo1(s_foo1);
if (myfoo1) {
do_something(myfoo1->do_something(state));
}

std::printf("(%p)foo2::do_something_else(%i) - %i\n", (void*)this,
state, m_state);
}
};

void some_func(...) {
foo2 foo(1), *pfoo = new foo2(2);
pfoo->do_something(3);
foo.do_something_else(4);
foo.do_something(5);
pfoo->do_something_else(6);
delete pfoo;
}

As you may know by now, I am not a C++ expert; any help/comments would be
greatly appreciated!

;^)

Thank You

Chris Thomasson

unread,

Nov 4, 2006, 4:10:30 PM11/4/06

to

ARGH! sorry about the double post everybody; damn outlook decided to do its
thing again.

;^(...

Chris Thomasson

unread,

Nov 8, 2006, 10:19:38 PM11/8/06

to

Here is a very rough draft of a paper I am creating that describes how my
static initialization algorithms is implemented. The document is incomplete,
however I thought that I should try to gather any comments I ca before I go
ahead and move the paper out of the "rough draft " stage:

http://appcore.home.comcast.net/vzdoc/atomic/static-init/

Any comments?

Chris Thomasson

unread,

Nov 8, 2006, 11:28:48 PM11/8/06

to

> The document is certainly not incomplete,

Ack! that was suppose to read:

"The document is most certainly incomplete"

I pulled a double negative there!

rfis...@gmail.com

unread,

Nov 9, 2006, 5:24:20 AM11/9/06

to

> From: "Chris Thomasson"

>From the abstract and the introduction I'm not exactly sure what
the library does - can it be described in terms of an existing known
problem? Is lib xy a lockfree impl of the singleton pattern,
keywords: lockfree, singleton pattern, doublechecked locking, GOF?
Or does it resolve the c++ static initialization order fiasco?
Or is it a lockfree refcnt pointer?

Confused,
RF

P.S Too many first person singular pronouns throughout [I, me, my,
mine].
We know it's you!

Markus Elfring

unread,

Nov 10, 2006, 3:00:29 PM11/10/06

to

> http://appcore.home.comcast.net/vzdoc/atomic/static-init/
>
> Any comments?

a) I suggest to change the order of your arguments in the introduction.
=> 1. well-known (?) technical problems with (static) initialization in
multi-threaded programming environments
2. The approach for the proposed algorithm and its benefits

b) Would you like to adjust your wording to increase the emphasis on specific
details?
http://en.wikipedia.org/wiki/Grammatical_voice

c) How do you think about my opinion on a few aspects in your section "2.1
Reordering Issues"?
Discussion "assembly in future C standard"
http://groups.google.de/group/comp.lang.c/msg/6f78611d81de498b

d) I read about your "open-source library called AppCore".
Which licence did you choose?
http://en.wikipedia.org/wiki/Free_software_license

e) I guess that some external links or further references will be added later.
Examples:
- "Trip Report: Ad-Hoc Meeting on Threads in C++" by Eric Niebler
http://artima.com/cppsource/threads_meeting.html

- "The Technical Report on C++ Library Extensions (TR1)" by Matt Austern
Comparison between "pthread_once" and " thread_once" (on page 26)
http://devconnections.com/updates/LasVegasFall_05/CPP_Connections/CLT304.pdf

- Class library by Eric Crahen
http://zthread.sourceforge.net/html/classZThread_1_1LocalStaticInstantiation.html

Regards,
Markus

Chris Thomasson

unread,

Nov 11, 2006, 4:11:49 AM11/11/06

to

"Markus Elfring" <Markus....@web.de> wrote in message
news:4rk42uF...@mid.individual.net...

>> http://appcore.home.comcast.net/vzdoc/atomic/static-init/
>>
>> Any comments?
>
> a) I suggest to change the order of your arguments in the introduction.
> => 1. well-known (?) technical problems with (static) initialization in
> multi-threaded programming environments
> 2. The approach for the proposed algorithm and its benefits
>
> b) Would you like to adjust your wording to increase the emphasis on
> specific details?
> http://en.wikipedia.org/wiki/Grammatical_voice

Yes. However, I think I am going to keep the details at a rather low-level
for now. Basically, keep the the wording as technical as and to the point as
possible. I am more interested in polishing off the actual implementation
details of the C/C++ API that the paper will show how to implement. Also, if
you follow the API declared in the paper, you should be able to
cut-and-paste sections of compliable code, which currently consists of
C/C++. Simple examples of how to replace the two main static C++ function
implementations which drive the papers algorithm with pure IA-32 / SPARC
32/64 Assembly Language. That should top things off before I really start
to drill down on the technical details wrt grammar...

;^)

> c) How do you think about my opinion on a few aspects in your section "2.1
> Reordering Issues"?
> Discussion "assembly in future C standard"
> http://groups.google.de/group/comp.lang.c/msg/6f78611d81de498b

IMHO, there is no such thing as an "Optimizing" Assembler. Period... Here is
why, again this is IMHO:

--- Once an Assembler, of all things, trys to move an instruction to another
place is the day that it will never be used again. Well, at least I would
never use it again... Please note that macro assemblers are fine because
they insert explicit blocks of instructions, and you have control over where
everything goes.

--- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
compiler. It also sounds like it would suffer from the same issues that we
have to deal with wrt current C/C++ Standard today. Not to mention that
"Optimizing" Assembler sounds suspiciously like it would end up skyrocketing
from low-level nature of Assembly Language into the ozone layer of the
high-level and undefined bliss...

> d) I read about your "open-source library called AppCore".
> Which licence did you choose?
> http://en.wikipedia.org/wiki/Free_software_license

Well, for now I guess you can call it detailed reference material based
solely on example C/Assembly implementation. You can download; compile;
experiment with; and see if you can make some use of its methods. However,
is has a full-blown SMR implementation that IBM has a patent application out
on. So, I need to revisit AppCore; I have let it just sit there for a
while... Humm...

> e) I guess that some external links or further references will be added
> later.
> Examples:
> - "Trip Report: Ad-Hoc Meeting on Threads in C++" by Eric Niebler
> http://artima.com/cppsource/threads_meeting.html
>
> - "The Technical Report on C++ Library Extensions (TR1)" by Matt Austern
> Comparison between "pthread_once" and " thread_once" (on page 26)
>
> http://devconnections.com/updates/LasVegasFall_05/CPP_Connections/CLT304.pdf
>
> - Class library by Eric Crahen
>
> http://zthread.sourceforge.net/html/classZThread_1_1LocalStaticInstantiation.html

Indeed.

Thanks for all of you comments. I will add your suggestion of using an
unsigned data type to rid myself of using 'less/greater and/or than zero
constraints' in my reference counting documentation.

:^)

Chris Thomasson

unread,

Nov 11, 2006, 4:30:39 AM11/11/06

to

<rfis...@gmail.com> wrote in message
news:1163067860.2...@h54g2000cwb.googlegroups.com...

>> From: "Chris Thomasson"
>
>> Here is a very rough draft of a paper I am creating that describes how my
>> static initialization algorithms is implemented. The document is
>> incomplete,
>> however I thought that I should try to gather any comments I ca before I
>> go
>> ahead and move the paper out of the "rough draft " stage:
>>
>> http://appcore.home.comcast.net/vzdoc/atomic/static-init/
>>
>> Any comments?
>
>>From the abstract and the introduction I'm not exactly sure what
> the library does - can it be described in terms of an existing known
> problem? Is lib xy a lockfree impl of the singleton pattern,
> keywords: lockfree, singleton pattern, doublechecked locking, GOF?

[...]

> Or does it resolve the c++ static initialization order fiasco?

It allows threads to hit the POD in any order, at any time, and more
importantly, it knows how and when to create/destroy the static objects that
decide to visit it. For instance, if a user trys to load from a POD that is
NULL, the algorithm will go ahead and try to execute a constructor. If it
determines that the memory the makes up the object is no longer being
referenced, it will atomically flag, destroy, and reset the POD, therefore
concurrent constructor will be called on the next load.

Yes. It makes it strongly thread-safe. The paper will show how you can use a
smart-pointer that makes use of the static interface of a C++ template which
uses atomic-ops/members to bring order to the static initialization C++
multi-threading "fiasco". Its statically initialized POD (e.g., extern "C"
and braces {}) memberatomic-ops/membar c C++ template

> Or is it a lockfree refcnt pointer?

The two main functions make use of strongly thread-safe atomic reference
counting, so yes. A large part of its functionality and flexibility are
provided by the reference counting aspect of my algorithm.

> P.S Too many first person singular pronouns throughout [I, me, my,
> mine].
> We know it's you!

Sorry about that!

:O

Markus Elfring

unread,

Nov 12, 2006, 6:44:11 AM11/12/06

to

> IMHO, there is no such thing as an "Optimizing" Assembler.

I suggest to be more precise about this area.
Background:
- Software optimization resources
http://agner.org/optimize/

- "Real-time debugging 101" by Srinivas Gollakota and Guiseppe Olivadoti
http://audiodesignline.com/howto/188101723

> --- Once an Assembler, of all things, trys to move an instruction to another
> place is the day that it will never be used again.

I would say that this is generally usual.
http://en.wikipedia.org/wiki/Instruction_scheduling

We would like to reject that some translation tools fiddle and shuffle around
with specific opcodes. Which are the operation codes that need special treatment
in the used programming language?

> Please note that macro assemblers are fine because they insert explicit
> blocks of instructions, and you have control over where everything goes.

I guess that special flags or specific pragmas must be used to keep control on
the intended instruction sequence in the source files. Do you use any to achieve
the desired effects?
Some tools can have more knowledge on source code details than an "ordinary"
software developer or programmer.

> --- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
> compiler. It also sounds like it would suffer from the same issues that we
> have to deal with wrt current C/C++ Standard today. Not to mention that
> "Optimizing" Assembler sounds suspiciously like it would end up skyrocketing
> from low-level nature of Assembly Language into the ozone layer of the
> high-level and undefined bliss...

Can you imagine more opportunities for code generation and rewriting?
http://en.wikipedia.org/wiki/Compiler_optimization
http://programmersheaven.com/zone5/index.htm

Would you also like to discuss the open issues in the forum "comp.compilers"?

Regards,
Markus

Markus Elfring

unread,

Nov 13, 2006, 4:35:24 PM11/13/06

to

> IMHO, there is no such thing as an "Optimizing" Assembler.

Does an old discussion about the topic "categories of assembly language" show
useful informations?
http://groups.google.de/group/alt.lang.asm/browse_frm/thread/81ec3bc3930ef9d0/70e486fac67c2fc4

> --- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
> compiler. It also sounds like it would suffer from the same issues that we
> have to deal with wrt current C/C++ Standard today.

Would you like to share any more experiences about different approaches?
http://en.wikipedia.org/wiki/High-level_assembler
http://webster.cs.ucr.edu/AsmTools/HLA/HLADoc/HLARef/HLARef15.html#1020993

How much are you used to alternative designs from this software area?

Regards,
Markus

Chris Thomasson

unread,

Nov 14, 2006, 8:05:51 PM11/14/06

to

"Markus Elfring" <Markus....@web.de> wrote in message

news:4rs6osF...@mid.individual.net...

>> IMHO, there is no such thing as an "Optimizing" Assembler.

[...]

>> --- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
>> compiler. It also sounds like it would suffer from the same issues that
>> we have to deal with wrt current C/C++ Standard today.
>
> Would you like to share any more experiences about different approaches?
> http://en.wikipedia.org/wiki/High-level_assembler
> http://webster.cs.ucr.edu/AsmTools/HLA/HLADoc/HLARef/HLARef15.html#1020993
>
> How much are you used to alternative designs from this software area?

Okay, I trust this guys work:
> http://webster.cs.ucr.edu/AsmTools/HLA/HLADoc/HLARef/HLARef15.html#1020993

So, you can definitely use the guarantees' that this high-level assembler
provides to create a library that passes my papers test... Indeed. I will
augment my papers requirements for its library with something that will
address this issue in more detail.

Anyway, I think that all of this is going to break down to the method that
the compiler vendors use for turning their link-time optimizations'
on/off...

Markus Elfring

unread,

Dec 5, 2006, 1:03:09 PM12/5/06

to

http://groups.google.de/group/muc.lists.netbsd.port.mips/tree/browse_frm/thread/daf32ea476feb5ea/e8ff97bd9ea8ed61

Are the assemblers and compilers for MIPS environments the only tools that
support a pragma or directive "noreorder"?
Do you know more software with similar options?

Regards,
Markus

rand...@earthlink.net

unread,

Dec 5, 2006, 1:56:00 PM12/5/06

to

I'm pretty sure the SPARC tools do instruction reordering, though I
don't know what sort of control they offer on this.
Cheers,
Randy Hyde

Markus Elfring

unread,

Dec 7, 2006, 3:55:52 PM12/7/06

to

> So, you can definitely use the guarantees' that this high-level assembler
> provides to create a library that passes my papers test... Indeed. I will
> augment my papers requirements for its library with something that will
> address this issue in more detail.

Did you notice any corresponding requests and discussions about optimisation
approaches in this software area?
- Optimizing operations of the Sun SPARC assembler
http://compilers.iecc.com/comparch/article/91-11-008
http://compilers.iecc.com/comparch/article/91-10-097

- instruction rescheduler?
http://groups.google.de/group/comp.lang.asm.x86/msg/02e514c9a05c558f

- An Introduction to Sparc Code Optimization
http://www.cs.wisc.edu/~fischer/cs701.f01/asg1.html

- Did anybody ever try to use the directive or pseudo-op ".optim" explicitly?
http://docs.sun.com/app/docs/doc/816-1681/6m83631kv?a=view

- Typed Assembly Language
http://www.cs.cornell.edu/talc/overview.html

- New optimizer merged to trunk
http://www.tortall.net/projects/yasm/wiki/2006/08/20/14.28

Regards,
Markus

Markus Elfring

unread,

Dec 7, 2006, 4:24:04 PM12/7/06

to

> I'm pretty sure the SPARC tools do instruction reordering, though I
> don't know what sort of control they offer on this.

1. Are you interested to get control on such technical details?

2. Is it a topic in any of your lessons?
http://groups.google.de/group/comp.lang.asm.x86/browse_frm/thread/8816e6e2b5b3ff52

3. Would you like to specify concrete programs?
(Are any names used that are different from "as"?)

Regards,
Markus

rand...@earthlink.net

unread,

Dec 7, 2006, 5:15:16 PM12/7/06

to

Markus Elfring wrote:
> > I'm pretty sure the SPARC tools do instruction reordering, though I
> > don't know what sort of control they offer on this.
>
> 1. Are you interested to get control on such technical details?

Sure. As I've mentioned, this appeared in a "roadmap" of the HLA design
at one time. It's not something that I have an immediate interest in,
as there are far more important things to take care of first, but I do
keep my eyes open for people who are attempting this kind of stuff.

BTW, have you checked out Hutch's work in this area (on the MASM
Forum)?

>
> 2. Is it a topic in any of your lessons?
> http://groups.google.de/group/comp.lang.asm.x86/browse_frm/thread/8816e6e2b5b3ff52

At the level of that discussion, no. But I certainly do mention
instruction scheduling and other related matters in AoA/32.

Cheers,
Randy Hyde

Markus Elfring

unread,

Dec 8, 2006, 1:37:14 PM12/8/06

to

> BTW, have you checked out Hutch's work in this area (on the MASM Forum)?

Which topics would you like to recommend for reading?
http://masm32.com/board/index.php?action=profile;u=1
http://masm32.com/board/index.php?board=9.0

Regards,
Markus

rand...@earthlink.net

unread,

Dec 8, 2006, 6:40:57 PM12/8/06

to

Try this one:
http://www.masm32.com/board/index.php?topic=6203.0

I don't know how far Hutch got with this, probably not very far
compared with what you're expecting, but you'll still want to take a
look at it.

Cheers,
Randy Hyde