http://appcore.home.comcast.net/
It's all basically static initialization of POD data-structures which are
accessed via a special smart pointer... My experimental "atomic once"
algorithm fully supports a thread-safety level of strong throughout all of
its operations. I think this particular algorithm will work fine. What do
you say?
:^)
Here is my algorithm:
<once-experimental.hpp>
#if ! defined(ONCE_HPP)
#define ONCE_HPP
#include <cassert>
#include <cstdio>
#include <appcore.h>
// User API Decl
namespace atomic {
template<typename T> class once_ptr;
} // namespace atomic
// System API Decl
namespace atomic {
namespace sys {
typedef ac_intword_t refs_t;
template<typename T> struct once_POD;
template<typename T> struct once_def_POD;
static bool once_inc(refs_t*) throw();
static bool once_dec(refs_t*) throw();
static void dbg_allocs_inc() throw();
static void dbg_allocs_dec() throw();
}} // namespace atomic::sys
// System API Def
namespace atomic {
namespace sys {
// Debug counter
static ac_intword_t dbg_allocs = 0;
// Atomically increments the debug allocation counter and logs the value
// returned by ac_atomic_inc_acquire().
void dbg_allocs_inc() throw() {
  ac_intword_t const allocs = ac_atomic_inc_acquire(&dbg_allocs);
  // ac_intword_t is a typedef from <appcore.h> and is not guaranteed to be
  // plain `int`; handing it to "%i" unchanged would be a format/argument
  // mismatch. Widen to long and print with "%li" so the call is well-defined.
  std::printf("atomic::sys::dbg_allocs_inc - %li\n",
              static_cast<long>(allocs));
}
// Atomically decrements the debug allocation counter and logs the value
// returned by ac_atomic_dec_acquire().
void dbg_allocs_dec() throw() {
  ac_intword_t const allocs = ac_atomic_dec_acquire(&dbg_allocs);
  // ac_intword_t is a typedef from <appcore.h> and is not guaranteed to be
  // plain `int`; handing it to "%i" unchanged would be a format/argument
  // mismatch. Widen to long and print with "%li" so the call is well-defined.
  std::printf("atomic::sys::dbg_allocs_dec - %li\n",
              static_cast<long>(allocs));
}
// Takes one reference on *_this, but only while the count is >= 0.
// A negative count is never incremented (once_dec stores -1 when it drops
// the last reference).
// returns true if the refcount was inc'd,
// otherwise false
bool once_inc(refs_t *_this) throw() {
  refs_t seen, expected;
  do {
    seen = ::ac_mb_load_naked(_this);
    if (seen < 0) { return false; }
    expected = seen;
    // Retry until the value handed back by the CAS equals the value we
    // expected to replace.
    // NOTE(review): presumes ac_atomic_cas_acquire returns the previously
    // stored value - confirm against the AppCore documentation.
    seen = ac_atomic_cas_acquire(_this, expected, expected + 1);
  } while (seen != expected);
  // refs_t is a typedef of ac_intword_t and need not be `int`; widen to long
  // so the argument matches the "%li" conversion.
  std::printf("atomic::sys::once_inc(); - %li\n",
              static_cast<long>(seen + 1));
  return true;
}
// Drops one reference on *_this, but only while the count is >= 0.
// When the observed count is 1 the new value written is -1 instead of 0, so
// that once_inc refuses further references until the count is reset.
// returns true for the last ref,
// otherwise false
bool once_dec(refs_t *_this) throw() {
  refs_t seen, expected, next;
  do {
    seen = ::ac_mb_load_naked(_this);
    if (seen < 0) { return false; }
    expected = seen;
    next = (expected == 1) ? -1 : expected - 1;
    // Retry until the value handed back by the CAS equals the value we
    // expected to replace.
    // NOTE(review): presumes ac_atomic_cas_release returns the previously
    // stored value - confirm against the AppCore documentation.
    seen = ac_atomic_cas_release(_this, expected, next);
  } while (seen != expected);
  // refs_t is a typedef of ac_intword_t and need not be `int`; widen to long
  // so the argument matches the "%li" conversion.
  std::printf("atomic::sys::once_dec(); - %li\n",
              static_cast<long>(seen - 1));
  return (seen == 1);
}
// once is a POD
#define ATOMIC_ONCE_SYS_STATICINIT() {0, 0}
// Plain aggregate holding the shared state for one lazily created object of
// type T. It is brace-initialized with ATOMIC_ONCE_SYS_STATICINIT(), i.e.
// {0, 0}, so the member order below (m_refs first, m_state second) must match
// that initializer.
template<typename T>
struct once_POD {
// The managed object type.
typedef T type_t;
// The once_def_POD instantiation that operates on this aggregate.
typedef once_def_POD<once_POD> define_POD_t;
// Reference count manipulated by once_inc / once_dec; those functions refuse
// to touch it while it is negative, and once_dec writes -1 when the last
// reference goes away.
refs_t m_refs;
// Pointer to the managed object, or 0 while none exists.
type_t *m_state;
};
// once_def_POD<T> owns a single static once POD (s_this, of type T) and
// provides the acquire/release operations on it. The struct has no
// non-static data members; every instance of the same specialization works
// on the same s_this.
template<typename T>
struct once_def_POD {
// The managed object type, taken from the once POD.
typedef typename T::type_t type_t;
// The shared once POD; defined and brace-initialized after the struct.
static T s_this;
// Acquires a reference and calls ctor for first ref.
// returns pointer to the managed object,
// otherwise NULL when once_inc refuses the reference (negative refcount).
// An exception thrown by `new type_t` is rethrown after the reference that
// was just taken has been dropped again.
type_t* acquire() {
type_t *local = 0, *cmp;
do {
// local is only non-null here on a retry; in that case the previous
// iteration took a reference via once_inc, which is undone before retrying.
if (local) { ac_atomic_dec_release(&s_this.m_refs); }
local = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
if (! local) {
// NOTE(review): the guard below is commented out, so nothing visible here
// stops two threads from both seeing m_state == 0, both passing once_inc
// and both constructing an object, with the later store overwriting the
// earlier one - confirm whether this is intended.
// hashed_mutex::guard_t lock(&_this);
local = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
if (! once_inc(&s_this.m_refs)) { return 0; }
if (! local) {
// No object exists yet: construct it while holding the new reference.
try { local = new type_t; }
catch(...) {
// Construction failed: give the reference back, then propagate.
(void)once_dec(&s_this.m_refs);
throw;
}
dbg_allocs_inc();
std::printf("\n(%p)once_def_POD::acquire(); - %p\n",
(void*)this, (void*)local);
// Publish the freshly constructed object.
ac_mb_storeptr_release(&s_this.m_state, local);
return local;
}
} else if(! once_inc(&s_this.m_refs)) {
return 0;
}
// Re-read m_state after taking the reference; if it no longer matches the
// pointer loaded before, loop and try again.
cmp = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
} while(local != cmp);
return local;
}
// Releases a reference and calls dtor if last ref.
// returns nothing.
void release() {
// Capture the object pointer before the count is dropped.
type_t *local = (type_t*)ac_mb_loadptr_depends(&s_this.m_state);
if (once_dec(&s_this.m_refs)) {
// Last reference: reset the POD to its {0, 0} state (pointer first, then
// the count) before destroying the object.
ac_mb_storeptr_fence(&s_this.m_state, 0);
ac_mb_store_fence(&s_this.m_refs, 0);
if (local) {
delete local;
std::printf("(%p)once_def_POD::release(); - %p\n\n", (void*)this,
(void*)local);
dbg_allocs_dec();
}
}
}
// static POD init: out-of-class definition of s_this, brace-initialized via
// ATOMIC_ONCE_SYS_STATICINIT().
}; template<typename T>
T once_def_POD<T>::s_this = ATOMIC_ONCE_SYS_STATICINIT();
}} // namespace atomic::sys
// User API Def
namespace atomic {
// Holds an acquired reference.
//
// A once_ptr is bound to a define_POD_t and, while it is alive, keeps one
// reference obtained through define_POD_t::acquire(). load() (and the
// pointer-like operators) yield the pointer that acquire() returned, which is
// NULL when the handle is unbound or when acquire() could not take a
// reference. The reference is given back in the destructor and whenever the
// handle is re-bound by assignment.
template<typename T>
class once_ptr {
public:
  typedef typename sys::once_POD<T>::define_POD_t define_POD_t;

private:
  define_POD_t *m_once;  // bound define POD, or 0 when unbound
  T *m_state;            // result of acquire(); 0 if no reference is held

private:
  // Takes a new reference through the bound define POD.
  // const so that the copy operations can call it on a const source; it
  // modifies the shared POD, never this handle.
  // returns the managed object, otherwise NULL.
  T* acquire() const {
    return (m_once) ? m_once->acquire() : 0;
  }

  // Gives back the reference held by this handle, if any.
  // acquire() returns NULL without taking a reference, so a handle whose
  // m_state is NULL owns nothing and must not call release() on the POD.
  void release() {
    if (m_once && m_state) { m_once->release(); }
  }

public:
  // Creates an unbound handle that holds no reference.
  once_ptr() throw() : m_once(0), m_state(0) {}

  // Binds to _once and tries to acquire a reference; check the handle (or
  // load()) for NULL afterwards. Propagates whatever acquire() throws.
  once_ptr(define_POD_t &_once)
    : m_once(&_once), m_state(_once.acquire()) {}

  ~once_ptr() { release(); }

public:
  // Binds to the same define POD as rhs and acquires a reference of its own.
  once_ptr(once_ptr const &rhs)
    : m_once(rhs.m_once), m_state(rhs.acquire()) {}

  // Re-binds to rhs's define POD. Nothing happens when both handles are
  // already bound to the same define POD. The new reference is acquired
  // before the old one is released, so a throwing acquire() leaves this
  // handle unchanged.
  once_ptr const& operator =(once_ptr const &rhs) {
    if (m_once != rhs.m_once) {
      define_POD_t *_once = rhs.m_once;
      T *_state = rhs.acquire();
      release();
      m_once = _once;
      m_state = _state;
    }
    return *this;
  }

  // Re-binds to the define POD rhs; same ordering as the copy assignment.
  once_ptr const& operator =(define_POD_t &rhs) {
    if (m_once != &rhs) {
      define_POD_t *_once = &rhs;
      T *_state = rhs.acquire();
      release();
      m_once = _once;
      m_state = _state;
    }
    return *this;
  }

public:
  // Returns the held object pointer, or NULL.
  T* load() const throw() { return m_state; }

public:
  // Pointer-like access; the caller must have checked for NULL first.
  T* operator ->() const { return load(); }
  T& operator *() const { return *load(); }

public:
  // True when a reference (and therefore an object) is held.
  operator bool() const throw() { return (m_state != 0); }
  bool operator !() const throw() { return (m_state == 0); }
};
} // namespace atomic
#endif
</once-experimental.hpp>
Here is a quick sample usage:
// Sample shared object: an int guarded by a mutex, created and destroyed on
// demand through atomic::once_ptr.
class foo1 {
  int m_state;
  mutex_t m_mtx;

public:
  typedef atomic::once_ptr<foo1> once_ptr_t;

  // Starts with a state of 0 and logs the construction.
  foo1() : m_state(0) {
    // we are the only thread; for sure.
    std::printf("(%p)foo1::foo1()\n", static_cast<void*>(this));
  }

  // Logs the destruction.
  ~foo1() {
    // we are the only thread; for sure.
    std::printf("(%p)foo1::~foo1()\n", static_cast<void*>(this));
  }

public:
  // Stores `state` under the mutex, logs the new and the previous value and
  // returns the previous value.
  int do_something(int state) {
    mutex_t::guard_t lock(m_mtx);
    // we are the only thread; for sure.
    int const previous = m_state;
    m_state = state;
    std::printf("(%p)foo1::do_something(%i) - %i\n",
                static_cast<void*>(this), m_state, previous);
    return previous;
  }
};
class foo2 {
int m_state;
public:
foo2(int state) : m_state(state) {
static foo1::once_ptr_t::define_POD_t s_foo1;
foo1::once_ptr_t myfoo1(s_foo1);
if (myfoo1) {
do_something(myfoo1->do_something(m_state));
}
std::printf("(%p)foo2::foo2()\n", (void*)this);
}
~foo2() {
std::printf("(%p)foo2::~foo2()\n", (void*)this);
}
public:
void do_something(int state) {
std::printf("(%p)foo2::do_something(%i) - %i\n", (void*)this, state,
m_state);
m_state = state;
}
void do_something_else(int state) {
static foo1::once_ptr_t::define_POD_t s_foo1;
foo1::once_ptr_t myfoo1(s_foo1);
if (myfoo1) {
do_something(myfoo1->do_something(state));
}
std::printf("(%p)foo2::do_something_else(%i) - %i\n", (void*)this,
state, m_state);
}
};
void some_func(...) {
foo2 foo(1), *pfoo = new foo2(2);
pfoo->do_something(3);
foo.do_something_else(4);
foo.do_something(5);
pfoo->do_something_else(6);
delete pfoo;
}
As you may know by now, I am not a C++ expert; any help/comments would be
greatly appreciated!
;^)
Thank You
;^(...
http://appcore.home.comcast.net/vzdoc/atomic/static-init/
Any comments?
Ack! That was supposed to read:
"The document is most certainly incomplete"
I pulled a double negative there!
>From the abstract and the introduction I'm not exactly sure what
the library does - can it be described in terms of an existing known
problem? Is lib xy a lockfree impl of the singleton pattern,
keywords: lockfree, singleton pattern, doublechecked locking, GOF?
Or does it resolve the c++ static initialization order fiasco?
Or is it a lockfree refcnt pointer?
Confused,
RF
P.S Too many first person singular pronouns throughout [I, me, my,
mine].
We know it's you!
a) I suggest changing the order of your arguments in the introduction.
=> 1. well-known (?) technical problems with (static) initialization in
multi-threaded programming environments
2. The approach for the proposed algorithm and its benefits
b) Would you like to adjust your wording to increase the emphasis on specific
details?
http://en.wikipedia.org/wiki/Grammatical_voice
c) What do you think of my opinion on a few aspects of your section "2.1
Reordering Issues"?
Discussion "assembly in future C standard"
http://groups.google.de/group/comp.lang.c/msg/6f78611d81de498b
d) I read about your "open-source library called AppCore".
Which licence did you choose?
http://en.wikipedia.org/wiki/Free_software_license
e) I guess that some external links or further references will be added later.
Examples:
- "Trip Report: Ad-Hoc Meeting on Threads in C++" by Eric Niebler
http://artima.com/cppsource/threads_meeting.html
- "The Technical Report on C++ Library Extensions (TR1)" by Matt Austern
Comparison between "pthread_once" and " thread_once" (on page 26)
http://devconnections.com/updates/LasVegasFall_05/CPP_Connections/CLT304.pdf
- Class library by Eric Crahen
http://zthread.sourceforge.net/html/classZThread_1_1LocalStaticInstantiation.html
Regards,
Markus
Yes. However, I think I am going to keep the details at a rather low level
for now. Basically, keep the wording as technical and as to the point as
possible. I am more interested in polishing off the actual implementation
details of the C/C++ API that the paper will show how to implement. Also, if
you follow the API declared in the paper, you should be able to
cut-and-paste sections of compilable code, which currently consists of
C/C++. There will also be simple examples of how to replace the two main
static C++ function implementations which drive the paper's algorithm with
pure IA-32 / SPARC 32/64 Assembly Language. That should top things off before
I really start to drill down on the technical details wrt grammar...
;^)
> c) How do you think about my opinion on a few aspects in your section "2.1
> Reordering Issues"?
> Discussion "assembly in future C standard"
> http://groups.google.de/group/comp.lang.c/msg/6f78611d81de498b
IMHO, there is no such thing as an "Optimizing" Assembler. Period... Here is
why, again this is IMHO:
--- The day an Assembler, of all things, tries to move an instruction to
another place is the day that it will never be used again. Well, at least I
would never use it again... Please note that macro assemblers are fine because
they insert explicit blocks of instructions, and you have control over where
everything goes.
--- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
compiler. It also sounds like it would suffer from the same issues that we
have to deal with wrt current C/C++ Standard today. Not to mention that
"Optimizing" Assembler sounds suspiciously like it would end up skyrocketing
from low-level nature of Assembly Language into the ozone layer of the
high-level and undefined bliss...
> d) I read about your "open-source library called AppCore".
> Which licence did you choose?
> http://en.wikipedia.org/wiki/Free_software_license
Well, for now I guess you can call it detailed reference material based
solely on example C/Assembly implementation. You can download; compile;
experiment with; and see if you can make some use of its methods. However,
it has a full-blown SMR implementation that IBM has a patent application out
on. So, I need to revisit AppCore; I have let it just sit there for a
while... Humm...
> e) I guess that some external links or further references will be added
> later.
> Examples:
> - "Trip Report: Ad-Hoc Meeting on Threads in C++" by Eric Niebler
> http://artima.com/cppsource/threads_meeting.html
>
> - "The Technical Report on C++ Library Extensions (TR1)" by Matt Austern
> Comparison between "pthread_once" and " thread_once" (on page 26)
>
> http://devconnections.com/updates/LasVegasFall_05/CPP_Connections/CLT304.pdf
>
> - Class library by Eric Crahen
>
> http://zthread.sourceforge.net/html/classZThread_1_1LocalStaticInstantiation.html
Indeed.
Thanks for all of your comments. I will add your suggestion of using an
unsigned data type to rid myself of using 'less/greater than zero
constraints' in my reference counting documentation.
:^)
> Or does it resolve the c++ static initialization order fiasco?
It allows threads to hit the POD in any order, at any time, and more
importantly, it knows how and when to create/destroy the static objects that
decide to visit it. For instance, if a user tries to load from a POD that is
NULL, the algorithm will go ahead and try to execute a constructor. If it
determines that the memory that makes up the object is no longer being
referenced, it will atomically flag, destroy, and reset the POD; therefore
a constructor will be called again on the next load.
Yes. It makes it strongly thread-safe. The paper will show how you can use a
smart-pointer that makes use of the static interface of a C++ template which
uses atomic-ops/membars to bring order to the static initialization C++
multi-threading "fiasco". It is a statically initialized POD (e.g., extern "C"
and braces {}) whose members are accessed with atomic-ops/membars by a C++
template.
> Or is it a lockfree refcnt pointer?
The two main functions make use of strongly thread-safe atomic reference
counting, so yes. A large part of its functionality and flexibility are
provided by the reference counting aspect of my algorithm.
> P.S Too many first person singular pronouns throughout [I, me, my,
> mine].
> We know it's you!
Sorry about that!
:O
I suggest to be more precise about this area.
Background:
- Software optimization resources
http://agner.org/optimize/
- "Real-time debugging 101" by Srinivas Gollakota and Guiseppe Olivadoti
http://audiodesignline.com/howto/188101723
> --- Once an Assembler, of all things, trys to move an instruction to another
> place is the day that it will never be used again.
I would say that this is generally usual.
http://en.wikipedia.org/wiki/Instruction_scheduling
We would like to reject that some translation tools fiddle and shuffle around
with specific opcodes. Which are the operation codes that need special treatment
in the used programming language?
> Please note that macro assemblers are fine because they insert explicit
> blocks of instructions, and you have control over where everything goes.
I guess that special flags or specific pragmas must be used to keep control on
the intended instruction sequence in the source files. Do you use any to achieve
the desired effects?
Some tools can have more knowledge on source code details than an "ordinary"
software developer or programmer.
> --- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
> compiler. It also sounds like it would suffer from the same issues that we
> have to deal with wrt current C/C++ Standard today. Not to mention that
> "Optimizing" Assembler sounds suspiciously like it would end up skyrocketing
> from low-level nature of Assembly Language into the ozone layer of the
> high-level and undefined bliss...
Can you imagine more opportunities for code generation and rewriting?
http://en.wikipedia.org/wiki/Compiler_optimization
http://programmersheaven.com/zone5/index.htm
Would you also like to discuss the open issues in the forum "comp.compilers"?
Regards,
Markus
Does an old discussion about the topic "categories of assembly language" offer
useful information?
http://groups.google.de/group/alt.lang.asm/browse_frm/thread/81ec3bc3930ef9d0/70e486fac67c2fc4
> --- IMO, a so-called "Optimizing" Assembler sounds suspiciously like a C
> compiler. It also sounds like it would suffer from the same issues that we
> have to deal with wrt current C/C++ Standard today.
Would you like to share any more experiences about different approaches?
http://en.wikipedia.org/wiki/High-level_assembler
http://webster.cs.ucr.edu/AsmTools/HLA/HLADoc/HLARef/HLARef15.html#1020993
How much are you used to alternative designs from this software area?
Regards,
Markus
Okay, I trust this guy's work:
> http://webster.cs.ucr.edu/AsmTools/HLA/HLADoc/HLARef/HLARef15.html#1020993
So, you can definitely use the guarantees that this high-level assembler
provides to create a library that passes my paper's test... Indeed. I will
augment my paper's requirements for its library with something that will
address this issue in more detail.
Anyway, I think that all of this is going to break down to the method that
the compiler vendors use for turning their link-time optimizations
on/off...
Are the assemblers and compilers for MIPS environments the only tools that
support a pragma or directive "noreorder"?
Do you know more software with similar options?
Regards,
Markus
I'm pretty sure the SPARC tools do instruction reordering, though I
don't know what sort of control they offer on this.
Cheers,
Randy Hyde
Did you notice any corresponding requests and discussions about optimisation
approaches in this software area?
- Optimizing operations of the Sun SPARC assembler
http://compilers.iecc.com/comparch/article/91-11-008
http://compilers.iecc.com/comparch/article/91-10-097
- instruction rescheduler?
http://groups.google.de/group/comp.lang.asm.x86/msg/02e514c9a05c558f
- An Introduction to Sparc Code Optimization
http://www.cs.wisc.edu/~fischer/cs701.f01/asg1.html
- Did anybody ever try to use the directive or pseudo-op ".optim" explicitly?
http://docs.sun.com/app/docs/doc/816-1681/6m83631kv?a=view
- Typed Assembly Language
http://www.cs.cornell.edu/talc/overview.html
- New optimizer merged to trunk
http://www.tortall.net/projects/yasm/wiki/2006/08/20/14.28
Regards,
Markus
1. Are you interested to get control on such technical details?
2. Is it a topic in any of your lessons?
http://groups.google.de/group/comp.lang.asm.x86/browse_frm/thread/8816e6e2b5b3ff52
3. Would you like to specify concrete programs?
(Are any names used that are different from "as"?)
Regards,
Markus
Sure. As I've mentioned, this appeared in a "roadmap" of the HLA design
at one time. It's not something that I have an immediate interest in,
as there are far more important things to take care of first, but I do
keep my eyes open for people who are attempting this kind of stuff.
BTW, have you checked out Hutch's work in this area (on the MASM
Forum)?
>
> 2. Is it a topic in any of your lessons?
> http://groups.google.de/group/comp.lang.asm.x86/browse_frm/thread/8816e6e2b5b3ff52
At the level of that discussion, no. But I certainly do mention
instruction scheduling and other related matters in AoA/32.
Cheers,
Randy Hyde
Which topics would you like to recommend for reading?
http://masm32.com/board/index.php?action=profile;u=1
http://masm32.com/board/index.php?board=9.0
Regards,
Markus
Try this one:
http://www.masm32.com/board/index.php?topic=6203.0
I don't know how far Hutch got with this, probably not very far
compared with what you're expecting, but you'll still want to take a
look at it.
Cheers,
Randy Hyde