不用空闲链表实现高效率对象池

30 views

Skip to first unread message

Linker M Lin

unread,

Oct 30, 2007, 12:21:04 AM10/30/07

to TopLanguage

// TODO: 还没有测试,没有用CAS实现多线程安全
#include <string.h>
//
// @ author Linker.Lin
//
inline unsigned int FastFindSet64(unsigned long long bits)
{
// if bits==0 return >63 (maybe 68 -_-!)
_asm
{
mov ebx,dword ptr[bits];
mov ecx,dword ptr[bits+4];
bsf eax,ecx;
add eax,32;
bsf eax,ebx;
}
}

// only for single thread
template< typename ObjectType, unsigned int ObjectsNum/* ObjectsNum %
64==0 */ >
class ASMObjectPool
{
public:
ASMObjectPool():_index(0),_objectCount(ObjectsNum)
{
memset(_bitsMap, 1, sizeof(unsigned long long)*(ObjectsNum/
64));
}
ObjectType* AcquireObject()
{
unsigned int oldIndex=_index;
unsigned int bitOffset=FastFindSet64(_bitsMap[_index++]);
_index>=(ObjectsNum/64)?_index=0:NULL;
unsigned int count=0;
while(bitOffset>63)
{
oldIndex=_index;
bitOffset=FastFindSet64(_bitsMap[_index++]);
_index>=(ObjectsNum/64)?_index=0:NULL;
count++;
if(count>=(ObjectsNum/64)) return NULL;
}
_bitsMap[oldIndex] &= ~(_mask << bitOffset);
_objectCount--;
return &_objects[oldIndex*64+bitOffset];

}
void ReleaseObject(ObjectType* pObject)
{
unsigned int ObjOffset = pObject - _objects;
if(ObjOffset >= ObjectsNum) return;
_bitsMap[ObjOffset / 64] |= _mask << (ObjOffset % 64);
_objectCount++;
}
unsigned int Count()
{
return _objectCount;
}
private:
ObjectType _objects[ObjectsNum];
unsigned long long _bitsMap[ObjectsNum/64];
unsigned int _index;
unsigned int _objectCount;
static const unsigned long long _mask = 1;

};

int main(int argx,char* argv[])
{

ASMObjectPool<int 100> intPool;
int* pInt=NULL;
for(unsigned int i=0;i<10;i++)
{
pInt=intPool.AcquireObject();
*pInt=i*10;
intPool.ReleaseObject(pInt);
}

return 0;

}

还没有测试...
从时间复杂度上做简单的分析,在Pool占用<50%的时候,平均每次分配和释放最好是o(1),最差是o(n),
但是,在对象生命平均长度相近的情况下,这个o(1)的可能是非常大的.
这个算法以前应用在路由器(Intel IXP2400)的报文缓冲里面, 效率很高的.
关键就是,对象的寿命长度要相近,这个在报文方面是比较吻合的,还有就是Pool不能太满.

还有,就是这个BSF指令,这条指令是可以在3~10个指令周期快速找到一个长字中从低位起第一个1的.
在Intel的其他架构的处理器里面,以及GPU,DSP里面都有类似的指令.
之所以提供这条指令就是为了在实现滑动窗口,Pool管理,掩码比较的时候提高效率.
实现这条指令的硬件基础是一个循环移位器(RR).
Windows在实现Waitformultiobject的时候也是依赖这样一条指令.

这个Pool每次分配一个对象后,都会把游标移到下一个窗口(64bits),从而保证分配尽量散布在各个窗口,
在Pool不满的情况下,窗口的占用是比较平均的.
在这样的情况下,每次分配的时候,只是O(1)的设置一下当前窗口的标志位,并没有轮询.
这个是非常高效的.
^_^

red...@gmail.com

unread,

Oct 30, 2007, 1:47:12 AM10/30/07

to pon...@googlegroups.com

这个做法有新意, 能分析一下, 这个比起用空闲链表的优势吗 ?