> I think this is because gc deallocates in chunks rather piece by
> piece, perhaps allocates from pool. Also there are no destructors
> calls.
> fastest C++ program on this site uses boost pool which calls
> destructors and can deallocate, therefore, is slower then C variant
> which uses apache's pool, which just deallocate.
> So I have coded simple custom pool that does not keeps track of
> allocated objects and doesn't call destructors and result is
> that it is slightly faster than C ;)
> What is clear is that Java garbage collector cannot be bitten
> in this case if manual deletion of objects is required.
> (as seen here)
> http://shootout.alioth.debian.org/u64/performance.php?test=binarytrees
> Timings for cpp with my custom alloc
> bmaxa@maxa:~/shootout/trees$ time ./binarytreescpp 20
> stretch tree of depth 21 check: -1
> 2097152 trees of depth 4 check: -2097152
> 524288 trees of depth 6 check: -524288
> 131072 trees of depth 8 check: -131072
> 32768 trees of depth 10 check: -32768
> 8192 trees of depth 12 check: -8192
> 2048 trees of depth 14 check: -2048
> 512 trees of depth 16 check: -512
> 128 trees of depth 18 check: -128
> 32 trees of depth 20 check: -32
> long lived tree of depth 20 check: -1
> real 0m4.188s
> user 0m7.036s
> sys 0m0.112s
> bmaxa@maxa:~/shootout/trees$ time taskset -c 0 ./binarytreescpp 20
> stretch tree of depth 21 check: -1
> 2097152 trees of depth 4 check: -2097152
> 524288 trees of depth 6 check: -524288
> 131072 trees of depth 8 check: -131072
> 32768 trees of depth 10 check: -32768
> 8192 trees of depth 12 check: -8192
> 2048 trees of depth 14 check: -2048
> 512 trees of depth 16 check: -512
> 128 trees of depth 18 check: -128
> 32 trees of depth 20 check: -32
> long lived tree of depth 20 check: -1
> real 0m7.077s
> user 0m6.976s
> sys 0m0.084s
> Timings for C with apache pool:
> bmaxa@maxa:~/shootout/trees$ time ./binarytreesc 20
> stretch tree of depth 21 check: -1
> 2097152 trees of depth 4 check: -2097152
> 524288 trees of depth 6 check: -524288
> 131072 trees of depth 8 check: -131072
> 32768 trees of depth 10 check: -32768
> 8192 trees of depth 12 check: -8192
> 2048 trees of depth 14 check: -2048
> 512 trees of depth 16 check: -512
> 128 trees of depth 18 check: -128
> 32 trees of depth 20 check: -32
> long lived tree of depth 20 check: -1
> real 0m4.625s
> user 0m8.085s
> sys 0m0.140s
> bmaxa@maxa:~/shootout/trees$ time taskset -c 0 ./binarytreesc 20
> stretch tree of depth 21 check: -1
> 2097152 trees of depth 4 check: -2097152
> 524288 trees of depth 6 check: -524288
> 131072 trees of depth 8 check: -131072
> 32768 trees of depth 10 check: -32768
> 8192 trees of depth 12 check: -8192
> 2048 trees of depth 14 check: -2048
> 512 trees of depth 16 check: -512
> 128 trees of depth 18 check: -128
> 32 trees of depth 20 check: -32
> long lived tree of depth 20 check: -1
> real 0m8.009s
> user 0m7.888s
> sys 0m0.116s
> code follows if someone is interested:
> /* The Computer Language Benchmarks Game
> * http://shootout.alioth.debian.org/
> *
> * contributed by Jon Harrop
> * modified by Alex Mizrahi
> * modified by Andreas Schäfer
> * very minor omp tweak by The Anh Tran
> * custom pool Branimir Maksimovic
> */
> #include <iostream>
> #include <stdlib.h>
> #include <stdio.h>
> #include <vector>
> #include <new>
> #include <pthread.h>
> #include <boost/pool/object_pool.hpp>
> const size_t LINE_SIZE = 64;
> template <class T>
> class Pool{
> static const unsigned NELEM = 10000*sizeof(T);
> public:
> Pool()
> : pos_(0)
> {
> pool_.push_back(alloc());
> }
> Pool(const Pool&)=delete;
> Pool& operator=(const Pool&)=delete;
> ~Pool()
> {
> for(auto i : pool_)
> {
> free(i);
> }
> }
> template <class ...T1>
> T* allocate(T1... args)
> {
> if(NELEM <= pos_)
> {
> pool_.push_back(alloc());
> pos_=0;
> }
> void* p = &pool_.back()[pos_];
> pos_+=sizeof(T);
> return new(p)T(args...);
> }
> private:
> static char* alloc()
> {
> Inner* next = (Inner*)pthread_getspecific(k.key);
> if(next == nullptr)
> {
> next = (Inner*)::operator new(NELEM);
> next->next = nullptr;
> }
> char* p = (char*)next;
> next = next->next;
> pthread_setspecific(k.key,next);
> return p;
> }
> static void free(void* p)
> {
> Inner* next = (Inner*)pthread_getspecific(k.key);
> Inner* tmp = (Inner*)p;
> tmp->next = next;
> next = tmp;
> pthread_setspecific(k.key,next);
> }
> std::vector<char*> pool_;
> unsigned pos_;
> struct Inner{
> Inner* next;
> };
> static struct Key{
> Key()
> {
> pthread_key_create(&key,destroy);
> }
> ~Key()
> {
> pthread_key_delete(key);
> }
> static void destroy(void* p)
> {
> Inner* next = (Inner*)p;
> while(next)
> {
> Inner* tmp = next->next;
> ::operator delete(next);
> next = tmp;
> }
> }
> pthread_key_t key;
> }k;
> };
> template <class T>
> typename Pool<T>::Key Pool<T>::k;
> struct Node
> {
> Node *l, *r;
> int i;
> Node(int i2) :l(0),r(0), i(i2)
> {}
> Node(Node *l2, int i2, Node *r2) : l(l2), r(r2), i(i2)
> {}
> Node(const Node&)=delete;
> Node& operator=(const Node&)=delete;
> int check() const
> {
> if (l)
> return l->check() + i - r->check();
> else return i;
> }
> };
> typedef boost::object_pool<Node> NodePool;
> Node *make(int i, int d, Pool<Node>& p)
> {
> if (d > 0)
> return p.allocate( make(2*i-1, d-1,p),
> i,
> make(2*i, d-1,p));
> return p.allocate(i);
> }
> Node *make(int i, int d, NodePool& p)
> {
> if (d > 0)
> return p.construct( make(2*i-1, d-1,p),
> i,
> make(2*i, d-1,p));
> return p.construct(i);
> }
> int main(int argc, char *argv[])
> {
> int min_depth = 4;
> int max_depth = std::max(min_depth+2,
> (argc == 2 ? atoi(argv[1]) : 10));
> int stretch_depth = max_depth+1;
> // Alloc then dealloc stretchdepth tree
> {
> Pool<Node> c_pool;
> //NodePool c_pool;
> Node* c (make(0, stretch_depth,c_pool));
> std::cout << "stretch tree of depth " << stretch_depth << "\t "
> << "check: " << c->check() << '\n';
> }
> Pool<Node> long_lived_tree_pool;
> //NodePool long_lived_tree_pool;
> Node* long_lived_tree(make(0, max_depth,long_lived_tree_pool));
> char *outputstr = (char*)malloc(LINE_SIZE * (max_depth +1) * sizeof(char));
> #pragma omp parallel for
> for (int d = min_depth; d <= max_depth; d += 2)
> {
> int iterations = 1 << (max_depth - d + min_depth);
> int c = 0;
> for (int i = 1; i <= iterations; ++i)
> {
> Pool<Node> pool;
> //NodePool pool;
> Node *a(make(i, d,pool)), *b(make(-i, d,pool));
> c += a->check() + b->check();
> }
> sprintf(outputstr + LINE_SIZE * d, "%d\t trees of depth %d\t check: %d\n", (2 * iterations), d, c);
> }
> for (int d = min_depth; d <= max_depth; d += 2)
> printf("%s", outputstr + (d * LINE_SIZE) );
> free(outputstr);
> std::cout << "long lived tree of depth " << max_depth << "\t "
> << "check: " << (long_lived_tree->check()) << '\n';
> return 0;
> }
On Saturday, July 28, 2012 7:20:04 PM UTC-4, Melzzzzz wrote:
> I have played a little tree with benchmark from computer language game
> site:
> http://shootout.alioth.debian.org/u64q/performance.php?test=binarytrees