A broken mark-region prototype

155 views
Skip to first unread message

Jon Harrop

unread,
Nov 27, 2012, 8:39:08 AM11/27/12
to pragmatic-functional...@googlegroups.com

Sadly, the version of my mark-region prototype that I kept does not work and I haven't found the time to fix it so I'd like to archive it here for future reference and as a guide to anyone interested:

 

#include <iostream>

#include <stdio.h>

#include <tchar.h>

#include <windows.h>

 

using namespace std;

 

class Cons {

public:

       char x;

       char y;

       Cons *t;

       Cons() { }

       Cons(char x, char y, Cons *t) : x(x), y(x), t(t) { }

};

 

const int size = 16000000;

 

typedef unsigned char byte;

 

// One 16kB region

const int regionSize = 14;

int firstFree = 0;

int bitVectorSize = 1 << (regionSize-6);

byte *marked = (byte *)_aligned_malloc(bitVectorSize, 64);

byte *allocated = (byte *)_aligned_malloc(bitVectorSize, 64);

Cons *region = (Cons *)_aligned_malloc(1 << regionSize, 1 << regionSize);

 

Cons **shadowStackStart = (Cons **)malloc(size * sizeof(Cons *));

Cons **shadowStackPtr = shadowStackStart;

 

byte *firstUnsetBit = new byte[256];

 

int findFirstUnsetBit(byte n) {

       if ((n & 1) == 0) return 0;

       return 1 + findFirstUnsetBit(n >> 1);

}

 

/// The empty list.

Cons *empty = NULL;

 

// Using a raw visit stack rather than a std::vector makes marking at least 2x

// faster.

Cons **visitStart = (Cons **)malloc(size * sizeof(Cons *));

Cons **visitPtr = visitStart;

 

// Double marking is 3% slower

void mark(Cons *r) {

       int n = r - region;

       if ((marked[n/8] & (1 << (n & 7))) == 0) {

             marked[n/8] |= 1 << (n & 7);

             if (r->t) *(visitPtr++) = r->t;

       }

}

 

int gcCycles = 0;

 

void gc() {

       ++gcCycles;

       // Mark the global roots.

       for (Cons **r=shadowStackStart; r!=shadowStackPtr; ++r)

             mark(*r);

       // Mark the entire heap.

       while (visitPtr != visitStart)

             mark(*(--visitPtr));

       // Filter out reachable blocks from the allocated list.

       for (int i=0; i<bitVectorSize; ++i) {

             allocated[i] &= marked[i];

             marked[i] = 0;

       }

/*

       int size=0;

       for (int i=0; i<bitVectorSize; ++i)

             for (int j=0; j<8; ++j)

                    if (((allocated[i] >> j) & 1) == 1) {

                           ++size;

                    }

                    else {

                           Cons *r = region + 8*i + j;

                           r->x = -1;

                           r->y = -1;

                           r->t = (Cons *)-1;

                    }

       //cout << 100-100.0*size/4096 << "% reduction in heap size" << endl;

       */

       firstFree = 0;

}

 

/// Returns a pointer into the current region or NULL if the region is full.

Cons *tryCons(char x, char y, Cons *t) {

       for (; firstFree<bitVectorSize; ++firstFree) {

             byte b = *(allocated + firstFree);

             if (b != 255) {

                    // Find the first free block

                    int j = firstUnsetBit[b];

                    // Mark this block as allocated

                    *(allocated + firstFree) |= (1 << j);

                    Cons *r = region + 8*firstFree + j;

                    r->x = x;

                    r->y = y;

                    r->t = t;

                    return r;

             }

       }

       return empty;

}

 

void oom() {

       cout << "Out of memory " << shadowStackPtr - shadowStackStart << endl;

       getc(stdin);

       exit(1);

}

 

// Assume tail is already reachable.

Cons *cons(char x, char y, Cons *t) {

       Cons *list = tryCons(x, y, t);

       if (list) return list;

       gc();

       list = tryCons(x, y, t);

       if (!list) oom();

       return list;

}

 

Cons *cons(Cons *a, char x, char y, Cons *t) {

       Cons *list = tryCons(x, y, t);

       if (list) return list;

       Cons **oldShadowStackPtr = shadowStackPtr;

       if (a) *shadowStackPtr++ = a;

       gc();

       shadowStackPtr = oldShadowStackPtr;

       list = tryCons(x, y, t);

       if (!list) oom();

       return list;

}

 

/// Is one position safe from a queen at another position.

bool safe(char x0, char y0, char x1, char y1) {

       return x0!=x1 && y0!=y1 && x0-y0!=x1-y1 && x0+y0!=x1+y1;

}

 

/// Filter safe positions from a list of positions.

Cons *filter(char x0, char y0, Cons *list) {

       if (!list) return empty;

       if (safe(x0, y0, list->x, list->y))

       {

             // Copy value types out early so we have fewer reference types.

             char x=list->x, y=list->y;

             Cons *t = filter(x0, y0, list->t);

             // t is not otherwise reachable so we must push it in the event of a GC.

             // i.e. cons(t, x, y, t) rather than cons(x, y, t).

             return cons(t, x, y, t);

       }

       else

             return filter(x0, y0, list->t);

}

 

/// Search for solutions to the n-queens problem.

int search(int n, int nqs, Cons *qs, Cons *qps, int i) {

       if (!qps) return (nqs==n ? i+1 : i);

       Cons **oldShadowStackPtr = shadowStackPtr;

       // We don't need to push qps as long as we copy out its components.

       Cons ps = *qps;

       // qs and ps.t are required by the final call so we must push them.

       if (qs) *shadowStackPtr++ = qs;

       if (ps.t) *shadowStackPtr++ = ps.t;

       Cons *qs2 = cons(ps.x, ps.y, qs);

       // qs2 is required after the call to filter so we must push it.

       if (qs2) *shadowStackPtr++ = qs2;

       Cons *ps2 = filter(ps.x, ps.y, ps.t);

       // ps2 isn't used after this call so we don't need to push it.

       i = search(n, nqs+1, qs2, ps2, i);

       // Restore the shadow stack before the tail call.

       shadowStackPtr = oldShadowStackPtr;

       return search(n, nqs, qs, ps.t, i);

}

 

/// Construct a list of all board positions.

Cons *ps(int n) {

       Cons *list = empty;

       for (int i=1; i<=n; ++i)

             for (int j=1; j<=n; ++j)

                    list = cons(list, i, j, list);

       return list;

}

 

int main() {

       memset(allocated, 0, bitVectorSize);

       memset(marked, 0, bitVectorSize);

       memset(region, 0, 1 << regionSize);

 

       for (int i=0; i<256; ++i)

             firstUnsetBit[i] = findFirstUnsetBit(i);

 

       __int64 freq, tStart, tStop;

       unsigned long TimeDiff;

       QueryPerformanceFrequency((LARGE_INTEGER *)&freq);

       QueryPerformanceCounter((LARGE_INTEGER *)&tStart);

 

       for (int n=1; n<12; ++n) {

             cout << search(n, 0, empty, ps(n), 0) << " solutions to the " << n << "-queens problem" << endl;

       }

 

       QueryPerformanceCounter((LARGE_INTEGER *)&tStop);

       TimeDiff = (unsigned long)(((tStop - tStart) * 1000000) / freq);

       cout << "Took " << TimeDiff/1e6 << "s" << endl;

       cout << gcCycles << " gc cycles => every " << TimeDiff/gcCycles << "us" << endl;

       getc(stdin);

       return 0;

}

 

--

Dr Jon Harrop, Flying Frog Consultancy Ltd.

http://www.ffconsultancy.com

 

 

Jon Harrop

unread,
Dec 6, 2012, 11:10:48 AM12/6/12
to pragmatic-functional...@googlegroups.com
Turns out this prototype does work on my 32-bit machine. It just doesn't work in 64-bit...
Reply all
Reply to author
Forward
0 new messages