code review 5991049: runtime: speedup GC sweep phase (batch free) (issue 5991049)

413 views
Skip to first unread message

dvy...@google.com

unread,
Apr 5, 2012, 1:44:55 PM4/5/12
to golan...@googlegroups.com, re...@codereview-hr.appspotmail.com
Reviewers: golang-dev_googlegroups.com,

Message:
Hello golan...@googlegroups.com,

I'd like you to review this change to
https://go.googlecode.com/hg/


Description:
runtime: speedup GC sweep phase (batch free)

benchmark old ns/op new ns/op delta
garbage.BenchmarkParser 4370050250 3779668750 -13.51%
garbage.BenchmarkParser-2 3713087000 3628771500 -2.27%
garbage.BenchmarkParser-4 3519755250 3406349750 -3.22%
garbage.BenchmarkParser-8 3386627750 3319144000 -1.99%

garbage.BenchmarkTree 493585529 408102411 -17.32%
garbage.BenchmarkTree-2 500487176 402285176 -19.62%
garbage.BenchmarkTree-4 473238882 361484058 -23.61%
garbage.BenchmarkTree-8 486977823 368334823 -24.36%

garbage.BenchmarkTree2 31446600 31203200 -0.77%
garbage.BenchmarkTree2-2 21469000 21077900 -1.82%
garbage.BenchmarkTree2-4 11007600 10899100 -0.99%
garbage.BenchmarkTree2-8 7692400 7032600 -8.58%

garbage.BenchmarkParserPause 241863263 163249450 -32.50%
garbage.BenchmarkParserPause-2 120135418 112981575 -5.95%
garbage.BenchmarkParserPause-4 83411552 64580700 -22.58%
garbage.BenchmarkParserPause-8 51870697 42207244 -18.63%

garbage.BenchmarkTreePause 20940474 13147011 -37.22%
garbage.BenchmarkTreePause-2 20115124 11146715 -44.59%
garbage.BenchmarkTreePause-4 17217584 7486327 -56.52%
garbage.BenchmarkTreePause-8 18258845 7400871 -59.47%

garbage.BenchmarkTree2Pause 174067190 172674190 -0.80%
garbage.BenchmarkTree2Pause-2 131175809 130615761 -0.43%
garbage.BenchmarkTree2Pause-4 95406666 93972047 -1.50%
garbage.BenchmarkTree2Pause-8 86056095 85334952 -0.84%

garbage.BenchmarkParserLastPause 329932000 324790000 -1.56%
garbage.BenchmarkParserLastPause-2 209383000 210456000 +0.51%
garbage.BenchmarkParserLastPause-4 113981000 112921000 -0.93%
garbage.BenchmarkParserLastPause-8 77967000 76625000 -1.72%

garbage.BenchmarkTreeLastPause 29752000 18444000 -38.01%
garbage.BenchmarkTreeLastPause-2 24274000 14766000 -39.17%
garbage.BenchmarkTreeLastPause-4 19565000 8726000 -55.40%
garbage.BenchmarkTreeLastPause-8 21956000 10530000 -52.04%

garbage.BenchmarkTree2LastPause 314411000 311945000 -0.78%
garbage.BenchmarkTree2LastPause-2 214641000 210836000 -1.77%
garbage.BenchmarkTree2LastPause-4 110024000 108943000 -0.98%
garbage.BenchmarkTree2LastPause-8 76873000 70263000 -8.60%

Please review this at http://codereview.appspot.com/5991049/

Affected files:
M src/pkg/runtime/malloc.h
M src/pkg/runtime/mcentral.c
M src/pkg/runtime/mgc0.c


Index: src/pkg/runtime/malloc.h
===================================================================
--- a/src/pkg/runtime/malloc.h
+++ b/src/pkg/runtime/malloc.h
@@ -342,6 +342,7 @@
void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **first);
void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first);
+void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink
*start, MLink *end);

// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
Index: src/pkg/runtime/mcentral.c
===================================================================
--- a/src/pkg/runtime/mcentral.c
+++ b/src/pkg/runtime/mcentral.c
@@ -88,9 +88,6 @@
}

// Free n objects back into the central free list.
-// Return the number of objects allocated.
-// The objects are linked together by their first words.
-// On return, *pstart points at the first object and *pend at the last.
void
runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *start)
{
@@ -148,6 +145,42 @@
}
}

+// Free n objects from a span s back into the central free list c.
+// Called from GC.
+void
+runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start,
MLink *end)
+{
+ int32 size;
+
+ runtime·lock(c);
+
+ // Move to nonempty if necessary.
+ if(s->freelist == nil) {
+ runtime·MSpanList_Remove(s);
+ runtime·MSpanList_Insert(&c->nonempty, s);
+ }
+
+ // Add the objects back to s's free list.
+ end->next = s->freelist;
+ s->freelist = start;
+ s->ref -= n;
+ c->nfree += n;
+
+ // If s is completely freed, return it to the heap.
+ if(s->ref == 0) {
+ size = runtime·class_to_size[c->sizeclass];
+ runtime·MSpanList_Remove(s);
+ *(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
+ s->freelist = nil;
+ c->nfree -= (s->npages << PageShift) / size;
+ runtime·unlock(c);
+ runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+ runtime·MHeap_Free(&runtime·mheap, s, 0);
+ } else {
+ runtime·unlock(c);
+ }
+}
+
void
runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp,
int32 *nobj)
{
Index: src/pkg/runtime/mgc0.c
===================================================================
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -757,6 +757,8 @@
byte *p;
MCache *c;
byte *arena_start;
+ MLink *start, *end;
+ int32 nfree;

arena_start = runtime·mheap.arena_start;
p = (byte*)(s->start << PageShift);
@@ -770,6 +772,9 @@
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
+ nfree = 0;
+ start = end = nil;
+ c = m->mcache;

// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
@@ -806,21 +811,33 @@
// Mark freed; restore block boundary bit.
*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);

- c = m->mcache;
if(s->sizeclass == 0) {
// Free large span.
runtime·unmarkspan(p, 1<<PageShift);
*(uintptr*)p = 1; // needs zeroing
runtime·MHeap_Free(&runtime·mheap, s, 1);
+ c->local_alloc -= size;
+ c->local_nfree++;
} else {
// Free small object.
if(size > sizeof(uintptr))
((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
- c->local_by_size[s->sizeclass].nfree++;
- runtime·MCache_Free(c, p, s->sizeclass, size);
+ if(nfree)
+ end->next = (MLink*)p;
+ else
+ start = (MLink*)p;
+ end = (MLink*)p;
+ nfree++;
}
- c->local_alloc -= size;
- c->local_nfree++;
+ }
+
+ if(nfree) {
+ c->local_by_size[s->sizeclass].nfree += nfree;
+ c->local_alloc -= size * nfree;
+ c->local_nfree += nfree;
+ c->local_cachealloc -= nfree * size;
+ c->local_objects -= nfree;
+ runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, start,
end);
}
}



r...@golang.org

unread,
Apr 10, 2012, 3:09:03 PM4/10/12
to dvy...@google.com, golan...@googlegroups.com, re...@codereview-hr.appspotmail.com
LGTM

Kind of ridiculous how much this helps. Nice.


http://codereview.appspot.com/5991049/

Reply all
Reply to author
Forward
0 new messages