Hi,
I've created a testing program that imports all standard packages, then allocates 2 GB in equally sized blocks, then executes GC and then checks how many of the blocks are freed (source code is below, but it also requires patching mgc0.c to not do GC until manually triggered).
On Mac/386 it reports that 90%(!) of 1MB blocks are not freed (there may be some measurement error, but I don't think it is that large).
I've investigated the problem, and it turned out that basically all blocks are pinned by various tables in the data segment of the program (after adding some debug output to mgc0.c and merging it with the nm output, I can see which symbols cause the problems).
I think most cases can be fixed by turning static arrays into dynamic slices. For example, after applying the following patch:
------------------------------------------------------------
diff -r 7fec8679f10d src/pkg/math/pow10.go
--- a/src/pkg/math/pow10.go Fri Aug 26 17:45:19 2011 -0400
+++ b/src/pkg/math/pow10.go Tue Aug 30 11:29:47 2011 +0400
@@ -4,12 +4,18 @@
package math
+import (
+ "sync"
+)
+
// This table might overflow 127-bit exponent representations.
// In that case, truncate it after 1.0e38.
-var pow10tab [70]float64
+var pow10tab []float64
+var pow10tabOnce sync.Once
// Pow10 returns 10**e, the base-10 exponential of e.
func Pow10(e int) float64 {
+ pow10tabOnce.Do(pow10tabInit)
if e <= -325 {
return 0
} else if e > 309 {
@@ -26,7 +32,8 @@
return Pow10(m) * Pow10(e-m)
}
-func init() {
+func pow10tabInit() {
+ pow10tab = make([]float64, 70)
pow10tab[0] = 1.0e0
pow10tab[1] = 1.0e1
for i := 2; i < len(pow10tab); i++ {
------------------------------------------------------------
3 additional 1MB blocks get freed. Or, when I use 16-byte blocks, 70(!) additional blocks get freed, which basically means that every float64 in the table pins a heap object.
Before submitting any changes, I would like to know whether you think this (turning static arrays into dynamic slices) is the right approach. Potentially it also reduces executable size and/or startup time; fortunately, sync.Once is quite fast now :)
------------------------------------------------------------
// the testing utility
package main
import (
"flag"
"fmt"
"os"
"runtime"
)
import (
_ "container/heap"
_ "container/list"
_ "container/ring"
_ "container/vector"
_ "flag"
_ "json"
_ "path"
_ "strings"
_ "unsafe"
_ "crypto"
_ "fmt"
_ "log"
_ "rand"
_ "sync"
_ "url"
_ "archive/tar"
_ "archive/zip"
_ "csv"
_ "go/ast"
_ "go/build"
_ "go/doc"
_ "go/parser"
_ "go/printer"
_ "go/scanner"
_ "go/token"
_ "go/typechecker"
_ "go/types"
_ "mail"
_ "reflect"
_ "syscall"
_ "utf16"
_ "asn1"
_ "debug/dwarf"
_ "debug/elf"
_ "debug/gosym"
_ "debug/macho"
_ "debug/pe"
_ "gob"
_ "math"
_ "regexp"
_ "syslog"
_ "utf8"
_ "big"
_ "hash"
_ "mime"
_ "rpc"
_ "tabwriter"
_ "websocket"
_ "bufio"
_ "ebnf"
_ "html"
_ "net"
_ "runtime"
_ "template"
_ "xml"
_ "encoding/ascii85"
_ "encoding/base32"
_ "encoding/base64"
_ "encoding/binary"
_ "encoding/git85"
_ "encoding/hex"
_ "encoding/pem"
_ "http"
_ "netchan"
_ "scanner"
_ "testing"
_ "bytes"
_ "exec"
_ "image"
_ "old/template"
_ "smtp"
_ "time"
_ "cmath"
_ "exp/datafmt"
_ "exp/gui"
_ "exp/gui/x11"
_ "exp/norm"
_ "exp/regexp/syntax"
_ "exp/template/html"
_ "index/suffixarray"
_ "os"
_ "sort"
_ "try"
_ "compress/bzip2"
_ "compress/flate"
_ "compress/gzip"
_ "compress/lzw"
_ "compress/zlib"
_ "expvar"
_ "io"
_ "patch"
_ "strconv"
_ "unicode"
)
// main allocates -mem megabytes of heap in blocks of -size bytes, drops each
// block immediately, forces a garbage collection, and then reports how many
// of those blocks are still live ("pinned") after the collection.
//
// Flags:
//
//	-size  block size in bytes; must be a positive power of two
//	-mem   total amount to allocate, in megabytes; must be positive
//
// Invalid flags print the flag defaults and exit with status 1.
//
// NOTE(review): the counts are only meaningful if no collection runs before
// the explicit runtime.GC call below; otherwise the baseline a1 is already
// reduced by blocks that were freed early.
func main() {
	flagSize := flag.Int("size", 0, "alloc block size in bytes (must be a power of 2)")
	flagMem := flag.Int("mem", 0, "total mem to allocate in MB")
	flag.Parse()
	// x&(x-1) clears the lowest set bit, so it is zero only for powers of two.
	if *flagSize <= 0 || *flagSize&(*flagSize-1) != 0 || *flagMem <= 0 {
		flag.PrintDefaults()
		os.Exit(1)
	}
	sz := uintptr(*flagSize)
	// Compute the block count in 64 bits: mem*1024*1024 overflows a 32-bit
	// int (the 386 case) as soon as mem reaches 2048.
	cnt := int64(*flagMem) * 1024 * 1024 / int64(*flagSize)
	if cnt == 0 {
		// The block size is larger than the total budget; nothing to measure.
		fmt.Fprintf(os.Stderr, "size (%d bytes) exceeds mem (%d MB)\n", *flagSize, *flagMem)
		os.Exit(1)
	}

	// a0 is the number of live objects before the experiment starts.
	a0 := runtime.MemStats.Mallocs - runtime.MemStats.Frees
	for i := int64(0); i < cnt; i++ {
		p := make([]byte, sz)
		// Pass the block to a no-op function so the variable counts as used.
		func(p []byte) {
		}(p)
	}
	// a1 is the number of objects allocated by the loop and not yet freed.
	a1 := runtime.MemStats.Mallocs - runtime.MemStats.Frees - a0
	runtime.GC()
	// a2 is the number of those objects that survived the collection.
	a2 := runtime.MemStats.Mallocs - runtime.MemStats.Frees - a0
	if a1 == 0 {
		// Avoid dividing by zero when no block was live before the collection.
		fmt.Fprintln(os.Stderr, "no live blocks before GC; nothing to report")
		os.Exit(1)
	}
	fmt.Printf("%.2f%% pinned (%d)\n", float64(a2)*100/float64(a1), a2)
	// Wasted memory is the pinned fraction (a2/a1) of the total allocated MB.
	fmt.Printf("%dMB wasted\n", a2*uint64(*flagMem)/a1)
}