Garbage collector problems

59 views
Skip to first unread message

Eric Fode

unread,
Jan 19, 2010, 12:10:36 PM1/19/10
to golang-nuts
I am having trouble getting this program to garbage collect when i
release the root node of the "geneTree" this happens in deleteLeafs
and is started in train any ideas?
Thanks in advance,
Eric Fode

package trainers


import (
"../genome"
"fmt"
"io"
"io/ioutil"
"os"
"malloc"
"strings"
"sort"

)

func init() { fmt.Print("Test") }

// GenePart is one of the 256 per-byte slots in a GeneNode. It counts
// how often its byte value was seen at this position and, once the
// count is high enough, points at a deeper GeneNode.
type GenePart struct {
DataValue byte
HitNumber uint16
Next *GeneNode
}
// GeneData is a flattened leaf: the byte sequence that led to the leaf
// plus the leaf's hit count.
type GeneData struct {
DataValue []byte
HitNumber uint16
}
// GeneNode is one level of the 256-ary training tree. Depth is the
// node's distance from the root; GeneParts fan out by next-byte value.
type GeneNode struct {
DataValue byte
Depth uint8
HitNumber uint32
GeneParts [256]GenePart
}

// BruteTreeTrainer owns the root of the gene tree and the running list
// of highest-frequency gene candidates.
type BruteTreeTrainer struct {
root *GeneNode
curHighest []GeneData
}

// NewBruteTreeTrainer returns a trainer with a fresh root tree and an
// empty highest-gene list.
func NewBruteTreeTrainer() *BruteTreeTrainer {
	return &BruteTreeTrainer{
		root:       newTree(),
		curHighest: make([]GeneData, 0),
	}
}

// newTree returns a zeroed root GeneNode whose 256 GeneParts each carry
// their own index as DataValue and a zero hit count.
//
// Bug fixed: the original ranged over root.GeneParts by value, so every
// assignment went to a loop-local copy and the initialization was lost.
func newTree() *GeneNode {
	root := new(GeneNode)
	root.Depth = 0
	root.DataValue = 0
	root.HitNumber = 0
	for index := range root.GeneParts {
		root.GeneParts[index].DataValue = uint8(index)
		root.GeneParts[index].HitNumber = 0
	}
	return root
}

const NextNodeBound int = 2
const MaxDepth int = 15
//creates a new GeneNode with b as its counter
// contains reports whether i occurs in slice.
//
// Bug fixed: the original returned false when it found a match and true
// otherwise — the exact inverse of what the name promises.
func contains(i int, slice []int) bool {
	for _, cur := range slice {
		if cur == i {
			return true
		}
	}
	return false
}
// numLeafs counts the leaves in the subtree rooted at gn. A node with
// no populated children is itself a single leaf.
func (gn *GeneNode) numLeafs() (num int) {
	children := 0
	for i := range gn.GeneParts {
		if next := gn.GeneParts[i].Next; next != nil {
			// ask each child how many leaves it has
			num += next.numLeafs()
			children++
		}
	}
	// no children at all: this node is the leaf
	if children == 0 {
		num = 1
	}
	return
}

// numChildern reports how many of gn's 256 parts point at a child node.
// (Name spelling kept for compatibility with existing callers.)
func (gn *GeneNode) numChildern() (num int) {
	for i := range gn.GeneParts {
		if gn.GeneParts[i].Next != nil {
			num++
		}
	}
	return
}
// GeneDataSlice adapts []GeneData to sort.Interface.
type GeneDataSlice []GeneData

// join returns a freshly allocated slice holding the elements of right
// followed by the elements of left. Neither input is modified.
func join(right, left []GeneData) []GeneData {
	merged := make([]GeneData, len(right)+len(left))
	copy(merged, right)
	copy(merged[len(right):], left)
	return merged
}

func (gn *GeneNode) collapseLeafs()([]GeneData){
//for each child
geneData := make([]GeneData,0)
//!fmt.Println("depth:",gn.Depth, "numChildern:", gn.numChildern
(),"numLeafs:", gn.numLeafs())
//if this is a leaf
//make a slice of the depth that this leaf is at
//put your data value at the end
//the the nodes data value at the next from the end
//return
if gn.numChildern() == 0 {
geneData = make([]GeneData,1)
geneData[0].HitNumber = uint16(gn.HitNumber)
geneData[0].DataValue = make([]byte,gn.Depth)
geneData[0].DataValue[gn.gettingDepth-1] = gn.DataValue
//!fmt.Println(" geneData:",geneData,"\n")
return geneData
}

//if this is a brach
//join all the leafs together
//add this data value to depths place in all of the slice
for _, child := range(gn.GeneParts){
//recurese this function down to its leaf
if(child.Next!=nil){
geneData = join(geneData,child.Next.collapseLeafs())
//! fmt.Println("cumlitive geneData:", geneData)
}
}
for _, curGeneData := range(geneData){
curGeneData.DataValue[gn.Depth] = gn.DataValue
}
///!fmt.Println("Final geneData:", geneData)
return geneData
}

func (gn *GeneNode) deleteLeafs(root *GeneNode){

for index, child := range(gn.GeneParts){
//recurese this function down to its leaf
if(child.Next!=nil){
child.Next.deleteLeafs(root)
//! fmt.Println("cumlitive geneData:", geneData)
gn.GeneParts[index].Next = root
}
malloc.Free(
}
return
}

//find the number of leafs on a node

// addNode hangs a fresh child off gn.GeneParts[b], one level deeper
// than gn, and initializes the child's 256 parts with their index
// bytes and zero hit counts.
//
// Bug fixed: the original ranged over the child's GeneParts by value,
// so DataValue/HitNumber were written to copies and discarded.
func (gn *GeneNode) addNode(b byte) {
	next := new(GeneNode)
	next.Depth = gn.Depth + 1
	next.DataValue = uint8(b)
	for index := range next.GeneParts {
		next.GeneParts[index].DataValue = uint8(index)
		next.GeneParts[index].HitNumber = 0
	}
	gn.GeneParts[b].Next = next
}

// AddByte records one training byte into the tree. b is a one-byte
// reslice of the training buffer; its capacity reveals how many bytes
// of the original data remain after it. The byte's GenePart counter is
// bumped; when it reaches NextNodeBound a child node is created, and
// beyond that the following byte is forwarded down to that child.
func (gn *GeneNode) AddByte(b []byte) {
//find the next GenePart and add one to its hit number
//!fmt.Println("at:", b[0], " old hit number:", gn.GeneParts[b[0]].HitNumber, "Depth:", gn.Depth)

gn.GeneParts[b[0]].HitNumber++
gn.HitNumber++

switch {
//if the hit number of a genePart is enough to create a new node
case gn.GeneParts[b[0]].HitNumber == uint16(NextNodeBound):
//add a node
gn.addNode(b[0])
//if it is greater than NextNodeBound
case gn.GeneParts[b[0]].HitNumber > uint16(NextNodeBound):
//then call AddByte with the next byte on the next node, if there are bytes left.
// NOTE(review): cap(b) > 1 (not len(b) > 1) is what detects remaining
// input — callers pass trainingData[i:i+1], whose cap extends to the
// end of the underlying buffer.
if cap(b) > 1 && gn.Depth < uint8(MaxDepth) {
gn.GeneParts[b[0]].Next.AddByte(b[1:2])
}
}
return
}

// Less orders GeneDataSlice by HitNumber, greatest first (inverted so
// sort.Sort yields a descending order).
//
// Bug fixed: the original used >=, which reports both Less(i, j) and
// Less(j, i) as true for equal hit numbers — not the strict weak
// ordering sort.Interface requires — and carried an unreachable
// trailing return.
func (gd GeneDataSlice) Less(i, j int) bool {
	return gd[i].HitNumber > gd[j].HitNumber
}

// Swap exchanges elements i and j.
func (gd GeneDataSlice) Swap(i, j int) {
	gd[i], gd[j] = gd[j], gd[i]
}

// Len reports the number of entries.
func (gd GeneDataSlice) Len() int {
	return len(gd)
}

// Sort orders the slice in place, highest HitNumber first, printing the
// same "s" progress marker as before.
func (gd GeneDataSlice) Sort() {
	fmt.Print("s")
	sort.Sort(gd)
}

// Train feeds trainingData one byte at a time into the gene tree and,
// once the root has absorbed more than 900000 hits, tears the tree
// down and starts a fresh one to bound memory use.
// TODO(review): consider taking an io.Reader instead of a []byte to
// deal with huge data sets (a movie, for example).
func (bt *BruteTreeTrainer) Train(trainingData []byte) {
fmt.Println(len(trainingData))
//from the minimum match size to the max ma
for index := 0; index < len(trainingData); index++ {
// progress marker every 1000 bytes
if (index % 1000) == 0 {
fmt.Print(",")
}
bt.root.AddByte(trainingData[index : index+1])
}
if bt.root.HitNumber >900000{

//bt.nodeSet()
//bt.curHighest = join(bt.curHighest,bt.nodeSet())
fmt.Println(malloc.GetStats())
bt.root.deleteLeafs(bt.root)
//bt.root = nil
// NOTE(review): with malloc.GC() left commented out, collection only
// happens whenever the runtime next decides to run it — the stats
// printed below may not reflect the teardown above.
bt.root = newTree()
//malloc.GC()
fmt.Println(malloc.GetStats())
fmt.Print("i should have collocted garbage")
//time.Sleep(1000)


}

}

//Returns the nthHighestNodes this is decied from the top down
//so if there are three nodes that are valid at the top level of the
tree
//and 2 lower down and the user asks for 5 nodes then it will return
//first the top one then the 2 lower ones
// nthHighest collapses the tree to GeneData, sorts it by descending
// HitNumber, and returns the top n entries (all of them when fewer
// than n exist).
//
// Bug fixed: when n exceeded the slice length the original clamped to
// len(gds)-1, silently dropping the final element.
func (bt *BruteTreeTrainer) nthHighest(n int) []GeneData {
	gds := GeneDataSlice(bt.root.collapseLeafs())
	gds.Sort()
	if n > len(gds) {
		n = len(gds)
	}
	return gds[0:n]
}

// nodeSet returns every collapsed leaf of the current tree as a
// sortable GeneDataSlice.
func (bt *BruteTreeTrainer) nodeSet() []GeneData {
	return GeneDataSlice(bt.root.collapseLeafs())
}

func (trainer *BruteTreeTrainer) TrainOnDir(dir string) (err os.Error)
{
//open the directory
files, err := ioutil.ReadDir(dir)
if err != nil {
return err
}
for _, fileDesc := range (files) {
if fileDesc.IsRegular() {
file, err := os.Open(strings.Join([]string{dir, fileDesc.Name},
"/"), os.O_RDONLY, 0666)
if err != nil {
fmt.Println("oops!!", err)
}
fReader := io.Reader(file)
data, err := ioutil.ReadAll(fReader)
trainer.Train(data)
}
}
return
}
//func compareArray(bytes []byte, index, i int) (isThereGene bool) { }
func (trainer *BruteTreeTrainer) TrainOnDirPart(dir string, howMany
int) (err os.Error) {
//open the directory
files, err := ioutil.ReadDir(dir)
fmt.Println("trainging on dir ! = finished file . = finished pass , =
finished pass part")
if err != nil {
return err
}
for x := 0; x <= howMany; x++ {
fileDesc := files[x]
if fileDesc.IsRegular() {
file, err := os.Open(strings.Join([]string{dir, fileDesc.Name},
"/"), os.O_RDONLY, 0666)
if err != nil {
fmt.Println("oops!!", err)
}
fReader := io.Reader(file)
data, err := ioutil.ReadAll(fReader)
trainer.Train(data)
fmt.Print("!")
}
}
return
}
// needs to be implented
func (trainer *BruteTreeTrainer) GetGenes() (genes []genome.Gene, err
os.Error) {
genes = nil
err = nil
return
}
/*
func OpenTrainer(data io.Reader) (err os.Error) {
return
}

func Save(data io.Writer) (err os.Error) {}
*/
// Output prints the five most frequent collapsed genes.
func (bt *BruteTreeTrainer) Output() {
	fmt.Println("current Highest occurance", bt.nthHighest(5))
}

Ian Lance Taylor

unread,
Jan 19, 2010, 12:40:29 PM1/19/10
to Eric Fode, golang-nuts
Eric Fode <eric...@gmail.com> writes:

> I am having trouble getting this program to garbage collect when i
> release the root node of the "geneTree" this happens in deleteLeafs
> and is started in train any ideas?

What are the symptoms of the problem?


> func (gn *GeneNode) deleteLeafs(root *GeneNode){
>
> for index, child := range(gn.GeneParts){
> //recurese this function down to its leaf
> if(child.Next!=nil){
> child.Next.deleteLeafs(root)
> //! fmt.Println("cumlitive geneData:", geneData)
> gn.GeneParts[index].Next = root
> }
> malloc.Free(
> }
> return
> }

Your function seems to be missing an argument to malloc.Free.

Ian

Eric Fode

unread,
Jan 19, 2010, 12:48:34 PM1/19/10
to golang-nuts
yes that was a typo in my post, that line should not be there. The
symptoms are, even after i set all of the pointers to nil in the tree
starting from the bottom up, after the garbage collector runs the
program does not release its memory

On Jan 19, 9:40 am, Ian Lance Taylor <i...@google.com> wrote:


> Eric Fode <ericf...@gmail.com> writes:
> > I am having trouble getting this program to garbage collect when i
> > release the root node of the "geneTree" this happens in deleteLeafs
> > and is started in train any ideas?
>
> What are the symptoms of the problem?
>
> > func (gn *GeneNode) deleteLeafs(root *GeneNode){
>
> >    for index, child := range(gn.GeneParts){
> >            //recurese this function down to its leaf
> >            if(child.Next!=nil){
> >                    child.Next.deleteLeafs(root)
> >            //!     fmt.Println("cumlitive geneData:", geneData)
> >                    gn.GeneParts[index].Next = root
> >            }
> >        
> >    }

Ian Lance Taylor

unread,
Jan 19, 2010, 1:13:24 PM1/19/10
to Fode, golang-nuts
Fode <eric...@gmail.com> writes:

> The symptoms are that when I set
> every thing to nil from the bottom of the tree up
> it does not return the memory to the operating system after running the
> garbage collector

The way Go works at present, the garbage collector never returns
memory to the operating system. It frees up memory in its internal
data structures and reuses that space for future allocations.
Assuming there are no leaks in your program, what you should expect to
see is that the program grows to some size and then reaches a steady
state.

Ian

Fode

unread,
Jan 19, 2010, 1:17:36 PM1/19/10
to Ian Lance Taylor, golang-nuts
That would explain why it never reports memory being returned. So just out of curiosity, i was looking at the run time library and there were a few todos talking about incremental scavenge what is involved in writing that?

Russ Cox

unread,
Jan 19, 2010, 1:42:25 PM1/19/10
to Fode, golang-nuts
I hate to suggest the obvious, but it looks like in

//bt.nodeSet()
//bt.curHighest = join(bt.curHighest,bt.nodeSet())
fmt.Println(malloc.GetStats())
bt.root.deleteLeafs(bt.root)
//bt.root = nil
bt.root = newTree()
//malloc.GC()
fmt.Println(malloc.GetStats())
fmt.Print("i should have collocted garbage")
//time.Sleep(1000)

the call to malloc.GC should be uncommented.
Otherwise the GC will happen the next time malloc decides
enough allocations have happened since the last collection.
I cut out the dead code from your program and told it to train on
$GOROOT/doc (full code below), and I get reasonable output from

alloc := malloc.GetStats().Alloc
bt.root.deleteLeafs(nil)
bt.root = nil
malloc.GC()
fmt.Println("Collected", alloc-malloc.GetStats().Alloc, "out of", alloc)
bt.root = new(GeneNode)

which prints

; 6g x.go
; 6l x.6
; 6.out
Collected 1237281024 out of 1248889616
Collected 1266371840 out of 1277115504
;

Russ

--- x.go

package main

import (
"fmt"
"io/ioutil"
"os"
"malloc"
)

const (
NextNodeBound = 2
MaxDepth = 15
)

type GenePart struct {
DataValue byte
HitNumber uint16
Next *GeneNode
}

type GeneData struct {
DataValue []byte
HitNumber uint16
}

type GeneNode struct {
DataValue byte
Depth uint8
HitNumber uint32
GeneParts [256]GenePart
}

type BruteTreeTrainer struct {
root *GeneNode
curHighest []GeneData
}

func NewBruteTreeTrainer() *BruteTreeTrainer {
return &BruteTreeTrainer{
new(GeneNode),
make([]GeneData, 0),
}
}

func (gn *GeneNode) deleteLeafs(root *GeneNode) {

for i := range gn.GeneParts {
p := &gn.GeneParts[i]
if p.Next != nil {
p.Next.deleteLeafs(root)
p.Next = root
}
}
}

func (gn *GeneNode) addNode(b byte) {

p := &gn.GeneParts[b]
p.Next = new(GeneNode)
p.Next.Depth = gn.Depth + 1
p.Next.DataValue = uint8(b)
for i, part := range p.Next.GeneParts {
part.DataValue = uint8(i)
part.HitNumber = 0
}
}

func (gn *GeneNode) AddByte(b []byte) {

p := &gn.GeneParts[b[0]]
p.HitNumber++
gn.HitNumber++

switch {
case p.HitNumber == NextNodeBound:
gn.addNode(b[0])
case p.HitNumber > NextNodeBound:
if cap(b) > 1 && gn.Depth < MaxDepth {
p.Next.AddByte(b[1:2])
}
}
}

func (bt *BruteTreeTrainer) Train(data []byte) {
for i := 0; i < len(data); i++ {
bt.root.AddByte(data[i : i+1])
}
if bt.root.HitNumber > 900000 {
alloc := malloc.GetStats().Alloc
bt.root.deleteLeafs(nil)
bt.root = nil
malloc.GC()
fmt.Println("Collected", alloc-malloc.GetStats().Alloc, "out of", alloc)
bt.root = new(GeneNode)
}
}

func (trainer *BruteTreeTrainer) TrainOnDir(dir string) (err os.Error) {

dirs, err := ioutil.ReadDir(dir)


if err != nil {
return err
}

for _, d := range dirs {
if d.IsRegular() {
file, err := os.Open(dir+"/"+d.Name, os.O_RDONLY, 0666)


if err != nil {
fmt.Println("oops!!", err)
}

data, err := ioutil.ReadAll(file)
trainer.Train(data)
}
}
return
}

func main() {
bt := NewBruteTreeTrainer()
bt.TrainOnDir(os.Getenv("GOROOT") + "/doc")
}

Fode

unread,
Jan 19, 2010, 1:52:21 PM1/19/10
to r...@golang.org, golang-nuts
The obvious is sometimes the best suggestion; yes, I think that may have been part of the problem. Does anyone know if the GC can detect orphaned pointers and clean them up? I fixed that and I still have a small memory leak that I can't find.
Again thanks for your guys's time 

Fode

unread,
Jan 19, 2010, 2:00:20 PM1/19/10
to r...@golang.org, golang-nuts
Is there any way to use memory pages?

Russ Cox

unread,
Jan 19, 2010, 2:07:37 PM1/19/10
to Fode, golang-nuts
On Tue, Jan 19, 2010 at 10:52, Fode <eric...@gmail.com> wrote:
> The obvious is sometimes the best suggestion, yes i think that may have been
> part of the problem. Does any one know if the GC can detect orphaned
> pointers and clean them up? i fixed that and i still have a small memory
> leak that i cant find.

The garbage collector reclaims cyclic data too.
In the program I posted before, if I remove the call
to deleteLeafs, I get:

; 6.out
Collected 1087808512 out of 1248889648
Collected 1243567216 out of 1411300608
;

The collector probably sees some garbage on the
stack that looks like pointers and keeps it from
reclaiming as much as it did with deleteLeafs, but
it will get the rest on a future round.

> Is there any way to use memory pages?

I don't understand the question.
Normal allocation uses memory, in pages.

Russ

Fode

unread,
Jan 19, 2010, 2:11:58 PM1/19/10
to r...@golang.org, golang-nuts
I am referring to using swap as opposed to ram. i am not really sure what that is called but if there was a way i could force it to use that in some places that would be awesome any ideas on how?

Jessta

unread,
Jan 19, 2010, 6:17:34 PM1/19/10
to Fode, golan...@googlegroups.com
2010/1/20 Fode <eric...@gmail.com>:

> I am referring to using swap as opposed to ram. i am not really sure what
> that is called but if there was a way i could force it to use that in some
> places that would be awesome any ideas on how?
>

Swap doesn't work that way.
It sounds to me like you're looking to make a file.

- jessta
--
=====================
http://jessta.id.au

Fode

unread,
Jan 19, 2010, 6:24:32 PM1/19/10
to Jessta, golan...@googlegroups.com
Yeah, you're right, and in fact that is exactly what I just did, though I still have the same problem of the garbage collector not releasing everything.
if you run this code and watch the output from the garbage collector you will see what i mean (btw it compiles this time)
Thanks ! eric
package main

import (

"fmt"
"io"
"io/ioutil"
"os"
"malloc"
"strings"
"sort"
"bytes"
"gob"
)

func init() { fmt.Print("Test") }

type GenePart struct {
DataValue byte
HitNumber uint16
Next      *GeneNode
}
type GeneData struct {
DataValue []byte
HitNumber uint16
}
type GeneNode struct {
DataValue byte
Depth     uint8
HitNumber uint32
GeneParts [256]GenePart
}

type BruteTreeTrainer struct {
root       *GeneNode
curHighest []GeneData
}

func NewBruteTreeTrainer() *BruteTreeTrainer {
bt := new(BruteTreeTrainer)
bt.curHighest = make([]GeneData, 0)
bt.root = newTree()
return bt
}

// newTree returns a root GeneNode whose 256 parts carry their index as
// DataValue and a starting HitNumber of 1.
//
// Bug fixed: the original ranged over root.GeneParts by value, so the
// per-part initialization wrote to loop-local copies and was lost.
func newTree() *GeneNode {
	root := new(GeneNode)
	root.Depth = 0
	root.DataValue = 0
	root.HitNumber = 0
	for index := range root.GeneParts {
		root.GeneParts[index].DataValue = uint8(index)
		root.GeneParts[index].HitNumber = 1
	}
	return root
}

const NextNodeBound int = 3
const MaxDepth int = 10
//creates a new geneNode wit b as it counter
// contains reports whether i occurs in slice.
//
// Bug fixed: the original returned false on a match and true otherwise,
// the inverse of its name.
func contains(i int, slice []int) bool {
	for _, cur := range slice {
		if cur == i {
			return true
		}
	}
	return false
}
func (gn *GeneNode) numLeafs() (num int) {
tally := 0
//for all of the decendent nodes
for _, cur := range (gn.GeneParts) {
//ask them how mnay leafs they have
if cur.Next != nil {
num += cur.Next.numLeafs()
tally++
}
}
//if this is a leaf return 1
if tally == 0 {
num = 1
}
return

}

func (gn *GeneNode) numChildern() (num int) {
num = 0
for _, cur := range (gn.GeneParts) {
if cur.Next != nil {
num++
}
}
return
}

type GeneDataSlice []GeneData

func join(right, left []GeneData) []GeneData {
//!fmt.Print("j")
newGeneData := make([]GeneData, len(right)+len(left))
for index, item := range (right) {
newGeneData[index] = item
}
for index, item := range (left) {
newGeneData[index+len(right)] = item
}
right = newGeneData
return right
}

// merge combines two GeneData slices, deduplicating by DataValue: an
// entry of left whose DataValue byte-equals an entry of right has its
// HitNumber folded into right's entry; unmatched left entries are
// appended after all of right. "m"/"M" are progress markers.
// NOTE(review): O(len(left)*len(right)); assumes right itself has no
// duplicate DataValues — verify against callers.
func merge(right, left []GeneData) []GeneData {

//find the size of the new array by finding the number of matches
fmt.Print("m")
dups := 0
for _, item := range (left) {

for _, itemr := range (right) {
if bytes.Equal(item.DataValue, itemr.DataValue) {
dups++
break
}
}
}
// allocate exactly enough for right plus the non-duplicate part of left
newGeneData := make([]GeneData, len(right)+len(left)-dups)
for index, item := range (right) {
newGeneData[index] = item
}
tally := 0
for _, item := range (left) {
matched := false
for indexr, itemr := range (right) {
if bytes.Equal(item.DataValue, itemr.DataValue) {
// duplicate: accumulate the hit count into right's copy
newGeneData[indexr].HitNumber += item.HitNumber
matched = true
break
}
}
if !matched {
newGeneData[len(right)+tally] = item
tally++
}
}
fmt.Print("M")

return newGeneData
}

// collapseLeafs flattens the subtree rooted at gn into a []GeneData:
// each leaf yields one entry with a depth-sized DataValue buffer, and
// every branch on the way back up stamps its byte into the slot for
// its own depth.
//
// Bug fixed: the final loop ranged over geneData by value, so the
// write of gn.DataValue mutated a loop-local copy and was lost; a
// guard was also added for a depth-0 leaf, which previously indexed
// DataValue[-1] and panicked.
func (gn *GeneNode) collapseLeafs() []GeneData {
	// Leaf: one entry carrying this node's hit count and byte path tail.
	if gn.numChildern() == 0 {
		leaf := make([]GeneData, 1)
		leaf[0].HitNumber = uint16(gn.HitNumber)
		leaf[0].DataValue = make([]byte, gn.Depth)
		if gn.Depth > 0 {
			leaf[0].DataValue[gn.Depth-1] = gn.DataValue
		}
		return leaf
	}

	// Branch: gather every child's leaves, then stamp our byte in.
	geneData := make([]GeneData, 0)
	for i := range gn.GeneParts {
		if next := gn.GeneParts[i].Next; next != nil {
			geneData = join(geneData, next.collapseLeafs())
		}
	}
	for i := range geneData {
		geneData[i].DataValue[gn.Depth] = gn.DataValue
	}
	return geneData
}

// deleteLeafs walks the subtree bottom-up, nilling every child pointer
// so the detached nodes become collectible. The root parameter is kept
// for interface compatibility but no longer referenced after the walk.
func (gn *GeneNode) deleteLeafs(root *GeneNode) {
	for index := range gn.GeneParts {
		if gn.GeneParts[index].Next == nil {
			continue
		}
		gn.GeneParts[index].Next.deleteLeafs(root)
		gn.GeneParts[index].Next = nil
	}
}

//find the number of leafs on a node

// addNode hangs a fresh child off gn.GeneParts[b], one level deeper
// than gn, initializing the child's parts with their index bytes.
//
// Bug fixed: the original ranged over the child's GeneParts by value,
// so the initialization wrote to copies and was discarded.
func (gn *GeneNode) addNode(b byte) {
	next := new(GeneNode)
	next.Depth = gn.Depth + 1
	next.DataValue = uint8(b)
	for index := range next.GeneParts {
		next.GeneParts[index].DataValue = uint8(index)
		next.GeneParts[index].HitNumber = 0
	}
	gn.GeneParts[b].Next = next
}

//will take a byte to add to the tree then if it wants anouther byte for the next node it returns true. if it returns false then it
//hit a leaf
func (gn *GeneNode) AddByte(b []byte) {
//find the next GenePart and add one to its hit number
//!fmt.Println("at:", b[0], " old hit number:", gn.GeneParts[b[0]].HitNumber, "Depth:",gn.Depth)

gn.GeneParts[b[0]].HitNumber++
gn.HitNumber++

switch {
//if the hit number of a genePart is enugh to create a new node
case gn.GeneParts[b[0]].HitNumber == uint16(NextNodeBound):
//!fmt.Println("    adding node")
//add a node
gn.addNode(b[0])
//if it is greater tehn then Next node
case gn.GeneParts[b[0]].HitNumber > uint16(NextNodeBound):
//!fmt.Println("    calling Down")
//then call AddByte with the next byte on the next node if there are bytes left
if cap(b) > 1 && gn.Depth < uint8(MaxDepth) {
gn.GeneParts[b[0]].Next.AddByte(b[1:2])
}
}
return
}

//inverted becuase it need to be sorted greatest to least
func (gd GeneDataSlice) Less(i, j int) bool {
if gd[i].HitNumber >= gd[j].HitNumber {
return true
} else {
return false
}
return false
}
func (gd GeneDataSlice) Swap(i, j int) {
tempd := gd[i]
gd[i] = gd[j]
gd[j] = tempd
return
}
func (gd GeneDataSlice) Len() int { return len(gd) }
func (gd GeneDataSlice) Sort() {
sortable := sort.Interface(gd)
fmt.Print("s")
sort.Sort(sortable)
fmt.Print("S")
return
}
func (bt *BruteTreeTrainer) Save(gobSize int,file io.ReadWriteSeeker)(int){
encoder := gob.NewEncoder(file)
decoder := gob.NewDecoder(file)
geneData := make([]GeneData, gobSize)
file.Seek(0, 0)
for i := 0; i < gobSize; i++{
i++
err := decoder.Decode(&geneData[i])
if err != nil {
panic(err.String())
}
}

data := merge(geneData, bt.nodeSet())
file.Seek(0, 0)
for _, item := range (data) {
err := encoder.Encode(item)
if err != nil {
panic(err.String())
}
}
gobSize += len(data)
data = make([]GeneData,0)
//gob.
geneData = nil
fmt.Println(malloc.GetStats().Alloc)
bt.root.deleteLeafs(bt.root)
bt.root = nil
malloc.GC()
bt.root = newTree()
fmt.Println(malloc.GetStats())
fmt.Print("i should have collocted garbage")
return gobSize
}

//should consider changeing this to a io.Reader instead of a []byte to deal with huge data sets (a movie for example)
func (bt *BruteTreeTrainer) Train(trainingData []byte, file io.ReadWriteSeeker,gobSize int)(int) {
fmt.Println(len(trainingData))
//from the minimum match size to the max ma
for index := 0; index < len(trainingData); index++ {
if (index % 1000) == 0 {
fmt.Print(",")
}
if bt.root.HitNumber > 5000000 {
gobSize = bt.Save(gobSize,file)
//bt.nodeSet()
//time.Sleep(1000)
}
bt.root.AddByte(trainingData[index : index+1])
}
bt.Save(gobSize,file)
return gobSize
}

//Returns the nthHighestNodes this is decied from the top down
//so if there are three nodes that are valid at the top level of the tree
//and 2 lower down and the user asks for 5 nodes then it will return
//first the top one then the 2 lower ones
// nthHighest collapses the tree, sorts the leaves by descending
// HitNumber, and returns the top n (all of them when fewer exist).
//
// Bug fixed: when n exceeded the slice length the original clamped to
// len(gds)-1, silently dropping the final element.
func (bt *BruteTreeTrainer) nthHighest(n int) []GeneData {
	gds := GeneDataSlice(bt.root.collapseLeafs())
	gds.Sort()
	if n > len(gds) {
		n = len(gds)
	}
	return gds[0:n]
}

func (bt *BruteTreeTrainer) nodeSet() []GeneData {
slice := bt.root.collapseLeafs()
gds := GeneDataSlice(slice)
return gds
}

// TrainOnDir trains on every regular file directly inside dir,
// streaming intermediate results into a genome scratch file.
//
// Fixes: the scratch file's flags used XOR (os.O_CREATE^os.O_RDWR)
// instead of the intended bitwise OR, its Open error was ignored, a
// failed per-file Open no longer falls through to ReadAll on the bad
// handle, and ReadAll errors are checked.
func (trainer *BruteTreeTrainer) TrainOnDir(dir string) (err os.Error) {
	//open the directory
	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return err
	}
	gobSize := 0
	genomeFile, err := os.Open("/home/eric/win/temp.genome", os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		return err
	}
	for _, fileDesc := range files {
		if !fileDesc.IsRegular() {
			continue
		}
		file, err := os.Open(strings.Join([]string{dir, fileDesc.Name}, "/"), os.O_RDONLY, 0666)
		if err != nil {
			fmt.Println("oops!!", err)
			continue
		}
		data, err := ioutil.ReadAll(io.Reader(file))
		file.Close()
		if err != nil {
			fmt.Println("oops!!", err)
			continue
		}
		gobSize = trainer.Train(data, io.ReadWriteSeeker(genomeFile), gobSize)
	}
	genomeFile.Close()
	return
}
//func compareArray(bytes []byte, index, i int) (isThereGene bool) { }
// TreeTrainOnDirPart trains a fresh BruteTreeTrainer on each of the
// first howMany regular files in dir, printing "!" per finished file.
//
// Fixes: the scratch file's flags used XOR (os.O_CREATE^os.O_RDWR)
// instead of the intended bitwise OR and its Open error was ignored;
// the loop is bounded by len(files) so a large howMany cannot index
// out of range; failed per-file opens/reads are skipped, not used.
func TreeTrainOnDirPart(dir string, howMany int) (err os.Error) {
	//open the directory
	files, err := ioutil.ReadDir(dir)
	fmt.Println("trainging on dir ! = finished file . = finished pass , = finished pass part")
	if err != nil {
		return err
	}
	gobSize := 0
	genomeFile, err := os.Open("/home/eric/win/temp.genome", os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		return err
	}
	for x := 0; x < howMany && x < len(files); x++ {
		fileDesc := files[x]
		if !fileDesc.IsRegular() {
			continue
		}
		file, err := os.Open(strings.Join([]string{dir, fileDesc.Name}, "/"), os.O_RDONLY, 0666)
		if err != nil {
			fmt.Println("oops!!", err)
			continue
		}
		data, err := ioutil.ReadAll(io.Reader(file))
		file.Close()
		if err != nil {
			fmt.Println("oops!!", err)
			continue
		}
		trainer := NewBruteTreeTrainer()
		gobSize = trainer.Train(data, io.ReadWriteSeeker(genomeFile), gobSize)
		//trainer.Output()
		fmt.Print("!")
	}
	genomeFile.Close()
	return
}
// needs to be implented

/*
func OpenTrainer(data io.Reader) (err os.Error) {
return
}

func Save(data io.Writer) (err os.Error) {}
*/
func (bt *BruteTreeTrainer) Output() {
fmt.Println("current Highest occurance", bt.nthHighest(30))
return
}
func main() {
//train := trainers.NewBruteTreeTrainer()
TreeTrainOnDirPart("/usr/bin", 200)
//train.Output()

Esko Luontola

unread,
Jan 19, 2010, 6:35:32 PM1/19/10
to golang-nuts
On Jan 19, 9:11 pm, Fode <ericf...@gmail.com> wrote:
> I am referring to using swap as opposed to ram. i am not really sure what
> that is called but if there was a way i could force it to use that in some
> places that would be awesome any ideas on how?

Are you talking about memory-mapped files?
http://en.wikipedia.org/wiki/Memory-mapped_file

Fode

unread,
Jan 19, 2010, 6:36:32 PM1/19/10
to Esko Luontola, golang-nuts
That would work well yes

Fode

unread,
Jan 19, 2010, 6:56:09 PM1/19/10
to Esko Luontola, golang-nuts
I believe I have figured out the problem: it is because it is a recursive function, and the garbage collector is ignoring the parameters because they are on the stack — if I understand issue 353 correctly.

Ian Lance Taylor

unread,
Jan 20, 2010, 1:18:16 AM1/20/10
to Scott Solmonson, Fode, golang-nuts
Scott Solmonson <sco...@scosol.org> writes:

> I've run in to this in other garbage-collected language
> implementations and still fail to understand why anyone thinks this
> is advisable behavior versus returning the memory to the OS.

The heap works this way in most languages by default and in most
applications which do their own memory management. It's because most
programs tend to reach a steady state in memory usage. Returning
memory to the OS is often soon followed by allocating memory from the
OS. Both OS operations are significantly more expensive than managing
a free list in allocated memory.

Ian

Scott Solmonson

unread,
Jan 20, 2010, 12:31:24 AM1/20/10
to Ian Lance Taylor, Fode, golang-nuts
I've run in to this in other garbage-collected language implementations and still fail to understand why anyone thinks this is advisable behavior versus returning the memory to the OS.
Poorly-written memory-leaking programs will cause problems either way, but incremental allocate->verify->delayed_release paradigms (needed by some kinds of things) will cause this type of non-releasing VM to just naturally grow its usage over time for no reason.

Memory is used for a lot more things than your precious VM, and the OS is there and highly tuned to optimize its usage and behavior- so make use of it :)

-SS

--
NUNQUAM NON PARATUS
V: 408.718.6290

Scott Solmonson

unread,
Jan 20, 2010, 2:02:07 AM1/20/10
to Ian Lance Taylor, Fode, golang-nuts
I think my key problem is "never returned to the OS"- it's never a good idea.

It of course makes sense to maintain a bit of app-internal memory overhead to accommodate the thrashing needs of the app, but even just a simple last_access->timeout->free is all that's needed to let the OS do its job while still preserving that functionality-

--
NUNQUAM NON PARATUS
V: 408.718.6290

befelemepeseveze

unread,
Jan 20, 2010, 10:13:09 AM1/20/10
to golang-nuts
On 20 led, 08:02, Scott Solmonson <sco...@scosol.org> wrote:
> I think my key problem is "never returned to the OS"- it's never a good idea.

What problem do you see in "never returned to the OS"? If a process
doesn't use some of it's allocated virtual memory space (and possibly
marks it as not needing secondary storage backing) then this costs
virtually zero real memory as soon as other processes demands
allocations of more than available real memory. And if they don't,
then (also) nothing is lost.

-bflm

Fode

unread,
Jan 20, 2010, 10:16:12 AM1/20/10
to befelemepeseveze, golang-nuts
I don't think there is a problem with that either i think the problem lies in not cleaning up the stack, ever. This is an especially big problem when calling recursive functions and you have a large call stack.

Jon Harrop

unread,
Jan 20, 2010, 11:56:46 AM1/20/10
to golan...@googlegroups.com

But is it still scanned by the conservative GC?

--
Dr Jon Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/?e

Fode

unread,
Jan 20, 2010, 10:45:48 AM1/20/10
to Jon Harrop, golan...@googlegroups.com
I am not sure what you are asking, but from the looks of the bug report that i posted a link to i would say that while it may be scanned it is not really collected

Jon Harrop

unread,
Jan 20, 2010, 12:22:43 PM1/20/10
to Fode, golan...@googlegroups.com
On Wednesday 20 January 2010 15:45:48 Fode wrote:
> I am not sure what you are asking, but from the looks of the bug report
> that i posted a link to i would say that while it may be scanned it is not
> really collected

My point is that the OS cannot swap out the unused space if it is being
scanned by the GC. The GC will force it to remain resident.

Ian Lance Taylor

unread,
Jan 20, 2010, 11:18:27 AM1/20/10
to Jon Harrop, Fode, golan...@googlegroups.com
Jon Harrop <j...@ffconsultancy.com> writes:

> On Wednesday 20 January 2010 15:45:48 Fode wrote:
>> I am not sure what you are asking, but from the looks of the bug report
>> that i posted a link to i would say that while it may be scanned it is not
>> really collected
>
> My point is that the OS cannot swap out the unused space if it is being
> scanned by the GC. The GC will force it to remain resident.

The GC is being rewritten, so the current GC isn't too meaningful. In
any case, the current GC only scans allocated blocks. Of course,
there may be fragmentation.

Ian

Fode

unread,
Jan 20, 2010, 11:22:53 AM1/20/10
to Ian Lance Taylor, Jon Harrop, golan...@googlegroups.com
is that a temporary fix we can work on to make it so that it scans the stack when it starts to get to big? or am i misunderstanding how it works? 

Ian Lance Taylor

unread,
Jan 20, 2010, 11:42:26 AM1/20/10
to Fode, golan...@googlegroups.com
Fode <eric...@gmail.com> writes:

> is that a temporary fix we can work on to make it so that it scans the stack
> when it starts to get to big? or am i misunderstanding how it works?

Sorry, I think I lost some context. I'm not sure quite what the
problem is.

The garbage collection code is mainly in src/pkg/runtime/mgc0.c. The
data structures are documented in src/pkg/runtime/malloc.h. Please
feel free to take a look, with the understanding that much of it is
going to change.

Ian

Russ Cox

unread,
Jan 20, 2010, 11:51:13 AM1/20/10
to Jon Harrop, golan...@googlegroups.com
> But is it still scanned by the conservative GC?

No, the Go runtime's garbage collector does not scan
freed memory. In fact I am not aware of any garbage
collection algorithm that does. (What would it hope to find?)

Russ

Russ Cox

unread,
Jan 20, 2010, 12:02:58 PM1/20/10
to Fode, golan...@googlegroups.com
On Wed, Jan 20, 2010 at 07:45, Fode <eric...@gmail.com> wrote:
> I am not sure what you are asking, but from the looks of the bug report that
> i posted a link to i would say that while it may be scanned it is not really
> collected

You seem to be talking about the bug that keeps blocks
from being freed, because something on the current stack
looks like a pointer into the block, either because of old
temporary variables or accidents (integers that look like pointers).
When we do replace the garbage collector, we plan to take a
closer look at which it is. I suspect the former, and it may be
cheap and worthwhile to eagerly zero out temporary locations
used for pointers once they are dead. Or it may not, but we're
certainly going to look at that.

Everyone else seems to be talking about the fact that freed
memory is held in reserve for future allocation rather than
returned to the OS. As Ian said, it's common behavior and
generally not considered a bug. That said, the Go memory
allocator is based on Google's open source tcmalloc allocator,
which does have mechanisms for releasing memory back to the OS,
using timing and other considerations to make good decisions.
I expect that once we're happy with the garbage collector it may
make sense to adopt some of those mechanisms. Or it may not,
since they are tuned to manual allocation and freeing, but again
we're going to look at it.

Russ

Fode

unread,
Jan 20, 2010, 12:13:03 PM1/20/10
to r...@golang.org, golan...@googlegroups.com
Thank you for your time
Reply all
Reply to author
Forward
0 new messages