Garbage collection in multiprocessing

262 views
Skip to first unread message

Andrey Igoshev

unread,
Apr 22, 2016, 2:14:23 PM4/22/16
to pyevolve
Dear Developers,

I have started using pyevolve for my project and I have run into a problem with multiprocessing. I call an external C++ library from my pyevolve script, and after about 65±2 generations I get a message that it is
impossible to allocate more memory:


File "control.py", line 119, in <module>
ga.evolve(freq_stats=1)
File "build/bdist.linux-x86_64/egg/pyevolve/GSimpleGA.py", line 824, in evolve
File "build/bdist.linux-x86_64/egg/pyevolve/GSimpleGA.py", line 668, in step
File "build/bdist.linux-x86_64/egg/pyevolve/GPopulation.py", line 373, in evaluate
File "/usr/lib/python2.7/multiprocessing/pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 558, in get
raise self._value
OSError: [Errno 12] Cannot allocate memory

As for actual physical memory, there is plenty available:
total used free shared buffers cached 
Mem: 125G 122G 3.0G 6.5M 12M 115G 
Low: 125G 122G 3.0G 
High: 0B 0B 0B 
-/+ buffers/cache: 7.7G 118G 
Swap: 127G 4K 127G 

I looked both in my C++ library and in the pyevolve code, and I noticed that the garbage-collection pattern demonstrated in the multiprocessing test suite is not followed:
 #
    # Check garbage collection
    #

    print 'Testing garbage collection:'

    pool = multiprocessing.Pool(2)
    DELTA = 0.1
    processes = pool._pool
    ignore = pool.apply(pow3, [2])
    results = [pool.apply_async(time.sleep, [DELTA]) for i in range(100)]

    results = pool = None

    time.sleep(DELTA * 2)

    for worker in processes:
        assert not worker.is_alive()

    print '\tgarbage collection succeeded\n'


While in the pyevolve code I can see (line 368 of GPopulation.py):

if self.multiProcessing[0] and MULTI_PROCESSING:
logging.debug("Evaluating the population using the multiprocessing method")
proc_pool = Pool(processes=self.multiProcessing[2])
# Multiprocessing full_copy parameter
if self.multiProcessing[1]:
results = proc_pool.map(multiprocessing_eval_full, self.internalPop)
proc_pool.close()
proc_pool.join()
for i in xrange(len(self.internalPop)):
self.internalPop[i] = results[i]
else:
results = proc_pool.map(multiprocessing_eval, self.internalPop)
proc_pool.close()
proc_pool.join()
for individual, score in zip(self.internalPop, results):
individual.score = score
else:
for ind in self.internalPop:
ind.evaluate(**args)
So, the references to the Pool are not freed. 
Is it really the problem?

This is my script:

from pyevolve import G1DList, Mutators, DBAdapters

from pyevolve import GSimpleGA, Initializators, Selectors

import time

import ctypes

from ctypes import cdll, c_double, c_int, cast, c_char

import numpy as np

from subprocess import call

import os


def eval_func(chromosome):


    n_vel_model = c_int(10);

    vh_vel_model = c_double(100.0);

    sigma_vel_model = (c_double * 10)()


    # iterate over the chromosome

    i = 0

#    g = open ('intermediate_steps.txt', 'a')


    for value in chromosome:

        sigma_vel_model[i] = c_double(value);


#        g.write(str(value) + '\t')


        print value

        i = i + 1


    cast(sigma_vel_model, ctypes.POINTER(c_double))


    score = lib.wrap_loglikelihood (n_vel_model, vh_vel_model, sigma_vel_model)


  #  g.write (str(score) + '\n')


    g = open ('intermediate_steps.txt', 'a')

    for value in chromosome:

        #sigma_vel_model[i] = c_double(value);


        g.write(str(value) + '\t')


    g.write (str(score) + '\n')

    g.close()


 #   call(["free", "-lh"])

 #   call(["pmap", "-x <current pid>"])

 #   call(["ps", "aux"]) 


#print os.getpid()


#    line=str(os.getpid())


#    call(["free", "-lh"])

#    call(["pmap", line])

#    call(["ps", "aux"])



    print 'score is ', score + 4000.0

    score = score + 4000.0


    return score


if __name__ == '__main__':


        lib = cdll.LoadLibrary('./likelihood.so')    ## use our library written in C

        lib.initilise_catalogue.restype = ctypes.c_int          ## Declare a type of return argument (important)

        lib.wrap_loglikelihood.restype = ctypes.c_double


        name_of_file = (c_char * 20) ()


        strl = 'master_rev.dat'



        #strl='test_cat.txt'

        #strl='test_cat_250.txt'

        #strl='test_cat_better_meas.txt'

        #strl='test_cat_bm.txt'

        #strl='test_cat25bm.txt'

        #strl='tk_50_ne.txt'

        #strl='tk_300_ne.txt'

        #strl='tk_300_ne2.txt'


        for i in range (0, len(strl)):

                name_of_file[i] = strl[i]


        cast(name_of_file, ctypes.POINTER(c_char))



        lib.initilise_catalogue(name_of_file)


        g = open ('intermediate_steps.txt', 'w')

        g.close()


        #t = open ('memory_log.txt', 'w')

        #t.close()





        genome = G1DList.G1DList(10)

        genome.evaluator.set(eval_func)

        genome.setParams(rangemin=0.0, rangemax=1.0)


        genome.initializator.set(Initializators.G1DListInitializatorReal)

        genome.mutator.set(Mutators.G1DListMutatorRealGaussian)


        ga = GSimpleGA.GSimpleGA(genome)


        ga.selector.set(Selectors.GRankSelector)


        ga.setMultiProcessing(flag=True, full_copy=False, max_processes=16)




        csv_adapter = DBAdapters.DBFileCSV(identify="run1", filename="stats.csv", frequency=1)

        ga.setDBAdapter(csv_adapter)


        ga.setGenerations(100)


        ga.setPopulationSize(16)


        ga.evolve(freq_stats=1)


        print ga.bestIndividual()

        print '#######################'


        res_file = open('res_genetic_optimisation.txt', 'w')


        res_file.write(ga.bestIndividual())


        res_file.close()






        g.close()



Simone Marini

unread,
May 9, 2016, 12:03:21 PM5/9/16
to pyevolve
Hi,

I had a similar problem.
I needed to modify the GPopulation.py in the library source code.

Try to add the following two commands:
proc_pool.close()
proc_pool.join()

just before the last line of the function evaluate(self, **args)

Then you need to rebuild and re-install the libraries.

Hope it helps.
Simone
Reply all
Reply to author
Forward
0 new messages