Hi,
I modified onemax_short.py to use multiprocessing so the evolution runs faster. It works, but I realized that each worker process unnecessarily re-reads the input data (four dataframes stored in Excel files) from disk. My code looks like this:
# --- Module-level setup: executed at import time (and therefore re-executed in
# every worker process under the "spawn" start method, since each worker
# re-imports this module and re-runs read_trn_tst() — presumably the duplicate
# disk reads the author wants to avoid; confirm which start method is in use).
no_attr: int
# Four dataframes read from Excel files; the individual's length is derived
# from them, which is why this read happens before the toolbox is configured.
trn_sys, tst_sys, trn_ftr, tst_ftr = read_trn_tst()
# Individual layout: the first no_sys bits mask systems, the rest mask features.
no_sys, no_ftr = len(trn_sys.columns), len(trn_ftr.columns)
no_attr = no_sys + no_ftr
# DEAP boilerplate: single-objective maximization; individuals are compact
# bit strings (array.array of signed bytes) carrying a FitnessMax.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", array.array, typecode='b', fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Each gene is a random 0/1 bit; an individual is no_attr such bits.
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, no_attr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
def eval_ftn(individual):
    """Score one individual for DEAP.

    The bit string is split at the module-level boundary ``no_sys`` into a
    system mask and a feature mask; a predictor is built over the shared
    dataframes and its model fitness is returned as a 1-tuple (DEAP requires
    fitness values to be tuples).
    """
    boundary = no_sys
    sys_mask, ftr_mask = individual[:boundary], individual[boundary:]
    predictor = PredictIndividual(sys_mask, ftr_mask,
                                  trn_sys, tst_sys, trn_ftr, tst_ftr)
    return (predictor.get_mdl_ftn(),)
# Genetic operators: the fitness function above, two-point crossover,
# independent per-bit flip mutation (5% per gene), and size-3 tournament
# selection.
toolbox.register("evaluate", eval_ftn)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
def main():
    """Run the GA, farming fitness evaluations out to a process pool.

    Relies on module-level state: ``toolbox``, ``no_hof`` and ``max_itr``
    must be defined before this is called.
    """
    random.seed(64)
    # Fixes vs. the original: the Pool is now a context manager so workers are
    # always cleaned up on exit (it was never close()d/join()ed), and the
    # unused multiprocessing.Manager() is gone — it spawned a manager server
    # process that was never used and never shut down.
    with multiprocessing.Pool() as pool:
        # eaSimple uses toolbox.map for fitness evaluation, so this registers
        # the pool as the parallel backend.
        toolbox.register("map", pool.map)
        pop = toolbox.population(n=300)  # initial population of 300 individuals
        hof = tools.HallOfFame(no_hof)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("max", numpy.max)
        pop, log = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2,
                                       ngen=max_itr, stats=stats,
                                       halloffame=hof, verbose=True)
# Entry-point guard: required when using multiprocessing with the "spawn"
# start method, so that a worker's re-import of this module does not
# recursively launch the GA.
if __name__ == "__main__":
    main()
# end of code
I couldn’t move the data-loading line into the main function, because no_attr depends on the data and must be determined before main is called. How can I resolve this issue?
Thanks,
Halil Ibrahim
# (Sketch from the question: the proposed insertion point is after the last
# module-scope toolbox registration, still at global scope.)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# PUT CODE RIGHT HERE IN GLOBAL SCOPE
def eval_ftn(individual):
...