GP with DEAP to generate random numpy array transformations

58 views

Skip to first unread message

Riccardo Buscicchio

unread,

Nov 15, 2022, 8:08:37 PM11/15/22

to deap-users

Hi,
I am trying to use DEAP to generate random transformations for numpy arrays, and minimize a suitable fitness function.

As a MWE, let us imagine we have two vectors

x = {x[i], i=0,...,N-1}
and
y = {y[i], i=0,...,N-1}
with fixed N.
We want to randomly transform the vectors x and y only with joint, element-wise operations on x[i] and y[i]. Elementwise operations are quite efficient when vectorized using numpy array, so I am following:
https://github.com/DEAP/deap/blob/master/examples/gp/symbreg_numpy.py

I need GP individuals to output two arrays of the same dimension as the original ones

s = {s[i], i=0,...,N-1}
and
t = {t[i], i=0,...,N-1}
because that is the input required by my target fitness.
Would you suggest to use strongly typed primitives?

Here below an example without strong typing. I am quite new to DEAP so any help would be quite appreciated.

import random
import numpy as np

from deap import gp
from deap import base
from deap import tools
from deap import creator
from deap import algorithms

# Generate some data I want to learn how to decorrelate
Nsamples = 1000
x = np.random.normal(0.0,0.2, Nsamples)
y = np.random.normal(0.0,0.3, Nsamples)

# Correlating them by hand
def transform(x,y):
return (x+10*y)/11, (x-10*y)/11

# I want to "learn" through GP the trasformation above
xt,yt = transform(x,y)

# Let's define primitives, individuals and population, and an ephemeral constant
def numpyprotectedDiv(left, right):
with np.errstate(divide='ignore',invalid='ignore'):
x = np.divide(left, right)
if isinstance(x, np.ndarray):
x[np.isinf(x)] = 1
x[np.isnan(x)] = 1
elif np.isinf(x) or np.isnan(x):
x = 1
return x

# I want to combine the two arrays x and y with random transformations
# Therefore it seems natural to split the two sets into two arguments to be passed to MAIN

pset = gp.PrimitiveSet("MAIN", 2)
pset.addPrimitive(np.add, 2, name="vadd")
pset.addPrimitive(np.subtract, 2, name="vsub")
pset.addPrimitive(np.multiply, 2, name="vmul")
pset.addPrimitive(numpyprotectedDiv, 2)
pset.addPrimitive(np.negative, 1, name="vneg")
pset.addEphemeralConstant("rand20020", lambda: random.randint(-20,20))

pset.renameArguments(ARG0="x")
pset.renameArguments(ARG1="y")

creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

def myfitness(individual, pointx=None, pointy=None):
# Transform the tree expression in a callable function
func = toolbox.compile(expr=individual)
# Minimize the correlation coefficient (for now).
# From the correlation matrix I need to choose the out of diagonal term
return np.corrcoef(func(pointx, pointy))[1,0]

toolbox.register("evaluate", myfitness, pointx=xt, pointy=yt)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", np.mean)
mstats.register("std", np.std)
mstats.register("min", np.min)
mstats.register("max", np.max)

pop = toolbox.population(n=300)
hof = tools.HallOfFame(1)
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,halloffame=hof, verbose=True)

Although there must be something wrong with the individual generated. I'm getting:

"toolbox.register("evaluate", myfitness, pointx=xt, pointy=yt) IndexError: invalid index to scalar variable."