Custom layer help: accessing values of trainable variables?


mcsk...@gmail.com

Apr 4, 2017, 9:32:17 PM
to Keras-users

I'm trying to implement a custom layer which has only two trainable scalar variables. I'm having trouble actually accessing the values of these variables, both for debugging and for passing to numpy.interp. Can anyone offer a suggestion?

Code:


from keras import backend as K
from keras.engine.topology import Layer
from keras.models import Sequential
import numpy as np


class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) ...where window_length is the
    #    number of features... and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights that all end up needing to be
    # zero except for one element.

    def __init__(self, output_dim, **kwargs):
        assert (1 == output_dim)
        self.output_dim = output_dim
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights
        # self.index = K.variable(np.zeros(1).astype(np.float32), trainable=True)  # Didn't work; variables aren't "trainable"?
        # self.ratio = K.variable(np.ones(1).astype(np.float32), trainable=True)
        self.index = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True)
        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        # for each member of batch...
        batch_output = np.zeros([x.shape[0], 1])
        for batch_i in range(x.shape[0]):

            # How to get the value of the variables (self.index and self.ratio)?   K.eval...()?
            ival = self.index[batch_i, 0]
            rval = self.ratio[batch_i, 0]

            xp = range(x.shape[-1])
            fp = x[batch_i, 0, :]

            print("  call: x.shape = ", x.shape)
            print("        ival = ", ival)
            print("        rval = ", rval)
            print("        xp = ", xp)
            print("        fp = ", fp)
            batch_output[batch_i, 0] = x[batch_i, 0, 0] + self.ratio * np.interp(ival, xp, fp)
        return batch_output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)


def EchoNet(batch_size=1000, tsteps=1, window_length=5000):
    model = Sequential()
    model.add(EchoLayer(1, batch_input_shape=(batch_size, tsteps, window_length)))
    model.compile(loss='mae', optimizer='nadam')
    return model


model = EchoNet()



Log of run:
$ python echolayer_min.py 
Using TensorFlow backend.
  call: x.shape =  (1000, 1, 5000)
        ival =  Tensor("echo_layer_1/strided_slice:0", shape=(), dtype=float32)
        rval =  Tensor("echo_layer_1/strided_slice_1:0", shape=(), dtype=float32)
        xp =  range(0, 5000)
        fp =  Tensor("echo_layer_1/strided_slice_2:0", shape=(5000,), dtype=float32)
Traceback (most recent call last):
  File "echolayer_min.py", line 58, in <module>
    model = EchoNet()
  File "echolayer_min.py", line 54, in EchoNet
    model.add(EchoLayer(1, batch_input_shape=(batch_size, tsteps, window_length)))
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/models.py", line 422, in add
    layer(x)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/topology.py", line 554, in __call__
    output = self.call(inputs, **kwargs)
  File "echolayer_min.py", line 45, in call
    batch_output[batch_i,0] =  x[batch_i,0,0] + self.ratio * np.interp( ival, xp, fp)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/numpy/lib/function_base.py", line 1881, in interp
    return interp_func(x, xp, fp, left, right)
ValueError: object of too small depth for desired array


My experience indicates that the "ValueError: object of too small depth for desired array" is NumPy's way of saying, "Don't give me these Tensor objects, give me real numbers," but I don't know how to do that. Any tips?

Thanks.
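
For reference, reading the numeric values of Keras variables for debugging can be done with K.get_value / K.eval outside of call() — a sketch only, assuming the model has been built as above (as the replies below explain, this leaves the graph, so it can't feed np.interp during training):

    # Sketch: read the current numpy values of the layer's weights for debugging.
    echo = model.layers[0]
    print(K.get_value(echo.index))   # numpy array holding the 'index' weight
    print(K.eval(echo.ratio))        # same idea, via K.eval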

mcsk...@gmail.com

Apr 5, 2017, 2:12:16 AM
to Keras-users, mcsk...@gmail.com
Managed to get it most of the way there, although some interpolation still needs to occur. It now succeeds through initialization but fails once I go on and run model.fit().


Changed the Layer definition to...

class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights to all be zero except for one element.

    def __init__(self, output_dim, **kwargs):
        assert (1 == output_dim)
        self.output_dim = output_dim
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights
        # self.index = K.variable(np.zeros(1).astype(np.float32), trainable=True)  # Didn't work; variables aren't "trainable"?
        # self.ratio = K.variable(np.ones(1).astype(np.float32), trainable=True)

        self.index = self.add_weight(shape=(input_shape[0], self.output_dim), initializer='zeros', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[0], self.output_dim), initializer='uniform', trainable=True)

        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        xshape = K.int_shape(x)
        ival = K.batch_get_value(self.index)
        rval = K.batch_get_value(self.ratio)
        ilo = np.floor(ival).astype(np.int32)
        ihi = np.ceil(ival).astype(np.int32)

        picker_arr = np.zeros([xshape[0], 1])
        # really need to add some kind of interpolation here...
        picker_arr[ilo] = rval[ilo]
        picker_arr[ihi] = rval[ihi]
        picker = K.variable(picker_arr)

        return K.dot(picker, x)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)



Traceback of failure...
...
 model.fit(X, Y, batch_size=batch_size, verbose=1, epochs=1, shuffle=False, callbacks=callbacks, validation_data=validation_data)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/models.py", line 845, in fit
    initial_epoch=initial_epoch)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/training.py", line 1457, in fit
    self._make_train_function()
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/training.py", line 1001, in _make_train_function
    self.total_loss)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/optimizers.py", line 547, in get_updates
    g_prime = g / (1. - m_schedule_new)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 830, in r_binary_op_wrapper
    x = ops.convert_to_tensor(x, dtype=y.dtype.base_dtype, name="x")
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 639, in convert_to_tensor
    as_ref=False)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 704, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 113, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 360, in make_tensor_proto
    raise ValueError("None values not supported.")
ValueError: None values not supported.


Not sure which value is "None".
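
A quick way to see which gradient is missing — a sketch, assuming the model has already been compiled:

    # Sketch: ask the backend for the gradients of the loss w.r.t. each trainable
    # weight; a None entry means there is no gradient path to that weight.
    grads = K.gradients(model.total_loss, model.trainable_weights)
    for w, g in zip(model.trainable_weights, grads):
        print(w.name, g)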

mcsk...@gmail.com

Apr 5, 2017, 2:36:05 AM
to Keras-users, mcsk...@gmail.com
It's the gradients which are None. For some reason it's not computing gradients at all. I didn't see anything in https://keras.io/layers/writing-your-own-keras-layers/ about writing gradients.

Is there another place where they need to be specified?  I was under the impression that such things were automatic in Keras 2.  


Daπid

Apr 5, 2017, 4:32:54 AM
to Scott Hawley, Keras-users

On 5 April 2017 at 08:36, <mcsk...@gmail.com> wrote:
Is there another place where they need to be specified?  I was under the impression that such things were automatic in Keras 2.  


They are, but you need to write everything in backend operations, either Theano or TensorFlow, so they can compute gradients. They don't know what things like np.floor and np.zeros do. In general, numpy can only be used to initialise values.

Since you are looking for a 1D interpolation, you can get away with explicitly writing a linear interpolator at the point of interest.
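
To make that concrete, here is a rough sketch of a linear interpolator written purely in backend ops; the helper name and the triangular-weight trick are illustrative, not taken from this thread:

    # Sketch: differentiable linear interpolation of x (shape (..., N)) at position `pos`,
    # using a triangular weighting over an `indices` tensor holding 0, 1, ..., N-1.
    # Equivalent to (1 - frac) * x[floor(pos)] + frac * x[ceil(pos)], but stays in the graph.
    def soft_linear_interp(x, pos, indices):
        weights = K.maximum(1.0 - K.abs(indices - pos), 0.0)   # nonzero only at the two neighbours of pos
        return K.sum(x * weights, axis=-1)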

mcsk...@gmail.com

Apr 6, 2017, 12:01:09 AM
to Keras-users, mcsk...@gmail.com
That makes sense.  So the variables I was using were not part of the graph.  I've rewritten it to use the backend operations, but it seems that K.batch_dot is still not outputting correctly.

I need something like a "K.range()" operation, but it doesn't seem to exist. Is there such a thing?  Currently testing for which backend...
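
As an aside (not from the original thread): depending on the Keras version, K.arange may already cover this, and otherwise a non-trainable constant built from np.arange works, since the index list never needs gradients. A sketch of both options:

    # Hypothetical, backend-agnostic ways to get a 0..N-1 index tensor:
    indices = K.arange(0, input_shape[-1], dtype='float32')             # if your Keras version provides K.arange
    indices = K.constant(np.arange(input_shape[-1]), dtype='float32')   # otherwise: a constant, no gradients needed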


class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights to all be zero except for one element.

    def __init__(self, output_dim, **kwargs):
        assert (1 == output_dim)
        self.output_dim = output_dim
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights

        print("    input_shape = ", input_shape)
        self.delay = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='zeros', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True)

        # Which one of these 'self.indices =' to use?  Really just want K.range()
        # self.indices = K.variable(value=np.arange(0, input_shape[-1]))  # list of indices of feature vec
        if ('tensorflow' == backend.backend()):
            self.indices = tf.range(0, limit=input_shape[-1], name='indices', dtype=tf.float32)
        else:
            self.indices = theano.tensor.arange(input_shape[-1], dtype='float32')
        print("    delay shape = ", K.int_shape(self.delay))
        # print("    indices shape = ", K.int_shape(self.indices))   # "TypeError: Not a Keras tensor"

        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        # "kernel" is a gaussian over indices, centered at delay
        kernel = self.ratio * K.exp( K.pow((self.indices - self.delay) / .6, 2) )
        print("    kernel shape = ", K.int_shape(kernel))
        return K.batch_dot(x, kernel, axes=[2, 1])

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

The K.batch_dot operation is giving me an error, although I seem to be specifying the axes according to the K.batch_dot documentation. 
In Tensorflow the output says...

    input_shape =  (10, 1, 5000)
    delay shape =  (1, 1)
    kernel shape =  (1, 5000)

Traceback (most recent call last):
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py", line 671, in _call_cpp_shape_fn_impl
    input_tensors_as_shapes, status)
  File "/opt/anaconda/envs/py35/lib/python3.5/contextlib.py", line 66, in __exit__
    next(self.gen)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
    pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 2 but is rank 3 for 'echo_layer_1/MatMul' (op: 'MatMul') with input shapes: [10,1,5000], [1,5000].




Daπid

Apr 6, 2017, 5:25:50 AM
to Scott Hawley, Keras-users

On 6 April 2017 at 06:01, <mcsk...@gmail.com> wrote:
        kernel = self.ratio * K.exp( K.pow((self.indices-self.delay)/.6,2) ) 

I think you forgot a - before K.pow


The problem is that batch_dot iterates over pairs of elements in a batch, so the first dimension should match. A quick and dirty solution is to repeat the kernel batch_size times:

K.batch_dot(x, K.repeat_elements(K.expand_dims(kernel, 0), batch_size, 0), axes=[2, 2])

But you can also express it as a plain dot:

K.dot(x, K.expand_dims(kernel, -1))

(but you'll need to squeeze the output)
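
For reference, a sketch of how that plain-dot variant might slot into call(), given the (1, window_length) kernel above — my own arrangement of the expand/squeeze calls, not taken from the thread:

    def call(self, x):
        # kernel has shape (1, window_length); x has shape (batch_size, 1, window_length)
        kernel = self.ratio * K.exp(-K.pow((self.indices - self.delay) / .6, 2))
        kernel_col = K.expand_dims(K.squeeze(kernel, 0), -1)   # -> (window_length, 1)
        out = K.dot(x, kernel_col)                             # -> (batch_size, 1, 1)
        return K.squeeze(out, -1)                              # -> (batch_size, 1)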

mcsk...@gmail.com

Apr 6, 2017, 11:23:14 AM
to Keras-users, mcsk...@gmail.com
Thanks David!   You have a keen eye!  Yes, I noticed the need for both those things (the minus sign, and the repetition), as well as an apparent error in K.batch_dot() with the Tensorflow backend when the array sizes are different, so I resized my arrays by inserting dimensions of 1.

Here's where I'm at now, and it runs, and it trains (slowly)!
class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights to all be zero except for one element.

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        assert (1 == output_dim)
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights
        print("    input_shape = ", input_shape)

        self.delay = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='ones', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='ones', trainable=True)

        # indices is just a list of indices of feature vec; constant; does not need to be trainable
        np_indices = np.arange(0, input_shape[-1])
        np2 = np.tile(np_indices, (input_shape[0], 1))      # apparently we need batch_size copies of all indices
        self.indices = K.variable(value=np2)

        # this is the array simply to hold the index for the value at the "current time"; constant, non-trainable
        np_idx2 = np.zeros((input_shape[0], input_shape[2], 1))
        np_idx2[:, 0, 0] = 1.0
        self.curr_val = K.variable(value=np_idx2)

        print("    delay shape = ", K.int_shape(self.delay))
        print("    indices shape = ", K.int_shape(self.indices))
        print("    curr_val.shape = ", K.int_shape(self.curr_val))

        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        # "kernel" is a gaussian over indices, centered at delay
        print(" self.delay, self.ratio = ", self.delay, self.ratio)
        kernel = self.ratio * K.exp( -1*K.pow((self.indices - self.delay) / .6, 2) )
        kdim = K.int_shape(kernel)
        kernel = K.reshape(kernel, (kdim[0], kdim[1], 1))        # because of bug in K.batch_dot for Tensorflow backend
        print("    kernel shape = ", K.int_shape(kernel))

        print("    curr_val.shape = ", K.int_shape(self.curr_val))
        kernel = kernel + self.curr_val
        # kernel[:,0,0] = 1.0

        # output = K.batch_dot(x, kernel, axes=[2,2])
        output = K.batch_dot(x, kernel)

        # print("    output shape = ", K.int_shape(output))
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)



One issue I'm still having is that it won't load from a saved model.  Running load_model gives an error...

            model = load_model(checkpoint_filepath, custom_objects={"EchoLayer": EchoLayer})

Gives...
TypeError: __init__() missing 1 required positional argument: 'output_dim'


The model was saved via a ModelCheckpoint callback:
    checkpointer = ModelCheckpoint(filepath=checkpoint_filepath, verbose=1, save_best_only=False)

I didn't see any instructions in the "Writing Your Own Custom Layers" docs (https://keras.io/layers/writing-your-own-keras-layers/) about, e.g. needing to implement get_config(), so...?
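
For reference, the usual fix is to give the custom layer a get_config() method so load_model() knows how to re-create it with its constructor arguments — a sketch along those lines:

    # Sketch: serialize the constructor argument so load_model() can rebuild the layer.
    def get_config(self):
        config = {'output_dim': self.output_dim}
        base_config = super(EchoLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

With that in place, passing custom_objects={"EchoLayer": EchoLayer} to load_model() should be enough for it to reconstruct the layer.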