Custom layer help: accessing values of trainable variables?


mcsk...@gmail.com

Apr 4, 2017, 9:32:17 PM
to Keras-users

I'm trying to implement a custom layer which has only two trainable scalar variables. I'm having trouble actually accessing the values of these variables, both for debugging and for passing to numpy.interp. Can anyone offer a suggestion?

Code:


from keras import backend as K
from keras.engine.topology import Layer
from keras.models import Sequential
import numpy as np


class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) ...where window_length is the
    #    number of features... and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights that all end up needing to be
    # zero except for one element.

    def __init__(self, output_dim, **kwargs):
        assert (1 == output_dim)
        self.output_dim = output_dim
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights
        # self.index = K.variable(np.zeros(1).astype(np.float32), trainable=True)  # Didn't work; variables aren't "trainable"?
        # self.ratio = K.variable(np.ones(1).astype(np.float32), trainable=True)
        self.index = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True)
        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        # for each member of batch...
        batch_output = np.zeros([x.shape[0], 1])
        for batch_i in range(x.shape[0]):

            # How to get the value of the variables (self.index and self.ratio)?   K.eval...()?
            ival = self.index[batch_i, 0]
            rval = self.ratio[batch_i, 0]

            xp = range(x.shape[-1])
            fp = x[batch_i, 0, :]

            print("  call: x.shape = ", x.shape)
            print("        ival = ", ival)
            print("        rval = ", rval)
            print("        xp = ", xp)
            print("        fp = ", fp)
            batch_output[batch_i, 0] = x[batch_i, 0, 0] + self.ratio * np.interp(ival, xp, fp)
        return batch_output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)


def EchoNet(batch_size=1000, tsteps=1, window_length=5000):
    model = Sequential()
    model.add(EchoLayer(1, batch_input_shape=(batch_size, tsteps, window_length)))
    model.compile(loss='mae', optimizer='nadam')
    return model


model = EchoNet()



Log of run:
$ python echolayer_min.py 
Using TensorFlow backend.
  call: x.shape =  (1000, 1, 5000)
        ival =  Tensor("echo_layer_1/strided_slice:0", shape=(), dtype=float32)
        rval =  Tensor("echo_layer_1/strided_slice_1:0", shape=(), dtype=float32)
        xp =  range(0, 5000)
        fp =  Tensor("echo_layer_1/strided_slice_2:0", shape=(5000,), dtype=float32)
Traceback (most recent call last):
  File "echolayer_min.py", line 58, in <module>
    model = EchoNet()
  File "echolayer_min.py", line 54, in EchoNet
    model.add(EchoLayer(1, batch_input_shape=(batch_size, tsteps, window_length)))
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/models.py", line 422, in add
    layer(x)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/topology.py", line 554, in __call__
    output = self.call(inputs, **kwargs)
  File "echolayer_min.py", line 45, in call
    batch_output[batch_i,0] =  x[batch_i,0,0] + self.ratio * np.interp( ival, xp, fp)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/numpy/lib/function_base.py", line 1881, in interp
    return interp_func(x, xp, fp, left, right)
ValueError: object of too small depth for desired array


My experience indicates that the "ValueError: object of too small depth for desired array" is NumPy's way of saying, "Don't give me these Tensor objects, give me real numbers," but I don't know how to do that. Any tips?

Thanks.
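
For reference, reading the numeric values of Keras variables for debugging can be done with K.get_value / K.eval outside of call() — a sketch only, assuming the model has been built as above (as the replies below explain, this leaves the graph, so it can't feed np.interp during training):

    # Sketch: read the current numpy values of the layer's weights for debugging.
    echo = model.layers[0]
    print(K.get_value(echo.index))   # numpy array holding the 'index' weight
    print(K.eval(echo.ratio))        # same idea, via K.eval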

mcsk...@gmail.com

Apr 5, 2017, 2:12:16 AM
to Keras-users, mcsk...@gmail.com
Managed to get it most of the way there, although some interpolation still needs to occur. It now succeeds through initialization but fails once I go on and run model.fit().


Changed the Layer definition to...

class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights to all be zero except for one element.

    def __init__(self, output_dim, **kwargs):
        assert (1 == output_dim)
        self.output_dim = output_dim
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights
        # self.index = K.variable(np.zeros(1).astype(np.float32), trainable=True)  # Didn't work; variables aren't "trainable"?
        # self.ratio = K.variable(np.ones(1).astype(np.float32), trainable=True)

        self.index = self.add_weight(shape=(input_shape[0], self.output_dim), initializer='zeros', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[0], self.output_dim), initializer='uniform', trainable=True)

        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        xshape = K.int_shape(x)
        ival = K.batch_get_value(self.index)
        rval = K.batch_get_value(self.ratio)
        ilo = np.floor(ival).astype(np.int32)
        ihi = np.ceil(ival).astype(np.int32)

        picker_arr = np.zeros([xshape[0], 1])
        # really need to add some kind of interpolation here...
        picker_arr[ilo] = rval[ilo]
        picker_arr[ihi] = rval[ihi]
        picker = K.variable(picker_arr)

        return K.dot(picker, x)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)



Traceback of failure...
...
 model.fit(X, Y, batch_size=batch_size, verbose=1, epochs=1, shuffle=False, callbacks=callbacks, validation_data=validation_data)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/models.py", line 845, in fit
    initial_epoch=initial_epoch)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/training.py", line 1457, in fit
    self._make_train_function()
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/engine/training.py", line 1001, in _make_train_function
    self.total_loss)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/keras/optimizers.py", line 547, in get_updates
    g_prime = g / (1. - m_schedule_new)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 830, in r_binary_op_wrapper
    x = ops.convert_to_tensor(x, dtype=y.dtype.base_dtype, name="x")
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 639, in convert_to_tensor
    as_ref=False)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 704, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 113, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 360, in make_tensor_proto
    raise ValueError("None values not supported.")
ValueError: None values not supported.


Not sure which value is "None".
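
A quick way to see which gradient is missing — a sketch, assuming the model has already been compiled:

    # Sketch: ask the backend for the gradients of the loss w.r.t. each trainable
    # weight; a None entry means there is no gradient path to that weight.
    grads = K.gradients(model.total_loss, model.trainable_weights)
    for w, g in zip(model.trainable_weights, grads):
        print(w.name, g)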

mcsk...@gmail.com

Apr 5, 2017, 2:36:05 AM
to Keras-users, mcsk...@gmail.com
It's the gradients which are None. For some reason it's not computing gradients at all. I didn't see anything in https://keras.io/layers/writing-your-own-keras-layers/ about writing gradients.

Is there another place where they need to be specified?  I was under the impression that such things were automatic in Keras 2.  


Daπid

Apr 5, 2017, 4:32:54 AM
to Scott Hawley, Keras-users

On 5 April 2017 at 08:36, <mcsk...@gmail.com> wrote:
Is there another place where they need to be specified?  I was under the impression that such things were automatic in Keras 2.  


They are, but you need to write everything in backend operations, either Theano or TensorFlow, so they can compute gradients. They don't know what things like np.floor and np.zeros do. In general, numpy can only be used to initialise values.

Since you are looking for a 1D interpolation, you can get away with explicitly writing a linear interpolator at the point of interest.
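
To make that concrete, here is a rough sketch of a linear interpolator written purely in backend ops; the helper name and the triangular-weight trick are illustrative, not taken from this thread:

    # Sketch: differentiable linear interpolation of x (shape (..., N)) at position `pos`,
    # using a triangular weighting over an `indices` tensor holding 0, 1, ..., N-1.
    # Equivalent to (1 - frac) * x[floor(pos)] + frac * x[ceil(pos)], but stays in the graph.
    def soft_linear_interp(x, pos, indices):
        weights = K.maximum(1.0 - K.abs(indices - pos), 0.0)   # nonzero only at the two neighbours of pos
        return K.sum(x * weights, axis=-1)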

mcsk...@gmail.com

Apr 6, 2017, 12:01:09 AM
to Keras-users, mcsk...@gmail.com
That makes sense.  So the variables I was using were not part of the graph.  I've rewritten it to use the backend operations, but it seems that K.batch_dot is still not outputting correctly.

I need something like a "K.range()" operation, but it doesn't seem to exist. Is there such a thing?  Currently testing for which backend...
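
As an aside (not from the original thread): depending on the Keras version, K.arange may already cover this, and otherwise a non-trainable constant built from np.arange works, since the index list never needs gradients. A sketch of both options:

    # Hypothetical, backend-agnostic ways to get a 0..N-1 index tensor:
    indices = K.arange(0, input_shape[-1], dtype='float32')             # if your Keras version provides K.arange
    indices = K.constant(np.arange(input_shape[-1]), dtype='float32')   # otherwise: a constant, no gradients needed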


class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights to all be zero except for one element.

    def __init__(self, output_dim, **kwargs):
        assert (1 == output_dim)
        self.output_dim = output_dim
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights

        print("    input_shape = ", input_shape)
        self.delay = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='zeros', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='uniform', trainable=True)

        # Which one of these 'self.indices =' to use?  Really just want K.range()
        # self.indices = K.variable(value=np.arange(0, input_shape[-1]))  # list of indices of feature vec
        if ('tensorflow' == backend.backend()):
            self.indices = tf.range(0, limit=input_shape[-1], name='indices', dtype=tf.float32)
        else:
            self.indices = theano.tensor.arange(input_shape[-1], dtype='float32')
        print("    delay shape = ", K.int_shape(self.delay))
        # print("    indices shape = ", K.int_shape(self.indices))   # "TypeError: Not a Keras tensor"

        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        # "kernel" is a gaussian over indices, centered at delay
        kernel = self.ratio * K.exp( K.pow((self.indices - self.delay) / .6, 2) )
        print("    kernel shape = ", K.int_shape(kernel))
        return K.batch_dot(x, kernel, axes=[2, 1])

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

The K.batch_dot operation is giving me an error, although I seem to be specifying the axes according to the K.batch_dot documentation. 
In Tensorflow the output says...

    input_shape =  (10, 1, 5000)
    delay shape =  (1, 1)
    kernel shape =  (1, 5000)

Traceback (most recent call last):
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py", line 671, in _call_cpp_shape_fn_impl
    input_tensors_as_shapes, status)
  File "/opt/anaconda/envs/py35/lib/python3.5/contextlib.py", line 66, in __exit__
    next(self.gen)
  File "/opt/anaconda/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py", line 466, in raise_exception_on_not_ok_status
    pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 2 but is rank 3 for 'echo_layer_1/MatMul' (op: 'MatMul') with input shapes: [10,1,5000], [1,5000].




Daπid

Apr 6, 2017, 5:25:50 AM
to Scott Hawley, Keras-users

On 6 April 2017 at 06:01, <mcsk...@gmail.com> wrote:
        kernel = self.ratio * K.exp( K.pow((self.indices-self.delay)/.6,2) ) 

I think you forgot a - before K.pow


The problem is that batch_dot iterates over pairs of elements in a batch, so the first dimension should match. A quick and dirty solution is to repeat the kernel batch_size times:

K.batch_dot(x, K.repeat_elements(K.expand_dims(kernel, 0), batch_size, 0), axes=[2, 2])

But you can also express it as a plain dot:

K.dot(x, K.expand_dims(kernel, -1))

(but you'll need to squeeze the output)
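
For reference, a sketch of how that plain-dot variant might slot into call(), given the (1, window_length) kernel above — my own arrangement of the expand/squeeze calls, not taken from the thread:

    def call(self, x):
        # kernel has shape (1, window_length); x has shape (batch_size, 1, window_length)
        kernel = self.ratio * K.exp(-K.pow((self.indices - self.delay) / .6, 2))
        kernel_col = K.expand_dims(K.squeeze(kernel, 0), -1)   # -> (window_length, 1)
        out = K.dot(x, kernel_col)                             # -> (batch_size, 1, 1)
        return K.squeeze(out, -1)                              # -> (batch_size, 1)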

mcsk...@gmail.com

Apr 6, 2017, 11:23:14 AM
to Keras-users, mcsk...@gmail.com
Thanks David!   You have a keen eye!  Yes, I noticed the need for both those things (the minus sign, and the repetition), as well as an apparent error in K.batch_dot() with the Tensorflow backend when the array sizes are different, so I resized my arrays by inserting dimensions of 1.

Here's where I'm at now, and it runs, and it trains (slowly)!
class EchoLayer(Layer):
    # This (differentiably) tries to pick out one element from a feature vector and adds
    # (a multiple of) it to the 0'th feature
    # Designed to take an input of shape (batch_size, 1, window_length) and output (batch_size, 1)
    #
    # The question we're investigating is, for modeling an echo, whether this approach is faster/more
    # efficient than training an LSTM with an entire matrix of weights to all be zero except for one element.

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        assert (1 == output_dim)
        super(EchoLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # we want to define two trainable single-value (scalar) variables: index and ratio
        #      we don't want these to be an entire matrix of weights
        print("    input_shape = ", input_shape)

        self.delay = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='ones', trainable=True)
        self.ratio = self.add_weight(shape=(input_shape[1], self.output_dim), initializer='ones', trainable=True)

        # indices is just a list of indices of feature vec; constant; does not need to be trainable
        np_indices = np.arange(0, input_shape[-1])
        np2 = np.tile(np_indices, (input_shape[0], 1))      # apparently we need batch_size copies of all indices
        self.indices = K.variable(value=np2)

        # this is the array simply to hold the index for the value at the "current time"; constant, non-trainable
        np_idx2 = np.zeros((input_shape[0], input_shape[2], 1))
        np_idx2[:, 0, 0] = 1.0
        self.curr_val = K.variable(value=np_idx2)

        print("    delay shape = ", K.int_shape(self.delay))
        print("    indices shape = ", K.int_shape(self.indices))
        print("    curr_val.shape = ", K.int_shape(self.curr_val))

        super(EchoLayer, self).build(input_shape)

    def call(self, x):
        # "kernel" is a gaussian over indices, centered at delay
        print(" self.delay, self.ratio = ", self.delay, self.ratio)
        kernel = self.ratio * K.exp( -1*K.pow((self.indices - self.delay) / .6, 2) )
        kdim = K.int_shape(kernel)
        kernel = K.reshape(kernel, (kdim[0], kdim[1], 1))        # because of bug in K.batch_dot for Tensorflow backend
        print("    kernel shape = ", K.int_shape(kernel))

        print("    curr_val.shape = ", K.int_shape(self.curr_val))
        kernel = kernel + self.curr_val
        # kernel[:,0,0] = 1.0

        # output = K.batch_dot(x, kernel, axes=[2,2])
        output = K.batch_dot(x, kernel)

        # print("    output shape = ", K.int_shape(output))
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)



One issue I'm still having is that it won't load from a saved model.  Running load_model gives an error...

            model = load_model(checkpoint_filepath, custom_objects={"EchoLayer": EchoLayer})

Gives...
TypeError: __init__() missing 1 required positional argument: 'output_dim'


The model was saved via a ModelCheckpoint callback:
    checkpointer = ModelCheckpoint(filepath=checkpoint_filepath, verbose=1, save_best_only=False)

I didn't see any instructions in the "Writing Your Own Custom Layers" docs (https://keras.io/layers/writing-your-own-keras-layers/) about, e.g. needing to implement get_config(), so...?
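
For reference, the usual fix is to give the custom layer a get_config() method so load_model() knows how to re-create it with its constructor arguments — a sketch along those lines:

    # Sketch: serialize the constructor argument so load_model() can rebuild the layer.
    def get_config(self):
        config = {'output_dim': self.output_dim}
        base_config = super(EchoLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

With that in place, passing custom_objects={"EchoLayer": EchoLayer} to load_model() should be enough for it to reconstruct the layer.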