I've seen a lot of discussion about this, but no simple answer: Keras's Dense layer does not support masking, so how do I get around that?
If I don't mask, I'm asking the model to learn meaningless input-output mappings on the padded timesteps.
My simple-minded approach was to put an Embedding layer before the LSTM, but that fails with the error "Layer dense_1 does not support masking".
Without the Embedding layer, the code below runs.
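Incidentally, since my inputs are real-valued vectors rather than integer token indices, I suspect keras.layers.Masking (not Embedding) is the intended way to generate the mask in the first place. Something like this, I assume, with the shapes matching my data:

from keras.models import Sequential
from keras.layers import Masking

model = Sequential()
# Timesteps whose features all equal mask_value are masked for downstream layers.
model.add(Masking(mask_value=0., input_shape=(9, 2)))  # (max_length, n_features)

Either way, the mask still reaches the Dense layer and triggers the same error.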
I have 2-dimensional inputs and binary one-hot outputs, with a maximum sequence length of 9 and 5 such sequences.
Many thanks for a pointer; perhaps the way forward is to replace the Dense layer with a new layer like "DenseWithMasking"?
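What I have in mind is a minimal sketch like the following, assuming Keras 1.x ("DenseWithMasking" is just my made-up name, and I have not verified that the mask actually propagates to the loss this way):

from keras.layers import Dense

class DenseWithMasking(Dense):
    """Hypothetical Dense variant that accepts a mask and passes it through."""
    def __init__(self, output_dim, **kwargs):
        super(DenseWithMasking, self).__init__(output_dim, **kwargs)
        # Layer.__init__ sets this to False, which is what raises the error.
        self.supports_masking = True

    def compute_mask(self, x, mask=None):
        # Hand the incoming mask through unchanged so the loss can use it.
        return mask

Here is the full script, including the Embedding line that triggers the error: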
import numpy as np
from keras.models import Sequential
from keras.engine.topology import Layer  # imported with a custom masking layer in mind
from keras.layers import Dense, LSTM, Embedding
from keras.optimizers import SGD


def test():
    input_n_features = 2
    input_max_length = 9
    n_classes = 2
    n_lstm_cells = 1
    n_epochs = 10
    n_sequences = 5

    # 5 padded sequences of 2-dim samples; all-zero timesteps are padding.
    train_input = np.array([[[1,1],[2,2],[3,3],[4,4],[5,5],[6,6],[7,7],[8,8],[9,9]],
                            [[1,1],[2,2],[3,3],[4,4],[5,5],[6,6],[7,7],[8,8],[0,0]],  # last sample to mask
                            [[1,1],[2,2],[3,3],[4,4],[5,5],[6,6],[7,7],[0,0],[0,0]],  # last 2 samples to mask
                            [[1,1],[2,2],[3,3],[4,4],[5,5],[6,6],[0,0],[0,0],[0,0]],  # last 3 samples to mask
                            [[1,1],[2,2],[3,3],[4,4],[5,5],[0,0],[0,0],[0,0],[0,0]]], # last 4 samples to mask
                           dtype=np.float64)

    # One-hot class labels per timestep; [0,0] marks the padded positions.
    train_output = np.array([[[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1]],
                             [[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,1],[0,0]],
                             [[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[0,0],[0,0]],
                             [[1,0],[1,0],[1,0],[1,0],[1,0],[1,0],[0,0],[0,0],[0,0]],
                             [[1,0],[1,0],[1,0],[1,0],[1,0],[0,0],[0,0],[0,0],[0,0]]],
                            dtype=np.float64)

    model = Sequential()
    # This Embedding layer produces the mask that makes the Dense layer
    # raise "Layer dense_1 does not support masking".
    model.add(Embedding(output_dim=n_classes, input_dim=input_n_features, mask_zero=True))
    model.add(LSTM(n_lstm_cells,
                   #input_shape=(input_max_length, input_n_features),
                   return_sequences=True,
                   stateful=False,
                   init='he_normal',
                   activation='tanh'))
    model.add(Dense(n_classes, init='he_normal',
                    activation='softmax'))

    sgd = SGD()
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  #optimizer=rmsprop,
                  metrics=['accuracy'])

    print('Training')
    for i in range(n_epochs):
        print('Epoch', i, '/', n_epochs)
        model.fit(train_input,
                  train_output,
                  verbose=1,
                  nb_epoch=1,
                  shuffle=True)
        #model.reset_states()

    print('Predicting')
    validation_predicted_output = model.predict(train_input)


if __name__ == "__main__":
    test()
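For completeness, the masked model I'm aiming for would then presumably look like this, combining the Masking layer and the hypothetical DenseWithMasking from above (again a sketch, not verified):

model = Sequential()
model.add(Masking(mask_value=0., input_shape=(input_max_length, input_n_features)))
model.add(LSTM(n_lstm_cells,
               return_sequences=True,
               init='he_normal',
               activation='tanh'))
model.add(DenseWithMasking(n_classes, init='he_normal', activation='softmax'))

Is this the right direction, or is there a built-in way I'm missing?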