import math
import sys

import numpy as np

from keras.layers import Input, Embedding, Flatten, Reshape, Masking, LSTM, Dense, merge
from keras.models import Model

A = Input(shape=(1,), dtype='int32')
embeddedA = Embedding(input_dim=5345, output_dim=16, input_length=1)(A)
embeddedA = Flatten()(embeddedA)
B = Input(shape=(1,), dtype='int32')
embeddedB = Embedding(input_dim=9453, output_dim=16, input_length=1)(B)
embeddedB = Flatten()(embeddedB)
C = Input(shape=(392,), dtype='int32')
embeddedC = Embedding(input_dim=19240, output_dim=16, input_length=392)(C)
embeddedC = Flatten()(embeddedC)
D = Input(shape=(64,))
mergedX = merge([
    embeddedA,
    embeddedB,
    embeddedC,
    D],
    mode='concat')
mergedX = Reshape((1, 6368))(mergedX)
a = LSTM(256, stateful=True, return_sequences=True, batch_input_shape=(1,1,6368))(mergedX)
b = LSTM(256, stateful=True)(a)
c = Dense(128, activation='relu')(b)
y = Dense(16, activation="softmax")(c)
model = Model(input=[A, B, C, D], output=[y])

Exception: If a RNN is stateful, a complete input_shape must be provided (including batch size).

To enable statefulness:
- specify stateful=True in the layer constructor.
- specify a fixed batch size for your model, by passing a batch_input_shape=(...) to the first layer in your model. This is the expected shape of your inputs, including the batch size.
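
For reference, a minimal sketch of what that error is asking for, assuming the functional API: fix the batch size by giving the Input a full batch_shape. The batch size of 1, single timestep, and 6368 merged features are assumptions carried over from the model above.

# Minimal sketch (assumptions: batch size 1, one timestep, 6368 merged features)
X = Input(batch_shape=(1, 1, 6368))
h = LSTM(256, stateful=True, return_sequences=True)(X)
h = LSTM(256, stateful=True)(h)
out = Dense(16, activation='softmax')(h)
m = Model(input=X, output=out)
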
A = Input(shape=(12,), dtype='int32')
embeddedA = Embedding(input_dim=5345, output_dim=16, input_length=12, mask_zero=True)(A)
B = Input(shape=(12,), dtype='int32')
embeddedB = Embedding(input_dim=9453, output_dim=16, input_length=12, mask_zero=True)(B)
C = Input(shape=(12*392,), dtype='int32')
embeddedC = Embedding(input_dim=19240, output_dim=16, input_length=12*392, mask_zero=True)(C)
embeddedC = Reshape((12, 16*392))(embeddedC)
# Ideally C would look like this, so as to avoid a reshape, but Embedding doesn't seem to handle more than 1D input properly:
# C = Input(shape=(12,392), dtype='int32')
# embeddedC = Embedding(input_dim=19240, output_dim=16, input_length=(12,392), mask_zero=True)(C)
D = Input(shape=(12, 64))
maskedD = Masking()(D)
mergedX = merge([
    embeddedA,
    embeddedB,
    embeddedC,
    maskedD],
    mode='concat')
# Can't mask here because the embedding layers will obscure the mask value for A, B, and C...
a = LSTM(256, return_sequences=True)(mergedX)
b = LSTM(256)(a)
c = Dense(128, activation='relu')(b)
y = Dense(16, activation="softmax")(c)
model = Model(input=[A, B, C, D], output=[y])

rx = np.random.random_integers(1, 19240, (500, 2, 3))
ry = np.random.random((500, 4))
x = Input(shape=(2, 3), dtype='int32')
e = Embedding(19240, 16, input_length=(2, 3), mask_zero=True)(x)  # Ideally this would output shape (2, 3*16)
a = LSTM(32)(e)
y = Dense(4)(a)
model = Model(input=x, output=y)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.fit(rx, ry)

rx11 = np.random.random_integers(1, 19240, (500, 1))
rx12 = np.random.random_integers(1, 19240, (500, 1))
rx13 = np.random.random_integers(1, 19240, (500, 1))
rx21 = np.zeros((500, 1), dtype='int32')
rx22 = np.zeros((500, 1), dtype='int32')
rx23 = np.zeros((500, 1), dtype='int32')
ry = np.random.random((500, 4))
x11 = Input(shape=(1,), dtype='int32')
x12 = Input(shape=(1,), dtype='int32')
x13 = Input(shape=(1,), dtype='int32')
# Either all or none are zeros
x21 = Input(shape=(1,), dtype='int32')
x22 = Input(shape=(1,), dtype='int32')
x23 = Input(shape=(1,), dtype='int32')
e = Embedding(19240, 16, input_length=1, mask_zero=True)
e11 = e(x11)  # (1, 16)
e12 = e(x12)  # (1, 16)
e13 = e(x13)  # (1, 16)
e21 = e(x21)  # (1, 16)
e22 = e(x22)  # (1, 16)
e23 = e(x23)  # (1, 16)
e1 = merge([e11, e12, e13], mode='concat', concat_axis=-1)  # (1, 48)
e2 = merge([e21, e22, e23], mode='concat', concat_axis=-1)  # (1, 48)
e = merge([e1, e2], mode='concat', concat_axis=1) # (2, 48)
a = LSTM(32)(e)
y = Dense(4)(a)
model = Model(input=[x11, x12, x13, x21, x22, x23], output=y)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.fit([rx11, rx12, rx13, rx21, rx22, rx23], ry)

numSamples = 1337
aX = np.random.random_integers(0, 5344, (numSamples, 1))
bX = np.random.random_integers(0, 9452, (numSamples, 1))
cX = np.random.random_integers(0, 19239, (numSamples, 392))
dX = np.random.random((numSamples, 1, 64))
y = np.random.random((numSamples, 16))
A = Input(shape=(1,), batch_shape=(1, 1), dtype='int32')
embeddedA = Embedding(input_dim=5345, output_dim=16, input_length=1)(A)
B = Input(shape=(1,), batch_shape=(1, 1), dtype='int32')
embeddedB = Embedding(input_dim=9453, output_dim=16, input_length=1)(B)
C = Input(shape=(392,), batch_shape=(1, 392), dtype='int32')
embeddedC = Embedding(input_dim=19240, output_dim=16, input_length=392)(C) # (392, 16)
embeddedC = Reshape((1, 392*16))(embeddedC) # Flatten (392, 16) per batch, so (1, 392*16)
D = Input(shape=(1, 64), batch_shape=(1, 1, 64))
mergedX = merge([
    embeddedA,
    embeddedB,
    embeddedC,
    D],
    mode='concat')  # (1, 394*16 + 64)
a = LSTM(256, stateful=True, return_sequences=True)(mergedX)
b = LSTM(256, stateful=True)(a)
c = Dense(128, activation='relu')(b)
Y = Dense(16, activation="softmax")(c)
model = Model(input=[A, B, C, D], output=[Y])
model.compile(loss="categorical_crossentropy",
              optimizer='rmsprop',
              metrics=["accuracy"])
for i in range(numSamples):
    # Add your state reset logic here, for example when working with extremely long
    # sequences with high variability (that's the only good reason you'd even be doing it this way...)
    decidedToResetStateForCurrentBatchDependingOnSomeCondition = False
    if decidedToResetStateForCurrentBatchDependingOnSomeCondition:
        model.reset_states()
    loss = model.train_on_batch([aX[[i]], bX[[i]], cX[[i]], dX[[i]]], [y[[i]]])
    progress = int(math.floor(30.0 * (i + 1) / numSamples))
    progressBar = '\r' + str(i + 1) + '/' + str(numSamples) + ' [' + ('=' * progress) + ('>' if 0 < progress < 30 else '') + ('.' * (30 - progress)) + '] - loss: %f - acc: %f' % tuple(loss)
    sys.stdout.write(progressBar)
    sys.stdout.flush()
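
As a follow-on, a minimal sketch of stepping the same stateful model at inference time, one sample per batch. The reset-between-sequences logic mirrors the training loop above and is an assumption, not something prescribed by the model itself.

# Hypothetical inference loop (assumption: same model and data shapes as above)
model.reset_states()  # start from a clean state before the first sequence
for i in range(numSamples):
    probs = model.predict_on_batch([aX[[i]], bX[[i]], cX[[i]], dX[[i]]])  # (1, 16) softmax output
    # ... consume probs[0] here, and call model.reset_states() whenever an unrelated sequence starts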