I am struggling to use pre-trained word vectors in my Keras model. It seems that either the shape information is not propagated from the Input layer (the first layer), or my use of the Embedding layer is wrong.
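Here is a minimal sketch that, as far as I can tell, reproduces the same error with everything else stripped out (the constants below are placeholders, not my real values):

from keras.models import Sequential
from keras.layers import Embedding, Input

MAX_SEQUENCE_LENGTH = 128   # placeholder
VOCAB_DIM = 1000            # placeholder
EMBEDDING_DIM = 50          # placeholder

model = Sequential()
model.add(Input(batch_shape=(None, MAX_SEQUENCE_LENGTH)))  # the AttributeError is raised here
model.add(Embedding(VOCAB_DIM, EMBEDDING_DIM))

My full code looks like this: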
from keras.models import Model, Sequential
from keras.layers import Activation, Convolution1D, Convolution2D, Dense, Dropout, Embedding, Input, Flatten, MaxPooling1D, MaxPooling2D, Reshape
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import EarlyStopping, TensorBoard
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Load data
data_loader = DataLoader(filename='...')
data_loader.load_normalized()
X, y = data_loader.get_balanced_dataset()
# Load embedding (cherry pick embedding vectors because full embedding uses too much memory)
embedding_loader = EmbeddingLoader(filename='...')
vocab, embedding = embedding_loader.load()
VOCAB_DIM = embedding.shape[0]
EMBEDDING_DIM = embedding.shape[1]
MAX_SEQUENCE_LENGTH = 128
# Transform data to embedding
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X_sequences = tokenizer.texts_to_sequences(X)
X_padded_sequences = pad_sequences(X_sequences, maxlen=MAX_SEQUENCE_LENGTH)
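# Sanity check: the padded matrix should be (num_samples, MAX_SEQUENCE_LENGTH),
# and every token index should be a valid row of `embedding`. This assumes the
# Tokenizer's word index lines up with the rows of the pre-trained embedding.
assert X_padded_sequences.shape[1] == MAX_SEQUENCE_LENGTH
assert X_padded_sequences.max() < VOCAB_DIM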
# Neural network builder
def build_fn(nb_filters, filter_sizes, max_pooling_sizes, nb_hidden_units, init, activation, dropout, optimizer, embedding_trainable):
    model = Sequential()
    # Input layer
    model.add(Input(batch_shape=(None, MAX_SEQUENCE_LENGTH)))
    model.add(Embedding(VOCAB_DIM, EMBEDDING_DIM, weights=[embedding], trainable=embedding_trainable))
    # Convolution layers
    model.add(Reshape((1, MAX_SEQUENCE_LENGTH, EMBEDDING_DIM), input_shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM)))
    for nb_filter in nb_filters:
        model.add(Convolution2D(nb_filter, filter_sizes, filter_sizes, init=init, border_mode='same'))
        model.add(Activation(activation))
        model.add(MaxPooling2D(max_pooling_sizes))
        model.add(Dropout(dropout))
    # Dense layers
    model.add(Flatten())
    for nb_hidden_unit in nb_hidden_units:
        model.add(Dense(nb_hidden_unit, init=init))
        model.add(Activation(activation))
        model.add(Dropout(dropout))
    # Output layer
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    # Build network
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])
    return model  # KerasClassifier's build_fn must return the compiled model
# Neural network parameters
param_grid = dict(
    nb_filters=[[64, 32, 16]],
    filter_sizes=[3],
    nb_hidden_units=[[16, 8]],
    max_pooling_sizes=[8, 16],
    embedding_trainable=[True, False],
    init=['uniform'],
    activation=['relu'],
    dropout=[0.5],
    optimizer=['adam'])
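# Note: this grid yields 2 (max_pooling_sizes) * 2 (embedding_trainable) = 4
# candidate parameter sets, matching the "4 candidates" in the log below.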
# Callbacks
tensor_board = TensorBoard(log_dir='./TensorBoard')
early_stopping = EarlyStopping(monitor='acc', patience=8, verbose=1)
# Search parameter space for best neural network architecture, fix internal random state for reproducibility
classifier = KerasClassifier(build_fn=build_fn, batch_size=16, verbose=1)
skfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=8)
grid_search = GridSearchCV(classifier, param_grid, cv=skfold, verbose=1, fit_params={'callbacks': [tensor_board, early_stopping], 'batch_size': 16})
grid_result = grid_search.fit(X_padded_sequences, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for params, mean_score, scores in grid_result.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
Running the script produces the following output and traceback:

/Users/kasper/anaconda/bin/python /Users/kasper/Development/peergrade_bachelorproject/cnn/cnn_keras.py
Using TensorFlow backend.
Fitting 2 folds for each of 4 candidates, totalling 8 fits
Traceback (most recent call last):
  File "/Users/kasper/Development/peergrade_bachelorproject/cnn/cnn_keras.py", line 90, in <module>
    grid_result = grid_search.fit(X_padded_sequences, y)
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 940, in fit
    return self._fit(X, y, groups, ParameterGrid(self.param_grid))
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 562, in _fit
    for parameters in parameter_iterable
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
    while self.dispatch_one_batch(iterator):
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
    self._dispatch(tasks)
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
    result = ImmediateResult(func)
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 322, in __init__
    self.results = batch()
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 238, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/keras/wrappers/scikit_learn.py", line 137, in fit
    self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
  File "/Users/kasper/Development/peergrade_bachelorproject/cnn/cnn_keras.py", line 39, in build_fn
    model.add(Input(batch_shape=(None, MAX_SEQUENCE_LENGTH)))
  File "/Users/kasper/anaconda/lib/python2.7/site-packages/keras/models.py", line 264, in add
    if len(layer.inbound_nodes) == 0:
AttributeError: 'Tensor' object has no attribute 'inbound_nodes'
Process finished with exit code 1
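For reference, I suspect the fix is one of the two variants below, but I am not sure which (if either) is the idiomatic way to feed the pre-trained vectors in. These are sketches only, untested against the rest of my pipeline:

# Variant 1: drop the Input layer and let Embedding define the input shape.
model = Sequential()
model.add(Embedding(VOCAB_DIM, EMBEDDING_DIM, weights=[embedding],
                    input_length=MAX_SEQUENCE_LENGTH))

# Variant 2: switch to the functional API, where Input() tensors belong.
inputs = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
x = Embedding(VOCAB_DIM, EMBEDDING_DIM, weights=[embedding])(inputs)
model = Model(input=inputs, output=x)  # Keras 1.x keyword names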