Can't use LSTM layer with CUDA 10.0 on Google Colab.

Thomas Reynaud

Feb 18, 2019, 9:25:58 AM
to TensorFlow Community Testing
I'm using tf-nightly-gpu-2.0-preview==2.0.0.dev20190218 on Google colab with CUDA 10.0.
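
For reference, this is roughly how I set up and checked the runtime (the commands and printed values below are from memory of my session, so treat them as approximate):

!pip install tf-nightly-gpu-2.0-preview
!nvcc --version   # reports the CUDA toolkit version, 10.0 on this runtime

import tensorflow as tf
print(tf.__version__)              # 2.0.0-dev20190218
print(tf.test.is_gpu_available())  # True on a GPU runtime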

I am building and fitting a simple model with tf.keras like so:

def create_model(vocab_size=100):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, 80),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model


def training_pipeline(train_file,
                      train_labels,
                      val_file,
                      val_labels,
                      vocab_file,
                      labels_list,
                      vocab_size=100,
                      epochs=10,
                      batch_size=7):
    training_set = read_data(train_file, train_labels, vocab_file, labels_list,
                             is_training=True, batch_size=batch_size)
    validation_set = read_data(val_file, val_labels, vocab_file, labels_list,
                               is_training=False, batch_size=batch_size)

    model = create_model(vocab_size=vocab_size)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(x=training_set,
              epochs=epochs,
              validation_data=validation_set,
              verbose=1)
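
For completeness, here is roughly how I invoke the pipeline (the file paths below are placeholders for my actual data; vocab_size and labels_list come from earlier cells):

training_pipeline(train_file='train.txt',
                  train_labels='train_labels.txt',
                  val_file='val.txt',
                  val_labels='val_labels.txt',
                  vocab_file='vocab.txt',
                  labels_list=labels_list,
                  vocab_size=vocab_size,
                  epochs=3,
                  batch_size=100)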

Running this pipeline fails on GPU with the following error:

---------------------------------------------------------------------------
UnknownError                              Traceback (most recent call last)
<ipython-input-16-6c7a6651d7fd> in <module>()
      7                   vocab_size=vocab_size,
      8                   epochs=3,
----> 9                   batch_size=100)

<ipython-input-14-06067a0270a4> in training_pipeline(train_file, train_labels, val_file, val_labels, vocab_file, labels_list, vocab_size, epochs, batch_size)
     59         epochs=epochs,
     60         validation_data=validation_set,
---> 61         verbose=1)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    762         workers=0,
    763         shuffle=shuffle,
--> 764         initial_epoch=initial_epoch)
    765
    766     # Case 3: Symbolic tensors or Numpy array-like.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
   1482         shuffle=shuffle,
   1483         initial_epoch=initial_epoch,
-> 1484         steps_name='steps_per_epoch')
   1485
   1486   def evaluate_generator(self,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    244
    245       is_deferred = not model._is_compiled
--> 246       batch_outs = batch_function(*batch_data)
    247       if not isinstance(batch_outs, list):
    248         batch_outs = [batch_outs]

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
   1226     else:
   1227       self._make_fit_function()
-> 1228       outputs = self._fit_function(ins)  # pylint: disable=not-callable
   1229
   1230     if reset_metrics:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in __call__(self, inputs)
   3207           value = math_ops.cast(value, tensor.dtype)
   3208         converted_inputs.append(value)
-> 3209     outputs = self._graph_fn(*converted_inputs)
   3210     return nest.pack_sequence_as(self._outputs_structure,
   3211                                  [x.numpy() for x in outputs])

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
    438       raise TypeError("Keyword arguments {} unknown. Expected {}.".format(
    439           list(kwargs.keys()), list(self._arg_keywords)))
--> 440     return self._call_flat(args)
    441
    442   def _filtered_call(self, args, kwargs):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _call_flat(self, args)
    507     # Only need to override the gradient in graph mode and when we have outputs.
    508     if context.executing_eagerly() or not self.outputs:
--> 509       outputs = self._inference_function.call(ctx, args)
    510     else:
    511       self._register_gradient()

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in call(self, ctx, args)
    295             attrs=("executor_type", executor_type,
    296                    "config_proto", config),
--> 297             ctx=ctx)
    298       # Replace empty list with None
    299       outputs = outputs or None

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     64     else:
     65       message = e.message
---> 66     six.raise_from(core._status_to_exception(e.code, message), None)
     67   except TypeError as e:
     68     if any(ops._is_keras_symbolic_tensor(x) for x in inputs):

/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

UnknownError: Fail to find the dnn implementation.
     [[{{node unified_lstm_1/CudnnRNN}}]]
     [[training_1/Adam/gradients/loss_1/dense_5_loss/binary_crossentropy/Mean_grad/Prod_1/_62]] [Op:__inference_keras_scratch_graph_3795]


The above code runs correctly on CPU.
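
In case it helps isolate the problem, here is a stripped-down sketch that drops read_data and fits the same model on random integer data; on a GPU runtime it should exercise the same LSTM/CudnnRNN code path (the shapes and sizes below are arbitrary):

import numpy as np

# Random token sequences standing in for the real tokenized data.
x = np.random.randint(0, 100, size=(70, 20))   # 70 sequences of length 20
y = np.random.randint(0, 2, size=(70, 1))      # binary labels

model = create_model(vocab_size=100)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x, y, batch_size=7, epochs=1, verbose=1)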

Thomas Reynaud

Feb 18, 2019, 9:30:59 AM
to TensorFlow Community Testing
Maybe I am doing something wrong on Colab. If someone else has encountered this problem, I'll open an issue on GitHub.

Paige Bailey

Feb 21, 2019, 1:17:33 PM
to Thomas Reynaud, TensorFlow Community Testing
Hi, Thomas -
I'm attempting to replicate the error you shared, but I am not able to reproduce it (using the latest version of tf-nightly-gpu-2.0-preview).


Could you try running the notebook above and seeing if it throws the same error?

Thanks!
.pb

[Attachment: Screenshot from 2019-02-21 10-16-31.png]
