Can't use LSTM layer with CUDA 10.0 on Google Colab.

Thomas Reynaud

Feb 18, 2019, 9:25:58 AM
to TensorFlow Community Testing
I'm using tf-nightly-gpu-2.0-preview==2.0.0.dev20190218 on Google colab with CUDA 10.0.
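
For reference, this is roughly how I set up and checked the runtime (the commands and printed values below are from memory of my session, so treat them as approximate):

!pip install tf-nightly-gpu-2.0-preview
!nvcc --version   # reports the CUDA toolkit version, 10.0 on this runtime

import tensorflow as tf
print(tf.__version__)              # 2.0.0-dev20190218
print(tf.test.is_gpu_available())  # True on a GPU runtime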

I am building and fitting a simple model with tf.keras like so:

def create_model(vocab_size=100):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, 80),
        tf.keras.layers.LSTM(64),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model


def training_pipeline(train_file,
                      train_labels,
                      val_file,
                      val_labels,
                      vocab_file,
                      labels_list,
                      vocab_size=100,
                      epochs=10,
                      batch_size=7):
    training_set = read_data(train_file, train_labels, vocab_file, labels_list,
                             is_training=True, batch_size=batch_size)
    validation_set = read_data(val_file, val_labels, vocab_file, labels_list,
                               is_training=False, batch_size=batch_size)

    model = create_model(vocab_size=vocab_size)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(x=training_set,
              epochs=epochs,
              validation_data=validation_set,
              verbose=1)
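
For completeness, here is roughly how I invoke the pipeline (the file paths below are placeholders for my actual data; vocab_size and labels_list come from earlier cells):

training_pipeline(train_file='train.txt',
                  train_labels='train_labels.txt',
                  val_file='val.txt',
                  val_labels='val_labels.txt',
                  vocab_file='vocab.txt',
                  labels_list=labels_list,
                  vocab_size=vocab_size,
                  epochs=3,
                  batch_size=100)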

Running this pipeline fails on GPU with the following error:

---------------------------------------------------------------------------
UnknownError                              Traceback (most recent call last)
<ipython-input-16-6c7a6651d7fd> in <module>()
      7                   vocab_size=vocab_size,
      8                   epochs=3,
----> 9                   batch_size=100)

<ipython-input-14-06067a0270a4> in training_pipeline(train_file, train_labels, val_file, val_labels, vocab_file, labels_list, vocab_size, epochs, batch_size)
     59         epochs=epochs,
     60         validation_data=validation_set,
---> 61         verbose=1)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    762         workers=0,
    763         shuffle=shuffle,
--> 764         initial_epoch=initial_epoch)
    765
    766     # Case 3: Symbolic tensors or Numpy array-like.

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
   1482         shuffle=shuffle,
   1483         initial_epoch=initial_epoch,
-> 1484         steps_name='steps_per_epoch')
   1485
   1486   def evaluate_generator(self,

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    244
    245       is_deferred = not model._is_compiled
--> 246       batch_outs = batch_function(*batch_data)
    247       if not isinstance(batch_outs, list):
    248         batch_outs = [batch_outs]

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
   1226     else:
   1227       self._make_fit_function()
-> 1228       outputs = self._fit_function(ins)  # pylint: disable=not-callable
   1229
   1230     if reset_metrics:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py in __call__(self, inputs)
   3207           value = math_ops.cast(value, tensor.dtype)
   3208         converted_inputs.append(value)
-> 3209     outputs = self._graph_fn(*converted_inputs)
   3210     return nest.pack_sequence_as(self._outputs_structure,
   3211                                  [x.numpy() for x in outputs])

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
    438       raise TypeError("Keyword arguments {} unknown. Expected {}.".format(
    439           list(kwargs.keys()), list(self._arg_keywords)))
--> 440     return self._call_flat(args)
    441
    442   def _filtered_call(self, args, kwargs):

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in _call_flat(self, args)
    507     # Only need to override the gradient in graph mode and when we have outputs.
    508     if context.executing_eagerly() or not self.outputs:
--> 509       outputs = self._inference_function.call(ctx, args)
    510     else:
    511       self._register_gradient()

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py in call(self, ctx, args)
    295             attrs=("executor_type", executor_type,
    296                    "config_proto", config),
--> 297             ctx=ctx)
    298       # Replace empty list with None
    299       outputs = outputs or None

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     64     else:
     65       message = e.message
---> 66     six.raise_from(core._status_to_exception(e.code, message), None)
     67   except TypeError as e:
     68     if any(ops._is_keras_symbolic_tensor(x) for x in inputs):

/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)

UnknownError: Fail to find the dnn implementation.
     [[{{node unified_lstm_1/CudnnRNN}}]]
     [[training_1/Adam/gradients/loss_1/dense_5_loss/binary_crossentropy/Mean_grad/Prod_1/_62]] [Op:__inference_keras_scratch_graph_3795]


The above code runs correctly on CPU.
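
In case it helps isolate the problem, here is a stripped-down sketch that drops read_data and fits the same model on random integer data; on a GPU runtime it should exercise the same LSTM/CudnnRNN code path (the shapes and sizes below are arbitrary):

import numpy as np

# Random token sequences standing in for the real tokenized data.
x = np.random.randint(0, 100, size=(70, 20))   # 70 sequences of length 20
y = np.random.randint(0, 2, size=(70, 1))      # binary labels

model = create_model(vocab_size=100)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x, y, batch_size=7, epochs=1, verbose=1)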

Thomas Reynaud

Feb 18, 2019, 9:30:59 AM
to TensorFlow Community Testing
Maybe I am doing something wrong on Colab. If someone else has encountered this problem, I'll open an issue on GitHub.

Paige Bailey

Feb 21, 2019, 1:17:33 PM
to Thomas Reynaud, TensorFlow Community Testing
Hi, Thomas -
I'm attempting to replicate the error you shared, but I am not able to reproduce it (using the latest version of tf-nightly-gpu-2.0-preview).


Could you try running the notebook above and seeing if it throws the same error?

Thanks!
.pb

[Attachment: Screenshot from 2019-02-21 10-16-31.png]
