ctc loss function for speech recognition

235 views

Skip to first unread message

stepha...@gmail.com

unread,

Apr 22, 2018, 4:30:54 PM4/22/18

to Keras-users

Hi,

I'm trying to implement a CTC loss function for speech recognition (following this article: https://arxiv.org/pdf/1412.5567.pdf), but I have a problem with ctc_batch_cost: I don't know how to provide its inputs. Can you help me, please?

Thank you

import numpy as np
	import tensorflow as tf
	import keras
	import sound
	from keras.layers import Input, Dense, Add
	from keras.layers.recurrent import GRU
	from keras.models import Model
	import keras.backend as K
	from keras.optimizers import SGD


	def ctc_loss(y_true, y_pred):
	    """Keras-compatible CTC loss built on ``K.ctc_batch_cost``.

	    Keras only hands a loss function ``y_true`` and ``y_pred``, so the
	    per-sample sequence lengths that CTC needs are derived from the tensor
	    shapes at graph-execution time instead of being hard-coded for one
	    particular batch size.

	    Args:
	        y_true: label tensor; it is flattened to 2-D
	            ``(samples, max_label_length)`` before being passed on.
	        y_pred: network output; ``K.ctc_batch_cost`` expects
	            ``(samples, time_steps, num_classes)`` softmax activations.

	    Returns:
	        A tensor with one CTC loss value per sample.

	    NOTE(review): every column of ``y_true`` is treated as a real label and
	    every time step of ``y_pred`` as valid. If labels or inputs are padded,
	    the true lengths have to be supplied separately (e.g. as extra model
	    inputs feeding a ``Lambda`` layer that calls ``K.ctc_batch_cost``).
	    """
	    labels = K.batch_flatten(y_true)

	    # Dynamic (samples, 1) shape so the loss works for any batch size.
	    batch_size = tf.shape(y_pred)[0]
	    length_shape = tf.stack([batch_size, 1])

	    # Integer lengths, one row per sample, as required by ctc_batch_cost.
	    input_length = tf.fill(length_shape, tf.shape(y_pred)[1])
	    label_length = tf.fill(length_shape, tf.shape(labels)[1])

	    per_sample_cost = K.ctc_batch_cost(labels, y_pred, input_length, label_length)
	    # ctc_batch_cost yields shape (samples, 1); collapse the trailing axis.
	    return K.mean(per_sample_cost, axis=-1)


	# model.load_weights('models/')

	# Input geometry: the network consumes (MAX_TIME_FRAMES, NB_FREQUENCIES)
	# matrices, as fixed by the Input layer below.
	NB_FREQUENCIES = 9000
	MAX_TIME_FRAMES = 500
	MAX_LABEL_SIZE = 100  # not referenced in this snippet

	inputs = Input(shape=(MAX_TIME_FRAMES, NB_FREQUENCIES))

	# Three stacked fully connected layers (Dense acts on the last axis).
	h1 = Dense(64, activation='relu')(inputs)
	h2 = Dense(64, activation='relu')(h1)
	h3 = Dense(64, activation='relu')(h2)

	# Bidirectional recurrent layer with summed directions.
	# A bare GRU(go_backwards=True, return_sequences=True) emits its sequence in
	# reversed time order, so adding it directly to the forward GRU would sum
	# frame t with frame T-1-t. The Bidirectional wrapper re-reverses the
	# backward output before merging, keeping both directions time-aligned.
	h4 = keras.layers.Bidirectional(
	    GRU(64, return_sequences=True),
	    merge_mode='sum',
	)(h3)

	h5 = Dense(64, activation='relu')(h4)

	# 29 output classes per time step, normalised with softmax for CTC.
	outputs = Dense(29, activation='softmax')(h5)

	model = keras.models.Model(inputs=inputs, outputs=outputs)
	model.summary()

	sgd = SGD(nesterov=True)
	# No 'accuracy' metric: it would compare per-frame argmax predictions with
	# label sequences of a different length, which is meaningless under CTC.
	model.compile(loss=ctc_loss, optimizer=sgd)