ctc loss function for speech recognition

235 views
Skip to first unread message

stepha...@gmail.com

unread,
Apr 22, 2018, 4:30:54 PM4/22/18
to Keras-users
Hi,
I'm trying to implement a ctc loss function for speech recognition (with this article https://arxiv.org/pdf/1412.5567.pdf) but I have problem with ctc_batch_cost, I have problem with input. Can you help me please.

Thank you

import numpy as np
import tensorflow as tf
import keras
import sound
from keras.layers import Input, Dense, Add
from keras.layers.recurrent import GRU
from keras.models import Model
import keras.backend as K
from keras.optimizers import SGD
def ctc_loss(y_true, y_pred):
input_length = np.zeros((5, 1)) # a changer
label_length = np.zeros((5, 1)) # a changer
input_length = tf.convert_to_tensor(input_length, dtype=tf.float32)
label_length = tf.convert_to_tensor(label_length, dtype=tf.float32)
return K.mean(K.ctc_batch_cost(K.batch_flatten(y_true), y_pred, input_length, label_length), axis=-1)
# model.load_weights('models/')
NB_FREQUENCIES = 9000
MAX_TIME_FRAMES = 500
MAX_LABEL_SIZE = 100
inputs = Input(shape=(MAX_TIME_FRAMES, NB_FREQUENCIES))
h1 = Dense(64, activation='relu')(inputs)
h2 = Dense(64, activation='relu')(h1)
h3 = Dense(64, activation='relu')(h2)
lb = GRU(64, go_backwards=True, return_sequences=True)(h3)
lf = GRU(64, return_sequences=True)(h3)
h4 = Add()([lb,lf]) #merge
h5 = Dense(64, activation='relu')(h4)
outputs = Dense(29, activation = 'softmax')(h5)
model = keras.models.Model(inputs=inputs, outputs=outputs)
model.summary()
sgd = SGD(nesterov=True)
model.compile(loss=ctc_loss, metrics=['accuracy'], optimizer=sgd)
main.py
Reply all
Reply to author
Forward
0 new messages