cost(y, label, blank_symbol)WARNING (theano.tensor.opt): Failed to infer_shape from Op GpuDnnPool.y = lasagne.layers.get_output(network)ctc_cost = CTC.cost(y, label, blank_symbol)params = lasagne.layers.get_all_params(network, trainable=True)updates = lasagne.updates.nesterov_momentum( ctc_cost, params, learning_rate=0.0001, momentum=0.9)train_fn = theano.function([input_var, label, blank_symbol], ctc_cost, updates=updates,allow_input_downcast=True)#!/usr/bin/env pythonfrom __future__ import print_functionimport sysimport osimport time
import numpy as npimport theanoimport theano.tensor as T
import lasagneimport cPickleimport reimport randomimport lasagne.layers.dnn
import PER_jin as PERimport jinctc as CTC
# ############################# Batch iterator ###############################
# Load data set
def loadArray(dirpath):
    """Yield (spectrogram, labels, num) triples for pickled files in `dirpath`.

    Scans `dirpath` in random order for files named
    ``<folder>.<prename>.<num>.label`` and, when the companion
    ``<folder>.<prename>.<num>.array`` file exists, unpickles both.

    Yields
    ------
    spectroArray : array cast to float32 (GPU default dtype)
    labelArray   : array cast to int32 (labels are class indices)
    num          : int, the <num> field of the file name
    """
    # Prefer the fast C pickle on Python 2, fall back to stdlib pickle on
    # Python 3 -- both read the same on-disk format.
    try:
        import cPickle as pickle_mod
    except ImportError:
        import pickle as pickle_mod
    pattern = r'.+\.label'   # raw string so the regex escape is explicit
    another = 'array'        # suffix of the companion spectrogram file
    names = os.listdir(dirpath)
    random.shuffle(names)    # visit utterances in a new random order each call
    for name in names:
        if re.match(pattern, name) is not None:
            # NOTE(review): assumes names split into exactly four dot-fields
            # ('folder.prename.num.suffix') -- confirm for all data files.
            folder, prename, num, suffix = name.split('.')
            target = folder + '.' + prename + '.' + num + '.' + another
            targetpath = os.path.join(dirpath, target)
            # Only yield when the matching data file exists; `num` ties the
            # spectrogram length to the label length.
            if os.path.exists(targetpath):
                # Fix: open via the full path instead of a CWD-relative name
                # (the original only worked because main() chdir'ed first),
                # and use open() -- the file() builtin is deprecated/removed.
                with open(targetpath, 'rb') as f:
                    spectroArray = pickle_mod.load(f)
                # GPU default type is float32
                spectroArray = np.float32(spectroArray)
                with open(os.path.join(dirpath, name), 'rb') as f:
                    labelArray = pickle_mod.load(f)
                # label should be int type
                labelArray = np.int32(labelArray)
                yield spectroArray, labelArray, int(num)
# ##################### Build the neural network model #######################
def build_cnn(input_var):
    """Assemble the conv / max-pool / softmax Lasagne network used by main().

    input_var : theano tensor4 bound to the input layer.
    Returns the final lasagne layer (a 40-unit softmax DenseLayer).
    """
    def _show(title, layer):
        # Trace helper: print a caption followed by the layer's output shape.
        print(title)
        print(lasagne.layers.get_output_shape(layer))

    # Input: batches of 3 x 40 x 40 feature maps.
    net = lasagne.layers.InputLayer(shape=(None, 3, 40, 40),
                                    input_var=input_var)
    # No input dropout: it tends to work less well for convolutional layers.
    # Layer one: 32 convolution kernels of size 7x7, stride 1, pad 3, ReLU.
    net = lasagne.layers.dnn.Conv2DDNNLayer(
        net, num_filters=32, filter_size=(7, 7), stride=(1, 1), pad=3,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    _show('Layer 1 Conv shape:', net)
    # Max-pooling by a factor of 2 in both dimensions, with (1, 1) padding.
    net = lasagne.layers.dnn.MaxPool2DDNNLayer(
        net, pool_size=(2, 2), stride=None, pad=(1, 1))
    _show('Layer 1 MaxPool shape:', net)
    # Dense softmax output over the 40 classes.
    net = lasagne.layers.DenseLayer(
        net, num_units=40, nonlinearity=lasagne.nonlinearities.softmax)
    _show('softmax shape:', net)
    return net
# ############################## Main program ################################
def main(paramFile="",num_epochs=10):
    """Train the CNN with the CTC objective, then evaluate on ./test.

    paramFile  : optional pickle of parameter values to warm-start from
                 ("" trains a new network).
    num_epochs : number of passes over the training data (default 10).

    Side effects: chdirs between ./train, ./test and the starting directory;
    writes one pickled parameter .save file per epoch.
    """
    # Prepare Theano variables for inputs and targets.
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    #y = T.matrix()
    # CTC inputs: the blank-free label sequence and the blank class index.
    label = T.vector()
    blank_symbol = T.scalar()
    # Create the neural network model (see build_cnn).
    print("Building model and compiling functions...")
    network = build_cnn(input_var)
    #jin
    if paramFile=="":
        print("Train a new network!")
    else:
        # Warm start: restore previously pickled parameter values.
        print("Load well trained parameters from "+paramFile)
        f = file(paramFile,'rb')
        params = cPickle.load(f)
        f.close()
        lasagne.layers.set_all_param_values(network,params)
    # Training objective: CTC cost of the network output against the
    # blank-free label, minimised with Nesterov momentum.
    y = lasagne.layers.get_output(network)
    ctc_cost = CTC.cost(y, label, blank_symbol)
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            ctc_cost, params, learning_rate=0.0001, momentum=0.9)
    train_fn = theano.function([input_var, label, blank_symbol], ctc_cost, updates=updates,allow_input_downcast=True)
    # Deterministic (no-dropout) predictions for monitoring/validation.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    # Finally, launch the training loop.
    print("Starting training...")
    #jin
    # Compiled helpers returning per-frame argmax predictions (numpy.ndarray).
    train_out = T.argmax(test_prediction, axis=1)
    train_acc = T.mean(T.eq(train_out, target_var), dtype=theano.config.floatX)
    train_label = theano.function([input_var,target_var],[train_out,train_acc,test_prediction])
    val_out = T.argmax(test_prediction, axis=1)
    val_label = theano.function([input_var],val_out)
    # Split the files under ./train into a 90% / 10% train/validation split.
    dirpath = os.getcwd()
    print('dirpath = '+dirpath)
    train_dirpath = dirpath + '/train'
    test_dirpath = dirpath + '/test'
    # Two files per utterance (.label + .array).  NOTE(review): '/' is
    # integer division only under Python 2 -- which this script targets
    # (file(), cPickle); confirm before porting.
    total = len(os.listdir(train_dirpath)) / 2
    train_total_num = int(0.9 * total)
    validation_total_num = total - train_total_num
    print('Train num = ' + str(train_total_num))
    print('Validation num = '+str(validation_total_num))
    # Class index reserved for the CTC blank symbol.
    blank_symbol_num = 39
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # loadArray opens CWD-relative names, so work inside ./train here.
        os.chdir(train_dirpath)
        # In each epoch, we do a full pass over the training data ...
        train_err = 0
        train_batches = 0
        start_time = time.time()
        counter = 0
        # ... and over the validation tail of the same directory:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in loadArray(train_dirpath):
            inputs, targets, batchNum = batch
            print('spectro shape:')
            print(inputs.shape)
            print('label shape:')
            print(targets.shape)
            # Collapse frame labels and strip blanks to get the CTC target.
            label_without_blank = PER.phn2targetseq(targets,blank_symbol_num)
            label_without_blank = label_without_blank[0,:]
            print('noblanklabel shape = '+str(label_without_blank.shape))
            counter += 1
            # NOTE(review): '<' sends only train_total_num - 1 utterances to
            # training; '<=' would match the 90% split computed above.
            if counter < train_total_num:
                train_batches += batchNum
                # trainwrd = predicted frame labels, acc = frame accuracy,
                # yy = per-frame class scores.
                trainwrd, acc, yy = train_label(inputs,targets)
                print("y shape = "+str(yy.shape))
                ctc_loss = train_fn(inputs, label_without_blank, blank_symbol_num)
                train_err += ctc_loss
                #ctc_loss = ctc_fn(yy, label_without_blank, blank_symbol_num)
                print('ctc loss = '+str(ctc_loss))
                print('train acc = '+str(acc))
                # Collapse predicted frames to a phoneme sequence for display.
                wrd = PER.phn2word(trainwrd)
                print('train output word=')
                print(wrd)
                labelphn = PER.phn2word(targets)
                print('labelphn=')
                print(labelphn)
                print(' Train set completed : '+str(float(counter)/train_total_num*100))
            else:
                err, acc = val_fn(inputs, targets)
                val_err += err * batchNum
                val_acc += acc * batchNum
                val_batches += batchNum
                # valwrd = predicted frame labels on this validation item.
                valwrd = val_label(inputs)
                print('test acc = '+str(acc))
                print('test output word=')
                valwrd = PER.phn2word(valwrd)
                print(valwrd)
                labelphn = PER.phn2word(targets)
                print('labelphn=')
                print(labelphn)
                print(' Validation set completed : '+str(float(counter-train_total_num)/validation_total_num*100))
        # Then we print the results for this epoch.
        # NOTE(review): the divisions below raise ZeroDivisionError when the
        # directory is so small that train_batches or val_batches stays 0.
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
        # Back to the starting directory to store this epoch's parameters.
        os.chdir(dirpath)
        print(" should store epoch {}".format(epoch+1))
        pythonName,suffix = os.path.splitext(__file__)
        param2store = lasagne.layers.get_all_param_values(network)
        storename = pythonName+"_"+str((epoch+1))+"_accu="+str(val_acc / val_batches * 100)+".save"
        with file(storename,'wb') as f:
            cPickle.dump(param2store,f)
    # After training, compute and print the test error on ./test.
    os.chdir(test_dirpath)
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in loadArray(test_dirpath):
        inputs, targets, batchNum = batch
        err, acc = val_fn(inputs, targets)
        test_err += err*batchNum
        test_acc += acc*batchNum
        test_batches += batchNum
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))


if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a neural network on TIMIT using Lasagne.")
        print("Usage: %s [paramFile [EPOCHS]]" % sys.argv[0])
        print()
        print("paramFile: the file of well trained parameters")
        # NOTE(review): main()'s actual default is 10 epochs, not 500 --
        # the help text below is stale.
        print("EPOCHS: number of training epochs to perform (default: 500)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['paramFile'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)
import theanoimport theano.tensor as Timport numpy as npfrom theano.ifelse import ifelse'''Reference: Alex Graves, Connectionist Temporal Classification
Variables:
T: length of input sequence
L: length of label sequence without blanks
C: class number except blank symbol
y: T x C+1 extracted from feature
label: 1 x L label sequence without blanks
alpha: T x 2L+1 state transfer matrix according to Alex's paper
alpha[t-1,s-1] = \alpha_t(s)  # in latex style
'''


def add_blanks(label, blank_symbol):
    '''
    Interleave blanks into a label sequence:
    [l1, l2, ..., lL]  ->  [blank, l1, blank, l2, ..., blank, lL, blank]
    Input shape: 1 x L (a 1-D label vector in practice -- see NOTE below)
    Output shape: 1 x 2L+1
    '''
    # (L,) -> (L, 1): one column holding the labels.
    # NOTE(review): dimshuffle(0,'x') only adds an axis to a 1-D input, so
    # `label` is effectively a vector here despite the "1 x L" wording.
    extend_label = label.T.dimshuffle(0,'x')
    # Matching (L, 1) column filled with the blank symbol.
    blanks = T.zeros_like(extend_label) + blank_symbol
    # (L, 2) rows of (label, blank), flattened row-major into a (1, 2L)
    # sequence: l1, blank, l2, blank, ..., lL, blank.
    concat = T.concatenate([extend_label, blanks], axis=1)
    res = concat.reshape((1,concat.shape[0]*concat.shape[1]))
    # res.shape[0] is 1, so this is a single leading blank of shape (1, 1).
    begining_blanks = T.zeros((1, res.shape[0])) + blank_symbol
    blanked_label = T.concatenate([begining_blanks, res], axis=1)
    return blanked_label
def recurrence(blanked_label):
    '''
    Build the L' x L' transition matrix R (L' = 2L+1) such that
      A(t,s) = alpha(t,s) + alpha(t,s-1)                    if s is odd
             = alpha(t,s) + alpha(t,s-1) + alpha(t,s-2)     if s is even
    is computed in a single product: A(t,:) = alpha(t,:) * R
    '''
    # Diagonal (stay in state) + first superdiagonal (advance one state).
    R = T.eye(blanked_label.shape[1])+T.eye(blanked_label.shape[1],k=1)
    '''
    Usually blanked_label.shape[1] > 3, because an L=1 label is degenerate,
    so that situation is ignored here.
    Programming notes:
    1. Indexing is 0-based: T.imatrix[0,0] is the element written (1,1)
       in the paper's notation.
    2. theano.scan passes arguments to fn in this order:
       first the sequences value(s), then the outputs_info value(s),
       then the non_sequences value(s) -- define fn accordingly.
    '''
    # Add the skip transitions R[2p+1, 2p+3] = 1 (label -> next label,
    # jumping over the blank in between), for p = 0 .. L-2.
    # NOTE(review): standard CTC disallows this skip when the two labels are
    # identical (e.g. a repeated phoneme after its separating blank was
    # removed); R always allows it -- confirm repeated adjacent labels
    # cannot occur, or special-case them.
    def set_value_at_position(pos,mtrx):
        return T.set_subtensor(mtrx[2*pos+1,2*pos+3], 1)
    result,updates = theano.scan(fn=set_value_at_position,
                                 outputs_info=R,
                                 sequences=T.arange(blanked_label.shape[1]/2-1))
    return result[-1]
def cost(y, label, blank_symbol):
    '''
    CTC cost of `label` given per-frame class scores `y`, via a scaled
    forward recursion.

    y            : T x (C+1) matrix; columns are indexed below by the label
                   values, so y[t, k] is the score of class k at frame t.
    label        : label sequence WITHOUT blanks (see add_blanks).
    blank_symbol : the class index reserved for blank.

    Returns -sum_t log(C_t), where C_t is the per-row normaliser of the
    scaled forward pass.
    NOTE(review): the normaliser sums over *all* states of each row rather
    than only the two admissible final states of the standard CTC
    recursion -- confirm this matches the intended likelihood.
    '''
    blanked_label = add_blanks(label, blank_symbol)
    R = recurrence(blanked_label)
    '''
    Row update used below:
      A(t,:) = alpha(t,:) * R
      alpha[t,s] = A[t-1,s] * y[t, blanked_label[s]]   for t = 1 .. T-1
    (alpha(t,:) in the paper corresponds to alpha[t-1,:] here.)
    '''
    # Initialize alpha as a T x L' zero matrix (eye is only used for shape).
    alpha = T.eye(y.shape[0],R.shape[0])
    alpha = T.zeros_like(alpha)
    # Base case: at t=0 only the leading blank and the first label are
    # reachable.
    alpha = T.set_subtensor(alpha[0,0],y[0,blanked_label[0,0].astype('int32')])
    alpha = T.set_subtensor(alpha[0,1],y[0,blanked_label[0,1].astype('int32')])
    A = T.zeros_like(alpha)
    # C holds the per-row normalisers; length T.
    C = T.zeros_like(alpha[:,0])
    C = T.set_subtensor(C[0],alpha[0,0]+alpha[0,1])
    # judge = 2T - L': threshold used to zero states that can no longer be
    # part of a complete path (pruning the corner of the trellis).
    judge = 2*alpha.shape[0]-alpha.shape[1]
    # row = t (time index); col = s (state index in the blanked label)
    def rowScan(row,alpha,A,C):
        # NOTE: columnScan closes over the rowScan-local A, which is
        # reassigned below *before* the inner scan runs -- the statement
        # order in this function is significant.
        def columnScan(col,mtrx,row):
            return ifelse(T.gt(2*row-col+2,judge),T.set_subtensor(mtrx[row+1,col],0),T.set_subtensor(mtrx[row+1,col],A[row,col]*y[row+1,blanked_label[0,col].astype('int32')]))
        A = T.set_subtensor(A[row,:],T.dot(alpha[row,:],R))
        colresult,colupdate=theano.scan(fn=columnScan,
                                        sequences=T.arange(alpha.shape[1]),
                                        outputs_info=alpha,
                                        non_sequences=row)
        alpha = colresult[-1]
        # Normalise the new row so the forward probabilities do not
        # underflow; the normaliser is logged into C.
        rowsum = alpha[row+1,:].sum()
        C = T.set_subtensor(C[row+1],rowsum)
        alpha = T.set_subtensor(alpha[row+1,:],alpha[row+1,:]/rowsum)
        return [alpha,A,C]
    ([rowresult,rowa,rowc],rowupdate)=theano.scan(fn=rowScan,
                                                  sequences=T.arange(alpha.shape[0]-1),
                                                  outputs_info=[alpha,A,C])
    alpha = rowresult[-1]
    A = rowa[-1]
    C = rowc[-1]
    return -T.log(C).sum()
'''run example:y = T.dmatrix()label = T.lvector()blank_symbol = T.iscalar()ctccost = cost(y, label, blank_symbol)import PER_jin as PERy = T.dmatrix()label = T.lvector()blank_symbol = T.iscalar()ctccost = cost(y, label, blank_symbol)lpri = np.array([2,0,0,2,1,1,2,0,0,2,1,1])l = PER.phn2targetseq(lpri,2)l = l[0,:]#l = np.array([0,1,0,1])print lprint l.shapeyy = np.array([[.1,.2,.7],[.2,.3,.5],[.3,.4,.3],[.4,.3,.3], [.3,.5,.2],[.2,.6,.2],[.1,.4,.5],[.3,.4,.3], [.2,.3,.5],[.3,.4,.3],[.3,.5,.2],[.2,.3,.5]])f = theano.function([y,label,blank_symbol],ctccost)cst = f(yy,l,2)print cst# it will print 3.48256520603'''
import numpy as np
# input phoneme is np.array
# output word is list
def phn2word(phoneme):
    """Collapse consecutive duplicate frame labels into a phoneme sequence.

    Parameters
    ----------
    phoneme : iterable of int (typically a 1-D numpy array of frame labels)

    Returns
    -------
    list : the labels with consecutive repeats merged, order preserved.

    Fix: the original tracked the previous label with a ``prev = -1``
    sentinel, which would silently drop a leading ``-1`` label; groupby
    has no sentinel and handles any label value.
    """
    from itertools import groupby  # local import keeps module deps unchanged
    return [lbl for lbl, _ in groupby(phoneme)]
# return target sequence without blank and
# continuous repeated frame label
# phoneme = primitive frame label
# clsnum = number of classes without blank
# clsnum = the sign of blank as well
# return a 1 x L ndarray
def phn2targetseq(phoneme,clsnum):
    """Collapse repeats, then drop blanks; return a 1 x L target sequence.

    Parameters
    ----------
    phoneme : iterable of int, per-frame labels.
    clsnum  : int, the class index used as the blank symbol.

    Returns
    -------
    numpy.ndarray of shape (1, L): run-collapsed labels with blanks removed
    (collapse happens BEFORE blank removal, so a blank-separated repeated
    label is kept twice, matching the original behavior).

    Fix: the original ``prev = -1`` sentinel would drop a leading ``-1``
    label; the groupby form has no sentinel.
    """
    from itertools import groupby  # local import keeps module deps unchanged
    word = [lbl for lbl, _ in groupby(phoneme) if lbl != clsnum]
    return np.array([word])
--
You received this message because you are subscribed to the Google Groups "lasagne-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to lasagne-user...@googlegroups.com.
To post to this group, send email to lasagn...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/lasagne-users/89ac63a1-15be-43e8-bb7e-26835a3429cf%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
> updates = lasagne.updates.nesterov_momentum(ctc_cost, params, learning_rate=0.0001, momentum=0.9)

This is a problem that has already been fixed in Theano. Update Theano to the latest development version.

Fred