def build_model(dbn, input_var, lstm_size):
    """Stack the pretrained DBN encoder under an LSTM letter classifier.

    :param dbn: pretrained network whose dense layers seed the encoder
    :param input_var: symbolic input variable for the network
    :param lstm_size: number of hidden units in the LSTM layer
    :return: the softmax output layer of the assembled network
    """
    gate_parameters = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        b=las.init.Constant(0.))
    cell_parameters = Gate(
        W_in=las.init.Orthogonal(),
        W_hid=las.init.Orthogonal(),
        # Setting W_cell to None denotes that no cell connection will be used.
        W_cell=None,
        b=las.init.Constant(0.),
        # By convention, the cell nonlinearity is tanh in an LSTM.
        nonlinearity=tanh)

    # Reuse the pretrained encoder layers (indices 1..4 of the DBN stack).
    dbn_layers = dbn.get_all_layers()
    l_in = InputLayer((None, 1200), input_var=input_var)
    l_1 = DenseLayer(l_in, 2000, W=dbn_layers[1].W, b=dbn_layers[1].b,
                     nonlinearity=sigmoid)
    l_2 = DenseLayer(l_1, 1000, W=dbn_layers[2].W, b=dbn_layers[2].b,
                     nonlinearity=sigmoid)
    l_3 = DenseLayer(l_2, 500, W=dbn_layers[3].W, b=dbn_layers[3].b,
                     nonlinearity=sigmoid)
    l_4 = DenseLayer(l_3, 50, W=dbn_layers[4].W, b=dbn_layers[4].b,
                     nonlinearity=linear)

    # NOTE(review): l_4 is 2D (batch, 50) while LSTMLayer expects a 3D
    # (batch, seq, features) input — confirm; this is the issue the
    # discussion below works around with ReshapeLayers.
    l_lstm = LSTMLayer(
        l_4, lstm_size,
        # Supply the gate parameters for each gate.
        ingate=gate_parameters,
        forgetgate=gate_parameters,
        cell=cell_parameters,
        outgate=gate_parameters,
        # Learn the initialization and use gradient clipping.
        learn_init=True,
        grad_clipping=5.)
    l_out = DenseLayer(l_lstm, 26, nonlinearity=las.nonlinearities.softmax)
    print(l_out.output_shape)
    return l_out
# Load the DBN whose dense layers will initialize the encoder.
dbn = load_pretrained_layers()
input = T.matrix('input')
network = build_model(dbn, input, 250)
prediction = las.layers.get_output(network)

Is it possible to implement something like this in Lasagne?
Thanks for the advice. In this case, I presume I can't use a variable batch size and sequence length, as the ReshapeLayer complains:
ValueError: `shape` input references must be single-element lists of int >= 0

when I use something like (None, None, 1200).
# Collect the pretrained encoder parameters (DBN layers 1..4).
pretrained = dbn.get_all_layers()
weights = [pretrained[i].W for i in range(1, 5)]
biases = [pretrained[i].b for i in range(1, 5)]
# Input layers for the data and for the per-timestep sequence mask.
l_in = InputLayer(input_shape, input_var, 'input')
l_mask = InputLayer(mask_shape, mask_var)

# Symbolic handles so batch size and sequence length can vary at run time.
sym_batch = l_in.input_var.shape[0]
sym_seqlen = l_in.input_var.shape[1]

print(las.layers.get_output_shape(l_in))
# Collapse (batch, seq, features) to (batch * seq, features) so the
# dense encoder can process every timestep independently.
l_flat = ReshapeLayer(l_in, (-1, input_shape[-1]))
print(las.layers.get_output_shape(l_flat))
l_encoder = create_encoder(weights, biases, l_flat)
print(las.layers.get_output_shape(l_encoder))
# Restore the sequence axis before feeding the recurrent layer.
l_reshape2 = ReshapeLayer(l_encoder, (sym_batch, sym_seqlen, -1))
print(las.layers.get_output_shape(l_reshape2))
# Shared initialization for the LSTM input/forget/output gates.
gate_parameters = Gate(
    b=las.init.Constant(0.),
    W_in=las.init.Orthogonal(),
    W_hid=las.init.Orthogonal())
# Cell "gate": no peephole connection (W_cell=None), and the tanh
# nonlinearity that is conventional for an LSTM cell.
cell_parameters = Gate(
    b=las.init.Constant(0.),
    W_in=las.init.Orthogonal(),
    W_hid=las.init.Orthogonal(),
    W_cell=None,
    nonlinearity=tanh)
# Recurrent layer over the encoded sequence, honouring the mask.
l_lstm = LSTMLayer(
    l_reshape2, lstm_size,
    # A separate input layer carries the sequence mask.
    mask_input=l_mask,
    # Supply the gate parameters for each gate.
    ingate=gate_parameters,
    forgetgate=gate_parameters,
    cell=cell_parameters,
    outgate=gate_parameters,
    # Learn the initial state and clip gradients for stability.
    learn_init=True,
    grad_clipping=5.)
print(las.layers.get_output_shape(l_lstm))
# Keep only the final time step for sequence classification.
l_forward_slice = SliceLayer(l_lstm, -1, 1)
print(las.layers.get_output_shape(l_forward_slice))
# Feed-forward classifier over the last LSTM state: the network
# predicts one class per sequence, so the output has one softmax
# unit per letter class.
l_out = DenseLayer(
    l_forward_slice,
    num_units=26,
    nonlinearity=las.nonlinearities.softmax)
print(las.layers.get_output_shape(l_out))
return l_out

The printed shapes are:

(None, None, 1200)
(None, 1200)
(None, 50)
(None, None, None)

and then construction fails:

Traceback (most recent call last):
  File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 1531, in <module>
    globals = debugger.run(setup['file'], None, None, is_module)
  File "/Applications/PyCharm.app/Contents/helpers/pydev/pydevd.py", line 938, in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "/Users/zu/school/Individual Project/code/lasagne/avletters/end_to_end.py", line 741, in <module>
    main()
  File "/Users/zu/school/Individual Project/code/lasagne/avletters/end_to_end.py", line 672, in main
    (None, None), mask, 250)
  File "/Users/zu/school/Individual Project/code/lasagne/avletters/end_to_end.py", line 183, in create_end_to_end_model
    learn_init=True, grad_clipping=5.)
  File "/Users/zu/school/Individual Project/code/lasagne/examples/src/lasagne/lasagne/layers/recurrent.py", line 862, in __init__
    self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')
  File "/Users/zu/school/Individual Project/code/lasagne/examples/src/lasagne/lasagne/layers/recurrent.py", line 852, in add_gate_params
    name="W_in_to_{}".format(gate_name)),
  File "/Users/zu/school/Individual Project/code/lasagne/examples/src/lasagne/lasagne/layers/base.py", line 213, in add_param
    param = utils.create_param(spec, shape, name)
  File "/Users/zu/school/Individual Project/code/lasagne/examples/src/lasagne/lasagne/utils.py", line 302, in create_param
    "Tried to create param with shape=%r, name=%r") % (shape, name))
ValueError: Cannot create param with a non-positive shape dimension. Tried to create param with shape=(None, 250), name='W_in_to_ingate'
Exception TypeError: TypeError("'NoneType' object is not callable",) in <function _remove at 0x100774758> ignored

I replaced the last dimension by initializing the reshape with the compressed dimension, which allowed the LSTM layer to be constructed.