I am trying to get a modified version of the Lasagne MNIST example working on some test data. I am trying to approximate a nonlinear function, so the input is a vector of length 64 and the output is a single value. I can't quite figure out why the data dimensions are not working out. This is the output I get when I run it:
/usr/local/lib/python2.7/dist-packages/theano/tensor/signal/downsample.py:6: UserWarning: downsample module has been moved to the theano.tensor.signal.pool module.
"downsample module has been moved to the theano.tensor.signal.pool module.")
Loading data...
(3000, 64)
(3000,)
(572, 64)
(572,)
float32
float32
float32
float32
Data Loaded
Building model and compiling functions...
Starting training...
Traceback (most recent call last):
  File "FF_net3.py", line 173, in <module>
    main(**kwargs)
  File "FF_net3.py", line 123, in main
    train_err += train_fn(inputs, targets)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 871, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 314, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 859, in __call__
    outputs = self.fn()
ValueError: Input dimension mis-match. (input[0].shape[1] = 1, input[1].shape[1] = 500)
Apply node that caused the error: Elemwise{sub,no_inplace}(SoftmaxWithBias.0, InplaceDimShuffle{x,0}.0)
Toposort index: 43
Inputs types: [TensorType(float32, matrix), TensorType(int32, row)]
Inputs shapes: [(500, 1), (1, 500)]
Inputs strides: [(4, 4), (2000, 4)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[Elemwise{Composite{Cast{float32}(((i0 * i1) / i2))}}(TensorConstant{(1, 1) of 2.0}, Elemwise{sub,no_inplace}.0, Elemwise{mul,no_inplace}.0), Elemwise{Sqr}[(0, 0)](Elemwise{sub,no_inplace}.0)]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "FF_net3.py", line 173, in <module>
    main(**kwargs)
  File "FF_net3.py", line 93, in main
    loss = lasagne.objectives.squared_error(prediction, target_var)
  File "/usr/local/lib/python2.7/dist-packages/lasagne/objectives.py", line 152, in squared_error
    return (a - b)**2

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
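From the Apply node info it looks like the subtraction inside squared_error is getting a (500, 1) matrix (the network output) and a (1, 500) row (my ivector targets after a dimshuffle). Here is a minimal sketch that, as far as I can tell, hits the same mismatch with dummy arrays; the names pred/target and the zero data are just stand-ins, not my real code or data:

import numpy as np
import theano
import theano.tensor as T

pred = T.matrix('pred')       # stand-in for the network output, shape (batch, 1)
target = T.ivector('target')  # stand-in for target_var, shape (batch,)

diff = (pred - target) ** 2   # what lasagne.objectives.squared_error boils down to
f = theano.function([pred, target], diff, allow_input_downcast=True)

p = np.zeros((500, 1), dtype='float32')  # like one 500-sample batch of predictions
t = np.zeros(500, dtype='int32')         # like one 500-sample batch of targets

f(p, t)  # raises the same "Input dimension mis-match" ValueError, if I'm reading the shapes right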
And this is the code I use:
#!/usr/bin/env python
from __future__ import print_function
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
theano.config.floatX = 'float32'
#theano.config.compute_test_value = 'warn'
def build_custom_mlp(input_var=None, depth=3, width=1024, drop_input=0,
                     drop_hidden=.5):
    # By default, this creates the same network as `build_mlp`, but it can be
    # customized with respect to the number and size of hidden layers. This
    # mostly showcases how creating a network in Python code can be a lot more
    # flexible than a configuration file. Note that to make the code easier,
    # all the layers are just called `network` -- there is no need to give them
    # different names if all we return is the last one we created anyway; we
    # just used different names above for clarity.

    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, 64),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)
    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
                network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)
    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 1, nonlinearity=softmax)
    return network
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
def main(num_epochs=500):
    # Load the dataset
    print("Loading data...")
    inputVectors = np.load('trainingData.npy')
    inputResults = np.load('trainingScores.npy')
    iVtrain = inputVectors[0:3000, :]
    iRtrain = inputResults[0:3000]
    iVtest = inputVectors[3000:3572, :]
    iRtest = inputResults[3000:3572]
    print(iVtrain.shape)
    print(iRtrain.shape)
    print(iVtest.shape)
    print(iRtest.shape)
    print(iVtrain.dtype)
    print(iRtrain.dtype)
    print(iVtest.dtype)
    print(iRtest.dtype)
    print("Data Loaded")

    # iRtrain.astype(int)
    # iRtest.astype(int)
    iRtrain.reshape(3000, 1)
    iRtest.reshape(572, 1)

    # Prepare Theano variables for inputs and targets
    # input_var = T.tensor4('inputs')
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    FF_NN = build_custom_mlp(input_var)

    prediction = lasagne.layers.get_output(FF_NN)
    loss = lasagne.objectives.squared_error(prediction, target_var)
    loss = loss.mean()

    params = lasagne.layers.get_all_params(FF_NN, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.01, momentum=0.9)

    test_prediction = lasagne.layers.get_output(FF_NN, deterministic=True)
    test_loss = lasagne.objectives.squared_error(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)
    # train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True, mode='DebugMode')

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(iVtrain, iRtrain, 500, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(iVtest, iRtest, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    np.savez('FF_OverUnder_model1.npz', *lasagne.layers.get_all_param_values(FF_NN))

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a neural network on MNIST using Lasagne.")
        print("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        print()
        print("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        print("       'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        print("       with DEPTH hidden layers of WIDTH units, DROP_IN")
        print("       input dropout and DROP_HID hidden dropout,")
        print("       'cnn' for a simple Convolutional Neural Network (CNN).")
        print("EPOCHS: number of training epochs to perform (default: 500)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)
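In case anyone wants to reproduce this without my data files, arrays with the same shapes and dtypes as the ones printed above can be generated like this (random values, purely a stand-in for the real trainingData.npy / trainingScores.npy):

import numpy as np

# dummy stand-ins: same shapes/dtypes as what the script prints, random contents
dummy_data = np.random.rand(3572, 64).astype('float32')
dummy_scores = np.random.rand(3572).astype('float32')
np.save('trainingData.npy', dummy_data)
np.save('trainingScores.npy', dummy_scores)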
Any thoughts? Thanks!