To teach myself Torch7 and Lua, I am trying to apply Torch7 to the "Digit Recognizer" problem from Kaggle. For background, this problem uses the MNIST data. The training data is given as a 42000 x 785 CSV file: each of the 42000 rows is one example, whose first column is the label and whose remaining 784 columns are the pixels of a 28 x 28 image.
I have mainly modified the convolutional neural network given in the example program so that it accepts a 28 x 28 input image rather than a 32 x 32 input image.
I am stumped and not sure what to make of this. I would really appreciate help in figuring out what I am doing wrong. Thank you all very much.
-- Load the serialized images and labels, then split them by example index:
-- the first TRAIN_COUNT examples are used for training and the examples from
-- TRAIN_COUNT + 1 up to TOTAL_COUNT are held out for validation.
trainset = torch.load('train_mnist.th7')
trainsetLabels = torch.load('train_label_mnist.th7')
do
    local TRAIN_COUNT, TOTAL_COUNT = 40000, 42000
    local FIRST_VALID = TRAIN_COUNT + 1

    -- The first range selects examples; each empty range keeps a remaining
    -- dimension of the image tensor in full.
    trainData = trainset[{ {1, TRAIN_COUNT}, {}, {}, {} }]
    validData = trainset[{ {FIRST_VALID, TOTAL_COUNT}, {}, {}, {} }]

    -- Labels are split with the same example ranges as the images.
    trainLabel = trainsetLabels[{ {1, TRAIN_COUNT} }]
    validLabel = trainsetLabels[{ {FIRST_VALID, TOTAL_COUNT} }]
end
-- Preparing training and validation data for use with nn.StochasticGradient.
-- nn.StochasticGradient requires that the dataset can be indexed with an
-- integer (dataset[i] yields the pair { input, target }) and that it has a
-- size() method returning the number of examples.

-- Bundle each split's inputs and targets into one table. These tables must
-- exist before setmetatable() is called on them.
train = { data = trainData, label = trainLabel }
validate = { data = validData, label = validLabel }

-- Any key that is not stored directly in the table (in practice the integer
-- example index) is answered with the { input, target } pair for that index.
setmetatable(train, { __index = function(t, i) return { t.data[i], t.label[i] } end })
setmetatable(validate, { __index = function(t, i) return { t.data[i], t.label[i] } end })

-- size() has to be a real field on each table: otherwise the lookup of "size"
-- would fall through to __index above and return a table instead of a function.
function train:size() return self.data:size(1) end
function validate:size() return self.data:size(1) end

-- Class names; position k in this list corresponds to network output k.
-- NOTE(review): nn.ClassNLLCriterion expects targets in 1..#classes. If the
-- stored labels are the raw digits 0..9 they need to be shifted by one - confirm.
classes = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
-- Preprocessing training data
-- Normalise every channel to zero mean and unit standard deviation. The data
-- is indexed as examples x channels x height x width (four index ranges are
-- used when it is split), so the channel is selected on dimension 2; selecting
-- on dimension 1 would normalise a single example instead of a channel.
-- NOTE(review): assumes the data tensors are floating point - confirm.
mean = {}
stdv = {}
for i = 1, train.data:size(2) do
    local channel = train.data[{ {}, {i}, {}, {} }]
    mean[i] = channel:mean()
    channel:add(-mean[i])
    stdv[i] = channel:std()
    channel:div(stdv[i])
end

-- Preprocessing validation data
-- Reuse the statistics estimated on the training data so that both splits go
-- through exactly the same transformation.
for i = 1, #mean do
    local channel = validate.data[{ {}, {i}, {}, {} }]
    channel:add(-mean[i])
    channel:div(stdv[i])
end
-- LeNet-style convolutional network for single-channel 28 x 28 images.
-- Shape comments read planes x height x width.
net = nn.Sequential()
-- 1 x 28 x 28 -> 6 x 24 x 24
net:add(nn.SpatialConvolution(1, 6, 5, 5))
-- Non-linearity after each convolution / hidden linear layer; without it the
-- consecutive linear maps collapse into a single linear map.
net:add(nn.ReLU())
-- 6 x 24 x 24 -> 6 x 12 x 12
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))
-- 6 x 12 x 12 -> 16 x 8 x 8
net:add(nn.SpatialConvolution(6, 16, 5, 5))
net:add(nn.ReLU())
-- 16 x 8 x 8 -> 16 x 4 x 4
net:add(nn.SpatialMaxPooling(2, 2, 2, 2))
-- Flatten 16 x 4 x 4 into a vector of 256 features for the linear layers.
net:add(nn.View(16 * 4 * 4))
net:add(nn.Linear(16 * 4 * 4, 120))
net:add(nn.ReLU())
net:add(nn.Linear(120, 84))
net:add(nn.ReLU())
-- One output per class.
net:add(nn.Linear(84, #classes))
-- nn.ClassNLLCriterion expects log-probabilities as input, so the network
-- has to end with a LogSoftMax layer.
net:add(nn.LogSoftMax())

-- Negative log-likelihood loss over the class log-probabilities.
-- NOTE(review): the criterion expects targets in 1..#classes; if the stored
-- labels are the raw digits 0..9 they must be shifted by one - confirm.
criterion = nn.ClassNLLCriterion()
-- Plain stochastic gradient descent over the network with the loss above.
trainer = nn.StochasticGradient(net, criterion)
trainer.learningRate = 0.001
-- Fetch the stored label and the network output for validation example i.
-- NOTE(review): `i` is not defined in the visible code; presumably these lines
-- sit inside a loop over the validation examples - confirm.
local groundtruth = validate.label[i]
local prediction = net:forward(validate.data[i])
-- true here means sorting in descending order, so indices[1] is the position
-- of the highest-scoring output.
-- NOTE(review): sort indices are 1-based positions in the output vector; with
-- classes listed as '0'..'9', digit d sits at position d + 1. Confirm how the
-- labels are encoded before comparing them with indices[1].
local confidences, indices = torch.sort(prediction, true)
if groundtruth == indices[1] then