When I train a network on a server, the script freezes right after it prints the "### Epoch ###" banner, and even Ctrl+C has no effect.
I rebooted the server, but the problem remains.
# Theano reads THEANO_FLAGS once, when the package is first imported. The
# variable therefore has to be in the environment BEFORE theano -- or anything
# that imports it, such as lasagne and nolearn -- is imported. Setting it
# afterwards is silently ignored, so device/floatX would keep their defaults.
# NOTE(review): the project modules vgg16_solution and load_data_solution
# presumably import theano/lasagne as well, so they are imported last.
import os
os.environ['THEANO_FLAGS'] = 'floatX=float32,device=gpu0,nvcc.fastmath=True'

import pickle
import sys
import time
from collections import OrderedDict

import matplotlib
matplotlib.use('Agg')  # choose the non-interactive backend before any pyplot import

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import InputLayer, DenseLayer, NonlinearityLayer, DropoutLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.layers import Pool2DLayer as PoolLayer
from lasagne.nonlinearities import softmax
from nolearn.lasagne import NeuralNet
from nolearn.lasagne import TrainSplit
from nolearn.lasagne import objective

from vgg16_solution import build_model
from load_data_solution import batch_gen
print('Building model...')
#build net model using vgg16 architecture (changed to have 5 outputs rather than 1000)
#net_vgg16=build_model()
print('Loading weights...')
# Load the pretrained weights for all layers before the output layer.
# The file is opened in binary mode (pickles are binary data) and inside a
# `with` block so the handle is closed as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code -- only load a vgg16.pkl that
# comes from a trusted source.
with open('vgg16.pkl', 'rb') as weights_file:
    model = pickle.load(weights_file)
pretrained_weights = model['param values'][:-2]  # drop W, b for Imagenet 1000 class softmax
#lasagne.layers.set_all_param_values(net_vgg16['fc7'], pretrained_weights)
#store maximal number of instances
#N_instances= 30 #maximal number of images in one bag. average number is 10.7, and median is 11.
#N_instances=2
print('building the modified net...')
# Modified VGG16-style network, stored layer by layer in an ordered dict so
# individual layers can be looked up by name. (Dropout to be added later.)
net = OrderedDict()
net['input'] = InputLayer((None, 3, 224, 224))

# Convolutional part: five groups, each a run of size-3, pad-1 conv layers
# followed by one size-2 pooling layer.
# Each entry is (group number, conv layers in the group, filters per conv layer).
conv_groups = [
    (1, 2, 64),
    (2, 2, 128),
    (3, 3, 256),
    (4, 3, 512),
    (5, 3, 512),
]
top = net['input']
for group_no, n_convs, n_filters in conv_groups:
    for conv_no in range(1, n_convs + 1):
        top = ConvLayer(top, n_filters, 3, pad=1)
        net['conv{}_{}'.format(group_no, conv_no)] = top
    top = PoolLayer(top, 2)
    net['pool{}'.format(group_no)] = top

# Fully connected part.
net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
net['fc7'] = DenseLayer(net['fc6'], num_units=4096)
#Merge upper layers of each instance by taking the maximum (refinement:replace maximum with maximum over random subset: dropout on instances)
#net['fc_merged'] = lasagne.layers.ElemwiseMergeLayer( [net['fc7'+ str(n)] for n in range(N_instances) ], theano.tensor.maximum)

# Output head: a 10-unit dense layer with no nonlinearity; the softmax is
# applied by the separate 'prob' layer.
# NOTE(review): an earlier comment here described 4 outputs for the labels
# 'plants','window','grass','lake', but the layer has 10 units -- confirm the
# intended number of classes.
net['fc8'] = DenseLayer(net['fc7'], num_units=10, nonlinearity=None)
net['prob'] = NonlinearityLayer(net['fc8'], softmax)
print('modified net built!')

# Initialise every layer up to and including fc7 from the pretrained weights;
# fc8 keeps its fresh initialisation.
print('pre-training the modified net...')
#for n in range(N_instances):
lasagne.layers.set_all_param_values(net['fc7'], pretrained_weights)
#print 'end of pre-training!'
# Symbolic placeholders for a mini-batch of network inputs and its labels.
#X = {'input'+ str(n): T.tensor4('x') for n in range(N_instances)}
X = T.tensor4('x')
y = T.imatrix('y')

# Network output as a function of X: once in training mode and once with
# deterministic=True (stochastic layers such as dropout switched off).
output_layer = net['prob']
output = lasagne.layers.get_output(output_layer, X)
output_deterministic = lasagne.layers.get_output(output_layer, X, deterministic=True)

# Accuracy: fraction of rows whose predicted class (argmax of the
# deterministic output) equals the argmax of the corresponding label row.
pred = T.argmax(output_deterministic, axis=1)
true_class = T.argmax(y, axis=1)
accuracy = T.mean(T.eq(pred, true_class), dtype=theano.config.floatX)

# Training objective: categorical cross-entropy between the training-mode
# output and the labels, averaged into a single scalar to minimise.
train_loss = lasagne.objectives.aggregate(
    lasagne.objectives.categorical_crossentropy(output, y),
    mode='mean',
)

# Parameter updates: SGD with Nesterov momentum over all trainable parameters
# reachable from the output layer.
params = lasagne.layers.get_all_params(output_layer, trainable=True)
updates = lasagne.updates.nesterov_momentum(
    train_loss, params, learning_rate=0.001, momentum=0.9,
)
# Compile the training step and report how long compilation took.
t0 = time.time()
# The progress message is written without a newline and flushed straight away
# so it is visible while compilation is still running; the single space is the
# separator that ends up in front of the 'Done' message on the same line.
sys.stdout.write('Compiling train function...')
sys.stdout.write(' ')
sys.stdout.flush()
# One call performs a training step on a mini-batch: it applies the parameter
# updates and returns the training loss together with the accuracy.
iter_train = theano.function(
    inputs=[X, y],
    outputs=[train_loss, accuracy],
    updates=updates,
)
print('Done [{} s]'.format(time.time() - t0))
# Loop through the data and train the network.
n_epochs = 10
batch_size = 32
for epoch in range(n_epochs):
    print('#########')
    print('{0} {1}'.format(' Epoch ', epoch))
    print('#########')
    # Flush so the banner appears immediately even when stdout is redirected
    # to a file or pipe; otherwise a slow first batch looks like a hang.
    sys.stdout.flush()

    # A fresh batch generator is created for every epoch.
    batches = batch_gen('train.txt', batch_size)
    for i, (x_i, y_i) in enumerate(batches):
        # One training step on this mini-batch: updates the parameters and
        # returns the loss and accuracy for the batch.
        loss, acc = iter_train(x_i, y_i)
        print('Iteration {0}, train loss: {1}, train accuracy: {2}'.format(i, loss, acc))
        sys.stdout.flush()