class RasmusInit(lasagne.init.Initializer):
"""Sample initial weights from the Gaussian distribution.
Initial weight parameters are sampled from N(mean, std).
Parameters
https://github.com/arasmus/ladder
----------
std : float
Std of initial parameters.
mean : float
Mean of initial parameters.
"""
def __init__(self, std=1.0, mean=0.0):
self.std = std
self.mean = mean
# with std=1.0 this reproduces the Rasmus et al. ladder-network initialization
def sample(self, shape):
return lasagne.utils.floatX(lasagne.random.get_rng().normal(
self.mean, self.std, size=shape) /
np.sqrt(shape[0]))
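# Usage sketch (illustrative only; l_in is a hypothetical input layer, not defined here):
# the initializer is passed as the W argument of a Lasagne layer, e.g.
#   l_dense = lasagne.layers.DenseLayer(l_in, num_units=50, W=RasmusInit(std=1.0))
# With std=1.0 the sampled weights are N(0, 1) scaled by 1/sqrt(fan_in), matching
# the Rasmus et al. ladder-network initialization referenced above.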
filename_script = os.path.basename(os.path.realpath(__file__))
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-lr", type=str, default='0.0001')
parser.add_argument("-optimizer", type=str, default='rmsprop')
parser.add_argument("-init", type=str, default='None')
parser.add_argument("-initval", type=str, default='None')
parser.add_argument("-gradclip", type=str, default='1')
args = parser.parse_args()
num_classes = 2
batch_size = 100 # the reshaping and batching code below assumes this fixed value; other batch sizes will fail
np.random.seed(1234) # reproducibility
#----------------------------------------------------------------------
output_folder = os.path.join("results", os.path.splitext(filename_script)[0] + str(uuid.uuid4())[:18].replace('-', '_'))
if not os.path.exists(output_folder):
os.makedirs(output_folder)
output_file = os.path.join(output_folder, 'results.log')
with open(output_file, 'wb') as f:
f.write("#"*80 + "\n")
for name, val in sorted(vars(args).items()):
s = str(name) + " "*(40-len(name)) + str(val)
f.write(s + "\n")
f.write("#"*80 + "\n")
#-----------------------------------------------------------------------
optimizers = {'adam': lasagne.updates.adam,
'adadelta': lasagne.updates.adadelta,
'rmsprop': lasagne.updates.rmsprop,
'sgd': lasagne.updates.sgd,
'nag': lasagne.updates.nesterov_momentum
}
optimizer = lasagne.updates.adam #optimizers[args.optimizer]
if args.init == 'None': # default to antti rasmus init
init = RasmusInit()
else:
if args.initval != 'None':
# if `-initval` is not `'None'`, use it as the first argument to the Lasagne initializer
initargs = [float(args.initval)]
else:
# use default arguments for the Lasagne initializers
initargs = []
inits = {'he': lasagne.init.HeUniform(*initargs),
'glorot': lasagne.init.GlorotUniform(*initargs),
'uniform': lasagne.init.Uniform(*initargs),
'normal': lasagne.init.Normal(*initargs)}
init = inits[args.init]
if args.gradclip == 'None':
gradclip = None
else:
gradclip = float(args.gradclip)
unit = lasagne.nonlinearities.leaky_rectify
lasagne.random.set_rng(np.random.RandomState(seed=1))
num_classes = 2
num_inputs = 784
lr = float(args.lr)
noise = 0.02
num_epochs = 300
start_decay = 20
# generate symbolic variables for input (x and y represent a
# minibatch)
sym_x = T.tensor4('sym_x', dtype='float32')
sym_t = T.ivector('sym_t')
sh_lr = theano.shared(lasagne.utils.floatX(lr))
z_pre0 = InputLayer(shape=(None, 1,28,28))
z0 = z_pre0 # for consistency with other layers
z_noise0 = GaussianNoiseLayer(z0, sigma=noise, name='enc_noise0')
h0 = z_noise0 # no nonlinearity on input
def create_encoder_conv(incoming, num_conv, convsize, layer_num):
i = layer_num
z_pre_1 = Conv2DLayer(incoming, num_conv, convsize, nonlinearity=lasagne.nonlinearities.leaky_rectify)
z_pre= lasagne.layers.dropout(z_pre_1, p=.5)
norm_list = batch_norm(
z_pre, name='enc_normalize%i' % i)
z = norm_list
z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
h = NonlinearityLayer(z_noise,
nonlinearity=unit, name='enc_nonlin%i' % i)
return h, z, z_noise, norm_list, z_pre
def create_encoder_pool(incoming, pool_size, layer_num):
i = layer_num
z_pre = MaxPool2DLayer(incoming, pool_size)
norm_list = batch_norm(
z_pre, name='enc_normalize%i' % i)
z = norm_list
z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
h = NonlinearityLayer(z_noise,
nonlinearity=unit, name='enc_nonlin%i' % i)
return h, z, z_noise, norm_list , z_pre
def create_encoder_dense(incoming, num_units, layer_num):
i = layer_num
temp_l = reshape(incoming, ([0], 100,1))
z_pre_1 = DenseLayer(
incoming=temp_l, num_units=num_units, nonlinearity=lasagne.nonlinearities.leaky_rectify, b=None,
name='enc_dense%i' % i, W=init)
z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
norm_list = batch_norm(
z_pre, name='enc_normalize%i' % i)
z = norm_list
z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
h = NonlinearityLayer(z_noise,
nonlinearity=unit, name='enc_nonlin%i' % i)
return h, z, z_noise, norm_list
def create_encoder(incoming, num_units, layer_num):
i = layer_num
z_pre_1 = DenseLayer(incoming, num_units=num_units, nonlinearity=lasagne.nonlinearities.leaky_rectify, b=None,
name='enc_dense%i' % i, W=init)
z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
norm_list = batch_norm(
z_pre, name='enc_normalize%i' % i)
z = norm_list
z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
h = NonlinearityLayer(z_noise,
nonlinearity=unit, name='enc_nonlin%i' % i)
return h, z, z_noise, norm_list
def create_encoder_final(incoming, num_units, layer_num):
i = layer_num
z_pre_1 = DenseLayer(incoming, num_units=num_units, nonlinearity=lasagne.nonlinearities.leaky_rectify, b=None,
name='enc_dense%i' % i, W=init)
z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
norm_list = batch_norm(
z_pre, name='enc_normalize%i' % i)
z = norm_list
z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
h = NonlinearityLayer(z_noise,
nonlinearity=softmax, name='enc_nonlin%i' % i)
return h, z, z_noise, norm_list
h1, z1, z_noise1, norm_list1 , conv1 = create_encoder_conv(
incoming=h0, num_conv=20, convsize=(5,5), layer_num=1)
h2, z2, z_noise2, norm_list2 , pool2 = create_encoder_pool(
incoming=h1, pool_size=(2,2), layer_num=2)
h3, z3, z_noise3, norm_list3, conv3 = create_encoder_conv(
incoming=h2, num_conv=60, convsize=(4,4), layer_num=3)
h4, z4, z_noise4, norm_list4 , pool4= create_encoder_pool(
incoming=h3, pool_size=(2,2), layer_num=4)
h5, z5, z_noise5, norm_list5 , conv5 = create_encoder_conv(
incoming=h4, num_conv=100, convsize=(3,3), layer_num=5)
h6, z6, z_noise6, norm_list6 , pool6= create_encoder_pool(
incoming=h5, pool_size=(2,2), layer_num=6)
h7, z7, z_noise7, norm_list7 = create_encoder_dense(
h6, num_units=500, layer_num=7)
h8, z8, z_noise8, norm_list8 = create_encoder(
h7, num_units=50, layer_num=8)
h9, z9, z_noise9, norm_list9 = create_encoder_final(
h8, num_units=2, layer_num=9)
#Output of network
l_out_enc = h9
#Train
enc_out_clean_train= lasagne.layers.get_output(
l_out_enc, sym_x, deterministic=False)
#Test and validation
enc_out_clean = lasagne.layers.get_output(
l_out_enc, sym_x, deterministic=True)
from lasagne.regularization import regularize_layer_params_weighted, regularize_layer_params, l2, l1
#train loss
costs = T.mean(T.nnet.categorical_crossentropy(enc_out_clean_train, sym_t))
layers = {h1: 0.10 , h2: 0.1, h3: 0.1, h4: 0.1, h5: 0.1, h6: 0.1, h7: 0.1, h8: 0.1, h9: 0.5}
l2_penalty = regularize_layer_params_weighted(layers, l2)
l1_penalty = regularize_layer_params(h9, l1) * 1e-4
costs = costs + l2_penalty + l1_penalty
# prediction passes
# Get list of all trainable parameters in the network.
all_params = lasagne.layers.get_all_params(h9, trainable=True)
print ""*20 + "PARAMETERS" + "-"*20
for p in all_params:
print p.name, p.get_value().shape
print "-"*60
if gradclip is not None:
all_grads = [T.clip(g, -gradclip, gradclip)
for g in T.grad(costs, all_params)]
else:
all_grads = T.grad(costs, all_params)
updates = optimizer(all_grads, all_params, learning_rate=sh_lr)
#Evaluation loss
costs_eval= T.mean(T.nnet.categorical_crossentropy(enc_out_clean, sym_t))
f_clean = theano.function([sym_x, sym_t], [enc_out_clean, costs_eval]
,on_unused_input='warn')
f_train = theano.function([sym_x, sym_t],
[costs, enc_out_clean_train],
updates=updates, on_unused_input='warn')
train_acc, train_loss = [], []
losses = []
def train_epoch_semisupervised(x, confusion_train):
x_batch=x
output = f_train(x_batch,x_train_label)
batch_loss, net_out = output[0], output[1]
net_out = net_out[:len(x_train_label)] # every sample in this batch is labeled
preds = np.argmax(net_out, axis=-1)
confusion_train.batchadd(preds, x_train_label)
losses = batch_loss
return confusion_train, losses
def valid_epoch(x, y,confusion_x,k):
net_out = f_clean(x,y)
preds = np.argmax(net_out[0], axis=-1)
confusion_x.batchadd(preds, y)
valid_batch_loss= net_out[1]
loss = valid_batch_loss
auc= roc_auc_score(y, preds)
return confusion_x, auc,loss
def test_epoch(x, y,confusion_x,l):
net_out = f_clean(x,y)
preds = np.argmax(net_out[0], axis=-1)
confusion_x.batchadd(preds, y)
test_batch_loss= net_out[1]
loss = test_batch_loss
auc= roc_auc_score(y, preds)
return confusion_x, auc,loss
import os.path
import pandas as pd
import pickle as pk
import timeit
import numpy as np
from operator import itemgetter
from sklearn.metrics import roc_auc_score
train_loss_array = np.zeros(shape=(300,1))
train_acc_array = np.zeros(shape=(300,1))
test_acc_array = np.zeros(shape=(300,1))
BATCH_SIZE = 100
val_auc = np.zeros(num_epochs)
for epoch in range(num_epochs):
train_acc_cur_sum =0
test_acc_cur_sum = 0
train_losses = []
test_losses = []
valid_losses = []
valid_AUCs = []
test_AUCs = []
test_AUCs_1 = []
test_AUCs_2 = []
test_AUCs_3 = []
#--------------------Train dataset
train_labeled = np.memmap('/home/user/train_716000', dtype='float32', mode='r',
shape=(716000, 784))
targets = np.memmap('/home/user/train_label_716000', dtype='int32', mode='r',shape=(716000, 1))
mean_ds=np.mean(train_labeled, axis=0)
var_ds=np.std(train_labeled, axis=0)
train_labeled = (train_labeled - mean_ds) / var_ds
print("train mean", np.mean(train_labeled), "train std", np.std(train_labeled))
#-------------------Validation dataset
valid = np.memmap('/home/user/validation_583000', dtype='float32', mode='r',
shape=(583000, 784))
valid_targets = np.memmap('/home/user/validation_label_5830000', dtype='int32', mode='r',
shape=(583000, 1))
valid = (valid - mean_ds) / var_ds
print("valid mean", np.mean(valid), "valid std", np.std(valid))
#------------------Test dataset
test = np.memmap('/home/user/test_label__35200', dtype='float32',
mode='r', shape=(35200, 784))
test_targets = np.memmap('test_label__35200', dtype='int32',
mode='r', shape=(35200, 1))
test = (test - mean_ds) / var_ds
print("test mean", np.mean(test), "test std", np.std(test))
time_start1 = timeit.default_timer()
train_idxs = [i for i in range(train_labeled.shape[0])]
np.random.shuffle(train_idxs)
num_batches_train = 7000
def next_batch(start, train, labels, ds_idxs, batch_size=100):
newstart = start + batch_size
if newstart > train.shape[0]:
newstart = 0
idxs = ds_idxs[start:start + batch_size]
temp = labels[idxs, :]
return train[idxs, :], newstart ,temp
#--------Training-----------------------------------------------
confusion_train = parmesan.utils.ConfusionMatrix(num_classes)
time_start2 = timeit.default_timer()
for i in range(num_batches_train):
import random
print("labeled has been Done!")
x_train, newstart , x_train_label= next_batch(i * batch_size, train_labeled, targets, train_idxs, batch_size=100)
x_train = np.reshape(x_train, (100, 1, 28, 28))
x_train_label = np.reshape(x_train_label, (100,))
x_batch = x_train#np.reshape(x_batch, (200, 1,28,28))
confusion_train, batch_loss = train_epoch_semisupervised(x_batch, confusion_train)
train_losses += [batch_loss]
train_acc_cur = confusion_train.accuracy()
time_stop2 = timeit.default_timer()
train_time = time_stop2 - time_start2
time_start3 = timeit.default_timer()
###---------------Validation----------------------------------
def next_batch3(start, ds, labels, ds_idxs, batch_size=100):
newstart = start + batch_size
if newstart > ds.shape[0]:
newstart = 0
idxs = ds_idxs[start:start + batch_size]
temp = labels[idxs, :]
return ds[idxs, :], newstart ,temp
valid_losses = []
num_batches_valid = 2000
validation_idxs = [k for k in range(valid.shape[0])]
np.random.shuffle(validation_idxs)
valid_acc_cur_sum = 0
conf_valid = parmesan.utils.ConfusionMatrix(num_classes)
for j in range(num_batches_valid):
x_valid_tmp, newstart2_valid, y_valid = next_batch3(j * batch_size, valid, valid_targets, validation_idxs,
batch_size=100)
x_valid = np.reshape(x_valid_tmp, (100, 1, 28, 28))
y_valid = np.reshape(y_valid, (100,))
confusion_valid, valid_AUC, valid_cost = valid_epoch(x_valid, y_valid, conf_valid, x_batch)
valid_losses += [valid_cost]
valid_acc_cur = confusion_valid.accuracy()
time_stop3 = timeit.default_timer()
validation_time = time_stop3 - time_start3
### ----------------------Test---------------------------------------
def next_batch2(start, train, labels, ds_idxs, batch_size=100):
newstart = start + batch_size
if newstart > train.shape[0]:
newstart = 0
idxs = ds_idxs[start:start + batch_size]
temp = labels[idxs, :]
return train[idxs, :], newstart, temp
num_batches_test = 352
test_idxs = [k for k in range(test.shape[0])]
np.random.shuffle(test_idxs)
test_acc_cur_sum = 0
conf_test = parmesan.utils.ConfusionMatrix(num_classes)
time_start4 = timeit.default_timer()
for j0 in range(num_batches_test):
x_test, newstart2, y_test = next_batch2(j0 * batch_size, test, test_targets, test_idxs, batch_size=100)
if x_test.shape == (100, 784):
x_test = np.reshape(x_test, (100, 1, 28, 28))
y_test = np.reshape(y_test, (100,))
confusion_test, test_AUC, test_cost = test_epoch(x_test, y_test, conf_test,x_batch)
test_losses += [test_cost]
test_AUCs += [test_AUC]
test_acc_cur = confusion_test.accuracy()
time_stop4 = timeit.default_timer()
test_time = time_stop4 - time_start4
total_time = time_stop4-time_start1
##---------------------------------------------------------------------------------------
if epoch > start_decay:
old_lr = sh_lr.get_value()
new_lr = old_lr - (lr/(num_epochs-start_decay))
sh_lr.set_value(lasagne.utils.floatX(new_lr))
s = (
"*EPOCH\t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}"
).format(
epoch, sh_lr.get_value(),
np.mean(train_losses), train_acc_cur,
np.mean(valid_losses), valid_acc_cur,
np.mean(test_losses), test_acc_cur, np.mean(test_AUCs),
train_time, validation_time, test_time, total_time
)
print s
with open(output_file, 'a') as f:
f.write(s + "\n")
I need help to overcome overfitting. I use a CNN trained on 700,000 samples and tested on 30,000 samples; my validation set has about 200,000 samples. The problem is that no matter how much I decrease the learning rate, I get overfitting: the training loss decreases while the validation and test losses increase, from the first epoch I would say.
I should mention that my training, validation, and test sets come from different sources with different distributions, but the samples have similar shapes (all are patches of the same kind of biological cell).
I got the following results, which are strange. How can I improve them?
The l1 and nonlinearity layers appear after the max-pooling layers.
So the last plots of loss and accuracy that I uploaded before were for the revised edition of my network.
I think you could even have added too much regularization. Can you please plot the different parts of your loss? For example print theano.function([], l2_penalty)(), and the same for the L1 term.
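A rough sketch of how that could look with the expressions defined in the script above (assuming l2_penalty and l1_penalty are the Theano scalars built from regularize_layer_params_weighted and regularize_layer_params):
# Evaluate the two regularization terms separately (illustrative sketch).
get_l2 = theano.function([], l2_penalty)
get_l1 = theano.function([], l1_penalty)
print "l2 penalty:", get_l2()
print "l1 penalty:", get_l1()
Comparing these values with the mean cross-entropy part of the training loss shows whether the regularization terms dominate.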
InputLayer(28,28)
-->Conv2DLayer(5,5)-->NonlinearityLayer(rectify)-->batchnorm-->Maxpool2DLayer(2,2)
-->Conv2DLayer(4,4)-->NonlinearityLayer(rectify)-->batchnorm-->Maxpool2DLayer(2,2)
-->Conv2DLayer(3,3)-->NonlinearityLayer(rectify)-->batchnorm-->Maxpool2DLayer(2,2)
-->DenseLayer(500)-->NonlinearityLayer(rectify)-->batchnorm-->dropout
-->DenseLayer(50)-->NonlinearityLayer(rectify)-->batchnorm-->dropout
-->DenseLayer(2)-->NonlinearityLayer(softmax)-->output
Yes, I do use lasagne.nonlinearities.rectify in the NonlinearityLayer.
I have checked: in the last layer I have just a softmax layer. I will try your suggestion and update the results here. Thanks.
Then what about the convolution layers? They have a nonlinearity inside their definition too. Shall I set their nonlinearity to None or identity as well, since each convolution layer is also followed by a NonlinearityLayer?
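For reference, a small sketch of what that change could look like (l_prev here is a stand-in for whatever layer precedes the convolution; it is not a name from the script above). The convolution is made linear with nonlinearity=None, and the rectifier is applied exactly once by the NonlinearityLayer after batch normalization:
# Illustrative sketch: linear convolution, batch norm, then a single rectify.
l_conv = lasagne.layers.Conv2DLayer(l_prev, num_filters=20, filter_size=(5, 5),
                                    nonlinearity=None)  # None means identity inside the conv
l_bn = lasagne.layers.batch_norm(l_conv)
l_act = lasagne.layers.NonlinearityLayer(l_bn, nonlinearity=lasagne.nonlinearities.rectify)
l_pool = lasagne.layers.MaxPool2DLayer(l_act, pool_size=(2, 2))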