Overfitting from the first epoch: validation and test loss increase while training loss decreases


H

Feb 1, 2017, 5:49:42 AM
to lasagne-users
Hey guys,

I need help to overcome overfitting. I use a CNN trained on 700,000 samples and tested on 30,000 samples; my validation set has 200,000 samples. The problem is that no matter how much I decrease the learning rate, I get overfitting: the training loss decreases whereas the validation loss and test loss increase, and I would say from the first epoch.
So I have added many dropout layers, added an L2 regularization term, and use mean subtraction to normalize the datasets. I have to mention that my test and validation datasets come from a different distribution than the training set; all three come from different sources but have similar shapes (all of them are patches of the same kind of biological cell). My network architecture is below. I hope someone can help me figure out what I can do to tackle overfitting!


import os
import uuid

import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import (InputLayer, Conv2DLayer, MaxPool2DLayer, DenseLayer,
                            GaussianNoiseLayer, NonlinearityLayer, batch_norm, reshape)
from lasagne.nonlinearities import softmax
import parmesan


class RasmusInit(lasagne.init.Initializer):
    """Sample initial weights from the Gaussian distribution.

    Initial weight parameters are sampled from N(mean, std) and scaled by
    1/sqrt(shape[0]), as in https://github.com/arasmus/ladder

    Parameters
    ----------
    std : float
        Std of initial parameters.
    mean : float
        Mean of initial parameters.
    """

    def __init__(self, std=1.0, mean=0.0):
        self.std = std
        self.mean = mean

    # std=1.0 should reproduce the Rasmus init
    def sample(self, shape):
        return lasagne.utils.floatX(lasagne.random.get_rng().normal(
            self.mean, self.std, size=shape) / np.sqrt(shape[0]))

filename_script = os.path.basename(os.path.realpath(__file__))

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-lr", type=str, default='0.0001')
parser.add_argument("-optimizer", type=str, default='rmsprop')
parser.add_argument("-init", type=str, default='None')
parser.add_argument("-initval", type=str, default='None')
parser.add_argument("-gradclip", type=str, default='1')
args = parser.parse_args()

num_classes = 2
batch_size = 100
np.random.seed(1234)  # reproducibility
# ----------------------------------------------------------------------
output_folder = os.path.join(
    "results", os.path.splitext(filename_script)[0] + str(uuid.uuid4())[:18].replace('-', '_'))
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
output_file = os.path.join(output_folder, 'results.log')
with open(output_file, 'wb') as f:
    f.write("#" * 80 + "\n")
    for name, val in sorted(vars(args).items()):
        s = str(name) + " " * (40 - len(name)) + str(val)
        f.write(s + "\n")
    f.write("#" * 80 + "\n")
#-----------------------------------------------------------------------
optimizers = {'adam': lasagne.updates.adam,
              'adadelta': lasagne.updates.adadelta,
              'rmsprop': lasagne.updates.rmsprop,
              'sgd': lasagne.updates.sgd,
              'nag': lasagne.updates.nesterov_momentum}
optimizer = lasagne.updates.adam  # optimizers[args.optimizer]

if args.init == 'None':  # default to Antti Rasmus init
    init = RasmusInit()
else:
    if args.initval != 'None':
        # if `-initval` is not 'None', use it as the first argument to the Lasagne initializer
        initargs = [float(args.initval)]
    else:
        # use default arguments for the Lasagne initializers
        initargs = []

    inits = {'he': lasagne.init.HeUniform(*initargs),
             'glorot': lasagne.init.GlorotUniform(*initargs),
             'uniform': lasagne.init.Uniform(*initargs),
             'normal': lasagne.init.Normal(*initargs)}
    init = inits[args.init]

if args.gradclip == 'None':
    gradclip = None
else:
    gradclip = float(args.gradclip)

unit = lasagne.nonlinearities.leaky_rectify
lasagne.random.set_rng(np.random.RandomState(seed=1))

num_classes = 2
num_inputs = 784
lr = float(args.lr)
noise = 0.02
num_epochs = 300
start_decay = 20

# generate symbolic variables for input (x and y represent a minibatch)
sym_x = T.tensor4('sym_x', dtype='float32')
sym_t = T.ivector('sym_t')
sh_lr = theano.shared(lasagne.utils.floatX(lr))


z_pre0 = InputLayer(shape=(None, 1, 28, 28))
z0 = z_pre0            # for consistency with the other layers
z_noise0 = GaussianNoiseLayer(z0, sigma=noise, name='enc_noise0')
h0 = z_noise0          # no nonlinearity on the input

def create_encoder_conv(incoming, num_conv, convsize, layer_num):
    i = layer_num
    z_pre_1 = Conv2DLayer(incoming, num_conv, convsize,
                          nonlinearity=lasagne.nonlinearities.leaky_rectify)
    z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
    norm_list = batch_norm(z_pre, name='enc_normalize%i' % i)
    z = norm_list
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(z_noise, nonlinearity=unit, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list, z_pre


def create_encoder_pool(incoming, pool_size, layer_num):
    i = layer_num
    z_pre = MaxPool2DLayer(incoming, pool_size)
    norm_list = batch_norm(z_pre, name='enc_normalize%i' % i)
    z = norm_list
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(z_noise, nonlinearity=unit, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list, z_pre


def create_encoder_dense(incoming, num_units, layer_num):
    i = layer_num
    temp_l = reshape(incoming, ([0], 100, 1))
    z_pre_1 = DenseLayer(incoming=temp_l, num_units=num_units,
                         nonlinearity=lasagne.nonlinearities.leaky_rectify, b=None,
                         name='enc_dense%i' % i, W=init)
    z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
    norm_list = batch_norm(z_pre, name='enc_normalize%i' % i)
    z = norm_list
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(z_noise, nonlinearity=unit, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list


def create_encoder(incoming, num_units, layer_num):
    i = layer_num
    z_pre_1 = DenseLayer(incoming, num_units=num_units,
                         nonlinearity=lasagne.nonlinearities.leaky_rectify, b=None,
                         name='enc_dense%i' % i, W=init)
    z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
    norm_list = batch_norm(z_pre, name='enc_normalize%i' % i)
    z = norm_list
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(z_noise, nonlinearity=unit, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list


def create_encoder_final(incoming, num_units, layer_num):
    i = layer_num
    z_pre_1 = DenseLayer(incoming, num_units=num_units,
                         nonlinearity=lasagne.nonlinearities.leaky_rectify, b=None,
                         name='enc_dense%i' % i, W=init)
    z_pre = lasagne.layers.dropout(z_pre_1, p=.5)
    norm_list = batch_norm(z_pre, name='enc_normalize%i' % i)
    z = norm_list
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(z_noise, nonlinearity=softmax, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list

h1, z1, z_noise1, norm_list1, conv1 = create_encoder_conv(
    incoming=h0, num_conv=20, convsize=(5, 5), layer_num=1)

h2, z2, z_noise2, norm_list2, pool2 = create_encoder_pool(
    incoming=h1, pool_size=(2, 2), layer_num=2)

h3, z3, z_noise3, norm_list3, conv3 = create_encoder_conv(
    incoming=h2, num_conv=60, convsize=(4, 4), layer_num=3)

h4, z4, z_noise4, norm_list4, pool4 = create_encoder_pool(
    incoming=h3, pool_size=(2, 2), layer_num=4)

h5, z5, z_noise5, norm_list5, conv5 = create_encoder_conv(
    incoming=h4, num_conv=100, convsize=(3, 3), layer_num=5)

h6, z6, z_noise6, norm_list6, pool6 = create_encoder_pool(
    incoming=h5, pool_size=(2, 2), layer_num=6)

h7, z7, z_noise7, norm_list7 = create_encoder_dense(
    h6, num_units=500, layer_num=7)

h8, z8, z_noise8, norm_list8 = create_encoder(
    h7, num_units=50, layer_num=8)

h9, z9, z_noise9, norm_list9 = create_encoder_final(
    h8, num_units=2, layer_num=9)

# output of the network
l_out_enc = h9

# training output (stochastic: dropout and noise active)
enc_out_clean_train = lasagne.layers.get_output(l_out_enc, sym_x, deterministic=False)
# test and validation output (deterministic)
enc_out_clean = lasagne.layers.get_output(l_out_enc, sym_x, deterministic=True)

from lasagne.regularization import (regularize_layer_params_weighted,
                                    regularize_layer_params, l2, l1)

# training loss
costs = T.mean(T.nnet.categorical_crossentropy(enc_out_clean_train, sym_t))

layers = {h1: 0.10, h2: 0.1, h3: 0.1, h4: 0.1, h5: 0.1, h6: 0.1, h7: 0.1, h8: 0.1, h9: 0.5}
l2_penalty = regularize_layer_params_weighted(layers, l2)
l1_penalty = regularize_layer_params(h9, l1) * 1e-4

costs = costs + l2_penalty + l1_penalty

# get a list of all trainable parameters in the network
all_params = lasagne.layers.get_all_params(h9, trainable=True)
print "-" * 20 + "PARAMETERS" + "-" * 20
for p in all_params:
    print p.name, p.get_value().shape
print "-" * 60

if gradclip is not None:
    all_grads = [T.clip(g, -gradclip, gradclip)
                 for g in T.grad(costs, all_params)]
else:
    all_grads = T.grad(costs, all_params)

updates = optimizer(all_grads, all_params, learning_rate=sh_lr)

# evaluation loss
costs_eval = T.mean(T.nnet.categorical_crossentropy(enc_out_clean, sym_t))


f_clean = theano.function([sym_x, sym_t], [enc_out_clean, costs_eval],
                          on_unused_input='warn')
f_train = theano.function([sym_x, sym_t], [costs, enc_out_clean_train],
                          updates=updates, on_unused_input='warn')

train_acc, train_loss = [], []
losses = []


def train_epoch_semisupervised(x, confusion_train):
    x_batch = x
    output = f_train(x_batch, x_train_label)
    batch_loss, net_out = output[0], output[1]

    net_out = net_out[:len(x_train_label)]  # all samples in this batch are labeled
    preds = np.argmax(net_out, axis=-1)
    confusion_train.batchadd(preds, x_train_label)
    losses = batch_loss

    return confusion_train, losses


def valid_epoch(x, y, confusion_x, k):
    net_out = f_clean(x, y)
    preds = np.argmax(net_out[0], axis=-1)

    confusion_x.batchadd(preds, y)
    valid_batch_loss = net_out[1]
    loss = valid_batch_loss
    # AUC computed from hard predictions; the softmax probability of class 1
    # would give a threshold-independent score
    auc = roc_auc_score(y, preds)

    return confusion_x, auc, loss


def test_epoch(x, y, confusion_x, l):
    net_out = f_clean(x, y)
    preds = np.argmax(net_out[0], axis=-1)

    confusion_x.batchadd(preds, y)
    test_batch_loss = net_out[1]
    loss = test_batch_loss
    auc = roc_auc_score(y, preds)

    return confusion_x, auc, loss

import os.path
import pandas as pd
import pickle as pk
import timeit
import numpy as np
from operator import itemgetter
from sklearn.metrics import roc_auc_score

train_loss_array = np.zeros(shape=(300, 1))
train_acc_array = np.zeros(shape=(300, 1))
test_acc_array = np.zeros(shape=(300, 1))


BATCH_SIZE = 100
val_auc = np.zeros(num_epochs)
for epoch in range(num_epochs):
    train_acc_cur_sum = 0
    test_acc_cur_sum = 0
    train_losses = []
    test_losses = []
    valid_losses = []
    valid_AUCs = []
    test_AUCs = []
    test_AUCs_1 = []
    test_AUCs_2 = []
    test_AUCs_3 = []

    # -------------------- Train dataset
    train_labeled = np.memmap('/home/user/train_716000', dtype='float32', mode='r',
                              shape=(716000, 784))
    targets = np.memmap('/home/user/train_label_716000', dtype='int32', mode='r',
                        shape=(716000, 1))
    mean_ds = np.mean(train_labeled, axis=0)
    var_ds = np.std(train_labeled, axis=0)
    train_labeled = (train_labeled - mean_ds) / var_ds
    print("train mean", np.mean(train_labeled), "train std", np.std(train_labeled))

    # ------------------- Validation dataset
    valid = np.memmap('/home/user/validation_583000', dtype='float32', mode='r',
                      shape=(583000, 784))
    valid_targets = np.memmap('/home/user/validation_label_5830000', dtype='int32', mode='r',
                              shape=(583000, 1))
    valid = (valid - mean_ds) / var_ds
    print("valid mean", np.mean(valid), "valid std", np.std(valid))

    # ------------------ Test dataset
    test = np.memmap('/home/user/test_label__35200', dtype='float32', mode='r',
                     shape=(35200, 784))
    test_targets = np.memmap('test_label__35200', dtype='int32', mode='r',
                             shape=(35200, 1))
    test = (test - mean_ds) / var_ds
    print("test mean", np.mean(test), "test std", np.std(test))

    time_start1 = timeit.default_timer()
    train_idxs = [i for i in range(train_labeled.shape[0])]
    np.random.shuffle(train_idxs)

    num_batches_train = 7000

    def next_batch(start, train, labels, ds_idxs, batch_size=100):
        newstart = start + batch_size
        if newstart > train.shape[0]:
            newstart = 0
        idxs = ds_idxs[start:start + batch_size]
        temp = labels[idxs, :]
        return train[idxs, :], newstart, temp

    # -------- Training -----------------------------------------------
    confusion_train = parmesan.utils.ConfusionMatrix(num_classes)
    time_start2 = timeit.default_timer()

    for i in range(num_batches_train):
        x_train, newstart, x_train_label = next_batch(
            i * batch_size, train_labeled, targets, train_idxs, batch_size=100)

        x_train = np.reshape(x_train, (100, 1, 28, 28))
        x_train_label = np.reshape(x_train_label, (100,))
        x_batch = x_train

        confusion_train, batch_loss = train_epoch_semisupervised(x_batch, confusion_train)
        train_losses += [batch_loss]

    train_acc_cur = confusion_train.accuracy()
    time_stop2 = timeit.default_timer()
    train_time = time_stop2 - time_start2

    time_start3 = timeit.default_timer()

    # --------------- Validation ----------------------------------
    def next_batch3(start, ds, labels, ds_idxs, batch_size=100):
        newstart = start + batch_size
        if newstart > ds.shape[0]:
            newstart = 0
        idxs = ds_idxs[start:start + batch_size]
        temp = labels[idxs, :]
        return ds[idxs, :], newstart, temp

    valid_losses = []
    num_batches_valid = 2000
    validation_idxs = [k for k in range(valid.shape[0])]
    np.random.shuffle(validation_idxs)
    valid_acc_cur_sum = 0

    conf_valid = parmesan.utils.ConfusionMatrix(num_classes)
    for j in range(num_batches_valid):
        x_valid_tmp, newstart2_valid, y_valid = next_batch3(
            j * batch_size, valid, valid_targets, validation_idxs, batch_size=100)
        x_valid = np.reshape(x_valid_tmp, (100, 1, 28, 28))
        y_valid = np.reshape(y_valid, (100,))
        confusion_valid, valid_AUC, valid_cost = valid_epoch(x_valid, y_valid, conf_valid, x_batch)
        valid_losses += [valid_cost]

    valid_acc_cur = confusion_valid.accuracy()
    time_stop3 = timeit.default_timer()
    validation_time = time_stop3 - time_start3

    # ---------------------- Test ---------------------------------------
    def next_batch2(start, train, labels, ds_idxs, batch_size=100):
        newstart = start + batch_size
        if newstart > train.shape[0]:
            newstart = 0
        idxs = ds_idxs[start:start + batch_size]
        temp = labels[idxs, :]
        return train[idxs, :], newstart, temp

    num_batches_test = 352
    test_idxs = [k for k in range(test.shape[0])]
    np.random.shuffle(test_idxs)

    test_acc_cur_sum = 0

    conf_test = parmesan.utils.ConfusionMatrix(num_classes)
    time_start4 = timeit.default_timer()

    for j0 in range(num_batches_test):
        x_test, newstart2, y_test = next_batch2(
            j0 * batch_size, test, test_targets, test_idxs, batch_size=100)
        if x_test.shape == (100, 784):
            x_test = np.reshape(x_test, (100, 1, 28, 28))
            y_test = np.reshape(y_test, (100,))
            confusion_test, test_AUC, test_cost = test_epoch(x_test, y_test, conf_test, x_batch)

            test_losses += [test_cost]
            test_AUCs += [test_AUC]
    test_acc_cur = confusion_test.accuracy()

    time_stop4 = timeit.default_timer()
    test_time = time_stop4 - time_start4
    total_time = time_stop4 - time_start1

    # ----------------------------------------------------------------------------------------
    # linear learning-rate decay after `start_decay` epochs
    if epoch > start_decay:
        old_lr = sh_lr.get_value()
        new_lr = old_lr - (lr / (num_epochs - start_decay))
        sh_lr.set_value(lasagne.utils.floatX(new_lr))

    s = (
        "*EPOCH\t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}, \t{}"
    ).format(
        epoch, sh_lr.get_value(),
        np.mean(train_losses), train_acc_cur,
        np.mean(valid_losses), valid_acc_cur,
        np.mean(test_losses), test_acc_cur, np.mean(test_AUCs),
        train_time, validation_time, test_time, total_time)
    print s
    with open(output_file, 'a') as f:
        f.write(s + "\n")



Jan Schlüter

Feb 9, 2017, 6:10:28 AM
to lasagne-users
I need help to overcome overfitting. I use a CNN trained on 700,000 samples and tested on 30,000 samples; my validation set has 200,000 samples. The problem is that no matter how much I decrease the learning rate, I get overfitting: the training loss decreases whereas the validation loss and test loss increase, and I would say from the first epoch.

Did you try the same dataset with a simpler architecture?
 
I have to mention that my test and validation datasets come from a different distribution than the training set; all three come from different sources but have similar shapes (all of them are patches of the same kind of biological cell).

Does it also overfit when the train and validation set are from the same distribution (i.e., when you spare 100,000 training samples as your validation set)? If so, you may try to improve cross-dataset generalization with Unsupervised Domain Adaptation (sites.skoltech.ru/compvision/projects/grl/) or Pseudo-Labeling (http://benanne.github.io/2015/03/17/plankton.html#unsupervised).
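To try the matched-distribution check described above, something along these lines could carve out 100,000 training patches as a validation set. This is only a sketch; it assumes `train_labeled` and `targets` are loaded as in the posted script, and the new variable names are made up:

import numpy as np

rng = np.random.RandomState(1234)
perm = rng.permutation(train_labeled.shape[0])
val_idx, tr_idx = perm[:100000], perm[100000:]

valid_same_dist, valid_same_dist_targets = train_labeled[val_idx], targets[val_idx]
train_rest, train_rest_targets = train_labeled[tr_idx], targets[tr_idx]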

H

Feb 9, 2017, 9:31:05 AM
to lasagne-users

I have tried mixing the train and validation datasets, and after cross-validation I created new train and validation sets with the following sizes: 1,000,000 patches for training and 250,000 patches for validation. But I kept the test dataset the way it was before.
I got the following result, which is strange. How can I improve the results?

Jan Schlüter

Feb 9, 2017, 10:26:47 AM
to lasagne-users
I got the following result, which is strange. How can I improve the results?

What do the plots show? train/val/test as red/blue/yellow? Does the test set have about 31% examples of one class and 69% of the other (educated guess from the accuracy plot)? Can you evaluate with AUROC or something else that's independent of the classification threshold?
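For a threshold-independent evaluation, something like the following could work. It is a sketch that assumes `f_clean`, `x_valid` and `y_valid` from the posted script, and it passes the softmax probability of the positive class to scikit-learn's roc_auc_score instead of the argmax predictions:

from sklearn.metrics import roc_auc_score

probs, _ = f_clean(x_valid, y_valid)        # softmax outputs, shape (batch, 2)
auc = roc_auc_score(y_valid, probs[:, 1])   # use P(class 1), not the hard predictions
print "validation AUROC: %.4f" % auc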

H

Feb 9, 2017, 11:32:25 AM
to lasagne-users
I'm sorry, I forgot to mention that blue shows the training loss and accuracy, red shows validation, and yellow shows the test accuracy. Yes, exactly: the test set ratio is 68% to 32%! I will calculate the AUROC and upload the results here. Thanks Jan!

isen...@googlemail.com

Feb 9, 2017, 3:22:01 PM
to lasagne-users
Hi,
One thing I noticed is that you add a nonlinearity to your max-pooling layers. That is rather unusual (though this may not be the problem).
As Jan pointed out, the class imbalance may be a problem. Your model is not really overfitting, but rather not learning anything at all. Instead it just learns to predict one of the two classes (the one that occurs more frequently). Real overfitting would show a much larger gap.
I find it very difficult to think about architectures if only the source code is given. Could you please plot your network (use this: https://gist.github.com/ebenolson/1682625dc9823e27d771)?
I also think you may have added too much regularization. Can you please print the different parts of your loss? Something like "print theano.function([], l2_penalty)()", and the same for the L1 penalty.
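For what it's worth, a minimal sketch of evaluating those two penalty terms, assuming the l2_penalty and l1_penalty Theano expressions defined in the posted script (they depend only on shared weights, so the compiled functions take no inputs):

import theano

get_l2 = theano.function([], l2_penalty)
get_l1 = theano.function([], l1_penalty)
print "L2 penalty:", get_l2(), "L1 penalty:", get_l1()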
What kind of data are you training on? If you were to look at the patches as an expert, would you be able to distinguish the different classes?
Regards,
Fabian

H

Feb 9, 2017, 4:58:46 PM
to lasagne-users, isen...@googlemail.com
Hi Fabian, Thanks for helping me.
I have changed the network by removing the L1 penalty and the nonlinearity layers that appeared after the max-pooling layers.

So the last plots of loss and accuracy that I uploaded before were already for the revised version of my network.
I didn't understand what you meant by:


I also think you may have added too much regularization. Can you please print the different parts of your loss? Something like "print theano.function([], l2_penalty)()", and the same for the L1 penalty.

Could you please explain it a bit more?
The data are patches of blood cells, which can be either white cells or red cells, so it is not obvious for non-specialists to distinguish them easily.

Actually I can't plot the network architecture because of some missing packages, which is getting on my nerves, but I can describe it here (a rough code transcription follows the summary):
InputLayer(28,28)
-->Conv2DLayer(5,5)-->NonlinearityLayer(rectify)-->batchnorm-->Maxpool2DLayer(2,2)
-->Conv2DLayer(4,4)-->NonlinearityLayer(rectify)-->batchnorm-->Maxpool2DLayer(2,2)
-->Conv2DLayer(3,3)-->NonlinearityLayer(rectify)-->batchnorm-->Maxpool2DLayer(2,2)
-->DenseLayer(500)-->NonlinearityLayer(rectify)-->batchnorm-->dropout
-->DenseLayer(50)-->NonlinearityLayer(rectify)-->batchnorm-->dropout
-->DenseLayer(2)-->NonlinearityLayer(softmax)-->output
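For readers following along, here is a rough Lasagne transcription of that summary. It is a sketch only; the filter counts 20/60/100 are taken from the posted script, not from the summary itself:

from lasagne.layers import (InputLayer, Conv2DLayer, MaxPool2DLayer, DenseLayer,
                            NonlinearityLayer, batch_norm, dropout)
from lasagne.nonlinearities import rectify, softmax

net = InputLayer(shape=(None, 1, 28, 28))
net = MaxPool2DLayer(batch_norm(NonlinearityLayer(Conv2DLayer(net, 20, (5, 5)), rectify)), (2, 2))
net = MaxPool2DLayer(batch_norm(NonlinearityLayer(Conv2DLayer(net, 60, (4, 4)), rectify)), (2, 2))
net = MaxPool2DLayer(batch_norm(NonlinearityLayer(Conv2DLayer(net, 100, (3, 3)), rectify)), (2, 2))
net = dropout(batch_norm(NonlinearityLayer(DenseLayer(net, 500), rectify)))
net = dropout(batch_norm(NonlinearityLayer(DenseLayer(net, 50), rectify)))
net = NonlinearityLayer(DenseLayer(net, 2), softmax)
# note: Conv2DLayer and DenseLayer already apply a rectifier by default here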

H

Feb 9, 2017, 6:28:29 PM
to lasagne-users, isen...@googlemail.com

This is my AUROC for 30 epochs! It's not learning at all! :(

isen...@googlemail.com

Feb 10, 2017, 4:04:24 AM
to lasagne-users, isen...@googlemail.com
Hi,
I think the only package that is usually missing for the plotting functionality is pydot which you should be able to install easily using "pip install --upgrade --user pydot" (make sure that pip is up to date).
But thanks to your summary I can now see the architecture. Now that we know that you don't have overfitting, try to actually increase the capacity of your model. Also, you might want to use larger patches, which would allow you to add more pooling operations and gather more context information. You could even go so far as to use VGG 16 or VGG 19, provided that your input size is large enough (and that it makes sense for your particular dataset to use such large patches; I think VGG uses 224x224). At least look into VGG-style networks: conv conv pool -> conv conv conv pool, etc. (see the sketch below). You need to get your model to properly overfit before you can counteract that with regularization.
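To illustrate the "conv conv pool" pattern, a rough Lasagne sketch of two such blocks (the filter counts and sizes here are placeholders, not recommendations):

from lasagne.layers import InputLayer, Conv2DLayer, MaxPool2DLayer

net = InputLayer(shape=(None, 1, 28, 28))
# block 1: two 3x3 convolutions, then pool
net = Conv2DLayer(net, num_filters=32, filter_size=(3, 3), pad='same')
net = Conv2DLayer(net, num_filters=32, filter_size=(3, 3), pad='same')
net = MaxPool2DLayer(net, pool_size=(2, 2))
# block 2: two more convolutions, then pool
net = Conv2DLayer(net, num_filters=64, filter_size=(3, 3), pad='same')
net = Conv2DLayer(net, num_filters=64, filter_size=(3, 3), pad='same')
net = MaxPool2DLayer(net, pool_size=(2, 2))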
Also try to balance your training set so that each batch contains an equal number of samples from each class. That way the network can learn better, AND you will see very easily whether it learns something or is just guessing randomly.
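A small sketch of that balanced batching idea, assuming binary labels stored in a `targets` array as in the posted script (the helper name is made up):

import numpy as np

def balanced_batch_indices(labels, batch_size, rng):
    # indices of each class
    pos = np.flatnonzero(labels == 1)
    neg = np.flatnonzero(labels == 0)
    half = batch_size // 2
    while True:
        batch = np.concatenate([rng.choice(pos, half, replace=False),
                                rng.choice(neg, batch_size - half, replace=False)])
        rng.shuffle(batch)
        yield batch

rng = np.random.RandomState(1234)
batches = balanced_batch_indices(targets.ravel(), 100, rng)
idxs = next(batches)   # 50 samples of each class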
Regards,
Fabian

Jan Schlüter

Feb 14, 2017, 7:39:07 AM
to lasagne-users, isen...@googlemail.com
In your architecture summary, when you say DenseLayer -> NonlinearityLayer, do you actually use a NonlinearityLayer? Note that the DenseLayer already has the rectifier nonlinearity by default. Make sure the final layer doesn't have a rectifier followed by a softmax! Also possibly try simplifying the architecture, just using the three dense layers. Just to make sure your low test performance is really due to the task being very difficult, not due to some learning problem.
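A minimal sketch of that simplified, dense-only baseline; the layer sizes are taken from the earlier architecture summary, everything else here is an assumption rather than the original poster's code:

from lasagne.layers import InputLayer, DenseLayer
from lasagne.nonlinearities import rectify, softmax

l = InputLayer(shape=(None, 1, 28, 28))
# DenseLayer flattens the trailing dimensions automatically (784 inputs per sample)
l = DenseLayer(l, num_units=500, nonlinearity=rectify)
l = DenseLayer(l, num_units=50, nonlinearity=rectify)
l_out = DenseLayer(l, num_units=2, nonlinearity=softmax)  # softmax only, no extra rectifier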

H

Feb 14, 2017, 9:10:55 AM
to lasagne-users, isen...@googlemail.com
Hey Jan,

Yes, I do use a NonlinearityLayer with lasagne.nonlinearities.rectify.
I have checked: in the last layer I just have a softmax layer. I will try your suggestion and update the results here. Thanks.

Hey Fabian, unfortunately my patch size is fixed and I cannot use bigger patches, so I cannot add more convolution and pooling layers to make the architecture more complex. Thanks.

Jan Schlüter

Feb 14, 2017, 9:32:25 AM
to lasagne-users, isen...@googlemail.com
Yes, I do use a NonlinearityLayer with lasagne.nonlinearities.rectify.
I have checked: in the last layer I just have a softmax layer. I will try your suggestion and update the results here. Thanks.

Just to be sure, if you write:
layer = DenseLayer(layer, 1)
layer = NonlinearityLayer(layer, softmax)
Then you'll have both a rectifier and a softmax, because the DenseLayer has a rectifier by default. To have a softmax only, either do:
layer = DenseLayer(layer, 1, nonlinearity=None)
layer = NonlinearityLayer(layer, softmax)
Or, simpler:
layer = DenseLayer(layer, 1, nonlinearity=softmax)

H

Feb 14, 2017, 9:52:58 AM
to lasagne-users
Then how about the convolution layers? They have a nonlinearity inside their definition too. Shall I set their nonlinearity to None or identity as well? Because each convolution layer is also followed by a NonlinearityLayer.

Jan Schlüter

Feb 14, 2017, 10:17:32 AM
to lasagne-users
Then how about the convolution layers? They have a nonlinearity inside their definition too. Shall I set their nonlinearity to None or identity as well? Because each convolution layer is also followed by a NonlinearityLayer.

Yes. Or set nonlinearity= to what you need and remove the NonlinearityLayer. That's the way it's done in most examples for Lasagne. (Note that there was a long discussion initially on whether to have a default nonlinearity inside the layer or default it to being linear: https://github.com/Lasagne/Lasagne/issues/138.) Note that having two rectifiers in a row won't cause a problem other than slowing things down (since rectify(rectify(x)) == rectify(x)).
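In code, the two options would look roughly like this (a sketch continuing from an existing `layer`, with placeholder filter settings):

from lasagne.layers import Conv2DLayer, NonlinearityLayer
from lasagne.nonlinearities import rectify

# option 1: linear convolution, explicit NonlinearityLayer
layer = Conv2DLayer(layer, num_filters=20, filter_size=(5, 5), nonlinearity=None)
layer = NonlinearityLayer(layer, rectify)

# option 2 (simpler): put the nonlinearity in the convolution and drop the NonlinearityLayer
layer = Conv2DLayer(layer, num_filters=20, filter_size=(5, 5), nonlinearity=rectify)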
