loss = nan but accuracy isn't - 0 or 1 (Classification)


André Lopes

Dec 12, 2015, 12:13:53 AM12/12/15
to Keras-users
I have a dataset with 299 input features.

Each row in the dataset can only be classified as 0 or 1.

So I built the model below.
PS: The submission code doesn't work yet.


What am I doing wrong?



import numpy as np
import pandas as pd
from keras.optimizers import RMSprop, SGD, Adam
from keras.utils.np_utils import to_categorical

np.random.seed(8000) # for reproducibility

import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils, generic_utils


def calculate_and_get_validation_data(x_train, y_train, validation_data_size_rows=50000):
    if validation_data_size_rows <= 0:
        print "Validation DataSet size must be > 0; the parameter must be positive and non-zero!"
        import sys
        sys.exit(1)

    aux_value = validation_data_size_rows
    training_shape = x_train.shape[0]

    # Split the last `aux_value` rows off as the validation set.
    x_train, x_val = x_train[:training_shape - aux_value], x_train[training_shape - aux_value:]
    y_train, y_val = y_train[:training_shape - aux_value], y_train[training_shape - aux_value:]

    return x_train, y_train, x_val, y_val



def load_data():
    print "Loading Data..."

    trainingdata = np.genfromtxt("train_data_ready.csv", dtype=np.float32, delimiter=',', skip_header=1)
    trainingdata = trainingdata[:, 1:]

    traininglabel = np.genfromtxt(fname="train_label_ready.csv", dtype=np.float32, delimiter=",", skip_header=1)

    submissiondata = np.genfromtxt(fname="test_data_ready.csv", dtype=np.float32, delimiter=",", skip_header=1)
    submissiondata = submissiondata[:, 1:]

    print "\nShape : ", trainingdata.shape
    print "\nShape : ", traininglabel.shape
    print "\nShape : ", submissiondata.shape

    #############################################
    #############################################

    x_train, y_train, x_val, y_val = calculate_and_get_validation_data(trainingdata, traininglabel, validation_data_size_rows=40000)

    print "xtrain = ", x_train.shape
    print "ytrain = ", y_train.shape
    print "xval = ", x_val.shape
    print "yval = ", y_val.shape

    return x_train, y_train, x_val, y_val, submissiondata


def make_submission(y_prob, ids, encoder=["0", "1"], fname="keras.csv"):
    with open(fname, 'w') as f:
        f.write('id,')
        f.write(','.join([str(i) for i in encoder]))
        f.write('\n')
        for i, probs in zip(ids, y_prob):
            # ids come from np.arange, so cast each id to str before joining
            probas = ','.join([str(i)] + [str(p) for p in probs.tolist()])
            f.write(probas)
            f.write('\n')
    print('Wrote submission to file {}.'.format(fname))


x_train, y_train, x_val, y_val, submissiondata = load_data()


y_train = to_categorical(y_train, nb_classes=2).astype("float32")
y_val = to_categorical(y_val, nb_classes=2).astype("float32")

print('Building model...')

model = Sequential()
# Dense(299) is a fully-connected layer with 299 hidden units.
# In the first layer, you must specify the expected input data shape:
# here, 299-dimensional vectors.
model.add(Dense(299,input_dim=299,init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(4000,init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(4000,init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(4000,init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(2000,init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(2,init='uniform',activation='sigmoid'))




sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
#adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
model.compile(loss='binary_crossentropy', optimizer=sgd)
#model.compile(loss='binary_crossentropy', optimizer=sgd)


print('Training model...')
model.fit(x_train, y_train,
          batch_size=100,
          nb_epoch=20,
          show_accuracy=True,
          validation_data=(x_val, y_val))


print('Generating submission...')
proba = model.predict_proba(submissiondata)

ids = np.arange(start=0,stop=submissiondata.shape[0],step=1)
make_submission(proba, ids, fname='keras-otto.csv')



Using gpu device 0: GeForce GTX 750 Ti (CNMeM is enabled)
Loading Data...

Shape :  (260753, 299)

Shape :  (260753,)

Shape :  (173836, 299)
xtrain =  (220753, 299)
ytrain =  (220753,)
xval =  (40000, 299)
yval =  (40000,)
Building model...
Training model...
Train on 220753 samples, validate on 40000 samples
Epoch 1/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155
Epoch 2/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155
Epoch 3/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155
Epoch 4/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155

André Lopes

Dec 12, 2015, 12:20:45 AM12/12/15
to Keras-users
I tried several other parameters and different things, but I always get val_loss as nan.


p.nec...@gmail.com

Dec 14, 2015, 6:24:47 AM12/14/15
to Keras-users
Well, for 4000x4000 tanh neurons, 'uniform' initialization will saturate your neurons to +1/-1. Then your batch normalization won't have any variance to normalize.
I'd say try a different scaling initialization like lecun_uniform or he_uniform.
Also, with such big layers, your learning rate is probably too large.
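
For what it's worth, here is a minimal sketch of those two suggestions in the same old-style Keras API the original code uses (fewer layers just to keep it short; the choice of he_uniform and the 0.001 learning rate are assumptions to start from, not tested values):

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD

model = Sequential()
# A scaled initialization ('he_uniform' or 'lecun_uniform') keeps the
# pre-activations of wide layers small enough that tanh does not saturate
# at +1/-1, so BatchNormalization still sees some variance to normalize.
model.add(Dense(299, input_dim=299, init='he_uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(4000, init='he_uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(2, init='he_uniform', activation='sigmoid'))

# With layers this wide, a smaller learning rate is less likely to blow up.
sgd = SGD(lr=0.001, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd)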

sunshine...@gmail.com

Jul 7, 2016, 10:57:25 AM7/7/16
to Keras-users
I may have the same problem as you. Did you ever solve it?

On Saturday, December 12, 2015 at 1:13:53 PM UTC+8, André Lopes wrote: