I have a dataset with 299 input features (so the input layer has 299 neurons).
So I built the model below.
P.S.: The submission code doesn't work yet.
import numpy as np
import numpy as np
import pandas as pd
from keras.optimizers import RMSprop, SGD, Adam
from keras.utils.np_utils import to_categorical
np.random.seed(8000) # for reproducibility
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils, generic_utils
def calculate_and_get_validation_data(x_train, y_train, validation_data_size_rows=50000):
    """Split off the last ``validation_data_size_rows`` rows as a validation set.

    Parameters
    ----------
    x_train, y_train : np.ndarray
        Full training features and labels (row-aligned; split along axis 0).
    validation_data_size_rows : int
        Number of trailing rows to reserve for validation. Must be positive.

    Returns
    -------
    tuple
        (x_train, y_train, x_val, y_val) — training rows first, the last
        ``validation_data_size_rows`` rows as the validation split.

    Raises
    ------
    ValueError
        If ``validation_data_size_rows`` is not positive. (The original
        printed a message and called sys.exit(1), which kills the whole
        interpreter — a library function should raise instead.)
    """
    if validation_data_size_rows <= 0:
        raise ValueError(
            "Validation DataSet size must be > 0 , parameter must be Positive and non-zero!")
    # Index where the validation tail begins.
    split = x_train.shape[0] - validation_data_size_rows
    x_train, x_val = x_train[:split], x_train[split:]
    y_train, y_val = y_train[:split], y_train[split:]
    return x_train, y_train, x_val, y_val
def load_data():
    """Load the training, label and submission CSVs and carve out a validation split.

    Reads three CSV files from the working directory (all with one header row):
    ``train_data_ready.csv``, ``train_label_ready.csv``, ``test_data_ready.csv``.
    The first column of the two data files is dropped (presumably an id
    column — verify against how the files were generated).

    Returns
    -------
    tuple
        (x_train, y_train, x_val, y_val, submissiondata) — the last 40000
        training rows are held out as the validation set.
    """
    print("Loading Data...")
    # NOTE(review): np.genfromtxt emits NaN for any missing/unparsable field.
    # NaNs here would propagate straight into the loss and explain the
    # "loss: nan" seen from epoch 1 — worth checking with np.isnan(...).any().
    trainingdata = np.genfromtxt("train_data_ready.csv", dtype=np.float32, delimiter=',', skip_header=1)
    trainingdata = trainingdata[:, 1:]  # drop leading id column
    traininglabel = np.genfromtxt(fname="train_label_ready.csv", dtype=np.float32, delimiter=",", skip_header=1)
    submissiondata = np.genfromtxt(fname="test_data_ready.csv", dtype=np.float32, delimiter=",", skip_header=1)
    submissiondata = submissiondata[:, 1:]  # drop leading id column
    print("\nShape : ", trainingdata.shape)
    print("\nShape : ", traininglabel.shape)
    print("\nShape : ", submissiondata.shape)
    #############################################
    #############################################
    x_train, y_train, x_val, y_val = calculate_and_get_validation_data(
        trainingdata, traininglabel, validation_data_size_rows=40000)
    print("xtrain = ", x_train.shape)
    print("ytrain = ", y_train.shape)
    print("xval = ", x_val.shape)
    print("yval = ", y_val.shape)
    return x_train, y_train, x_val, y_val, submissiondata
def make_submission(y_prob, ids, encoder=("0", "1"), fname="keras.csv"):
    """Write a Kaggle-style submission CSV: header ``id,<classes>``, one row per sample.

    Parameters
    ----------
    y_prob : np.ndarray
        (n_samples, n_classes) class probabilities (e.g. from predict_proba).
    ids : iterable
        Row ids, one per sample (ints are fine — they are stringified here).
    encoder : sequence of str
        Class labels for the header row. Default changed from a mutable
        list to an equivalent tuple (joined output is identical).
    fname : str
        Output file path.
    """
    with open(fname, 'w') as f:
        f.write('id,')
        f.write(','.join(str(c) for c in encoder))
        f.write('\n')
        for row_id, probs in zip(ids, y_prob):
            # BUG FIX: the original did ','.join([i] + ...), passing the raw
            # (numpy) integer id into str.join, which raises TypeError —
            # this is why "the submission code doesn't work yet".
            f.write(','.join([str(row_id)] + [str(p) for p in probs.tolist()]))
            f.write('\n')
    print('Wrote submission to file {}.'.format(fname))
# ---------------------------------------------------------------------------
# Main script: load data, build the MLP, train, and write the submission.
# ---------------------------------------------------------------------------
x_train, y_train, x_val, y_val, submissiondata = load_data()

# Fail fast on NaN inputs: np.genfromtxt emits NaN for missing/unparsable
# fields, and a single NaN in the features makes the loss NaN from epoch 1
# (exactly the symptom in the training log below).
if np.isnan(x_train).any() or np.isnan(x_val).any() or np.isnan(submissiondata).any():
    raise ValueError("Input data contains NaN values - clean or impute before training.")

# One-hot encode the binary labels to shape (n, 2).
y_train = to_categorical(y_train, nb_classes=2).astype("float32")
y_val = to_categorical(y_val, nb_classes=2).astype("float32")

print('Building model...')
model = Sequential()
# First layer must declare the input dimensionality (299 features).
model.add(Dense(299, input_dim=299, init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(4000, init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(4000, init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(4000, init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(2000, init='uniform'))
model.add(Activation('tanh'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# FIX: with a 2-unit one-hot output, use softmax + categorical_crossentropy.
# The original paired a 2-unit sigmoid with binary_crossentropy, so the two
# outputs were independent and did not form a proper class distribution.
model.add(Dense(2, init='uniform', activation='softmax'))

sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)

print('Training model...')
model.fit(x_train, y_train,
          batch_size=100,
          nb_epoch=20,
          show_accuracy=True,
          validation_data=(x_val, y_val))

print('Generating submission...')
proba = model.predict_proba(submissiondata)
ids = np.arange(start=0, stop=submissiondata.shape[0], step=1)
make_submission(proba, ids, fname='keras-otto.csv')
Using gpu device 0: GeForce GTX 750 Ti (CNMeM is enabled)
Loading Data...
Shape : (260753, 299)
Shape : (260753,)
Shape : (173836, 299)
xtrain = (220753, 299)
ytrain = (220753,)
xval = (40000, 299)
yval = (40000,)
Building model...
Training model...
Train on 220753 samples, validate on 40000 samples
Epoch 1/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155
Epoch 2/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155
Epoch 3/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155
Epoch 4/20
220753/220753 [==============================] - 386s - loss: nan - acc: 0.8119 - val_loss: nan - val_acc: 0.8155