Could someone help me with this as I am new on machine learning.
# -*- coding: utf-8 -*-
"""Space_Ship_Titanic.ipynb
Automatically generated by Colaboratory.
Original file is located at
""
import os
os.environ['KAGGLE_CONFIG_DIR'] = '/content'
import pandas as pd
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
# !kaggle competitions download -c spaceship-titanic
# !unzip \*.zip && rm *.zip
df = pd.read_csv("train.csv")
df1 = pd.read_csv("test.csv")
df.head()
df[['Deck', 'Num', 'Side']] = df['Cabin'].str.split("/",expand=True)
df1[['Deck', 'Num', 'Side']] = df1['Cabin'].str.split("/",expand=True)
df.head()
df = df.drop("Cabin",axis=1)
df1 = df1.drop("Cabin",axis=1)
df.head()
df["Billed"] = df[['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']].sum(axis=1)
df = df.drop(['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck',],axis=1)
df1["Billed"] = df1[['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck']].sum(axis=1)
df1 = df1.drop(['RoomService','FoodCourt','ShoppingMall','Spa','VRDeck',],axis=1)
df.head()
df = df.drop('Name',axis=1)
df1 = df1.drop('Name',axis=1)
df.head()
df[['Pass_group','group_num']] = df['PassengerId'].str.split("_",expand=True)
df = df.drop("PassengerId",axis=1)
df1[['Pass_group','group_num']] = df1['PassengerId'].str.split("_",expand=True)
df1 = df1.drop("PassengerId",axis=1)
df.head()
vocab_homePlanet = df["HomePlanet"].value_counts()
vocab_homePlanet
df['HomePlanet'] = df['HomePlanet'].dropna()
df['HomePlanet'].value_counts()
df["Destination"].value_counts()
df["Deck"].value_counts()
df.head()
df = df.astype(str)
df1 = df1.astype(str)
df
vectorize_layer = keras.layers.TextVectorization(max_tokens=50000,output_mode='int',output_sequence_length=4)
vectorize_layer.adapt(df['HomePlanet'])
df['HomePlanet'] = vectorize_layer(df['HomePlanet'])
vectorize_layer = keras.layers.TextVectorization(max_tokens=50000,output_mode='int',output_sequence_length=4)
vectorize_layer.adapt(df1['HomePlanet'])
df1['HomePlanet'] = vectorize_layer(df1['HomePlanet'])
df.head()
def vectorized_layer(data,input,output_length) :
vectorize_layer = keras.layers.TextVectorization(max_tokens=5000,output_mode='int',output_sequence_length=output_length)
vectorize_layer.adapt(data[input])
return vectorize_layer(data[input])
df['Destination'].value_counts()
df['Destination'] = vectorized_layer(df,'Destination',4)
df1['Destination'].value_counts()
df1['Destination'] = vectorized_layer(df1,'Destination',4)
df['Deck'].value_counts()
df['Deck'] = vectorized_layer(df,'Deck',9)
df1['Deck'].value_counts()
df1['Deck'] = vectorized_layer(df1,'Deck',9)
df['Side'].value_counts()
df['Side'] = vectorized_layer(df,'Side',3)
df1['Side'].value_counts()
df1['Side'] = vectorized_layer(df1,'Side',3)
df.head()
df1.head()
df['CryoSleep'] = df['CryoSleep'].map({'True':1,'False':0})
df['VIP'] = df['VIP'].map({'True':1,'False':0})
df['Transported'] = df['Transported'].map({'True':1,'False':0})
df1['CryoSleep'] = df1['CryoSleep'].map({'True':1,'False':0})
df1['VIP'] = df1['VIP'].map({'True':1,'False':0})
# df1['Transported'] = df1['Transported'].map({'True':1,'False':0})
df.head()
df1.head()
x = df.drop(['Transported'],axis=1)
y = df['Transported']
# x_normalized = df[['CryoSleep','VIP']]
x = x.to_numpy(dtype='float32')
y = y.to_numpy(dtype='float32')
# x_test = df1.drop(['CryoSleep','VIP'],axis=1)
x_test = df1
# x_test_normalized = df[['CryoSleep','VIP']]
x_test = x_test.to_numpy(dtype='float32')
x.dtype, x_test.dtype
minMaxScaller = MinMaxScaler(feature_range=(0,1))
StdScaller = StandardScaler()
x_minMax = minMaxScaller.fit_transform(x)
x_minMax
x_test_minMax = minMaxScaller.fit_transform(x_test)
# x_Std = StdScaller.fit_transform(x)
# x_Std[1]
x_train,x_val,y_train,y_val = train_test_split(x_minMax,y,test_size=0.2,random_state=42)
x_train.shape, x_val.shape, y_train.shape, y_val.shape
model = keras.Sequential()
input_shape = x.shape[1]
input_shape
x_train.view()
# model.add(keras.layers.Embedding(500,16))
model.add(keras.layers.Dense(512,activation='relu',input_shape=(,input_shape)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.1))
model.add(keras.layers.Dense(256,activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.1))
model.add(keras.layers.Dense(128,activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.1))
# model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(1,activation='sigmoid'))
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,loss='mse',metrics=['accuracy'])
model.fit(x_train,y_train,epochs=20)
model.evaluate(x_test_minMax)
And this is the result that I am having
Epoch 1/20
218/218 [==============================] - 4s 8ms/step - loss: nan - accuracy: 0.4967
Epoch 2/20
218/218 [==============================] - 2s 7ms/step - loss: nan - accuracy: 0.4967
Epoch 3/20
218/218 [==============================] - 1s 6ms/step - loss: nan - accuracy: 0.4967