Dear,
I am building an RNN for anomaly detection in a time series. I used
TensorFlow version: 2.15.0 and Keras version: 2.15.0
I am encountering a accuracy and a val_accuracy of zero after training!!!
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.layers import Dropout
import matplotlib.pyplot as plt
# Step 1:
#data = pd.read_csv("C:\\Users\\xps\\Desktop\\MLProjects\\Data\\Resting.csv")
data = pd.read_csv("/data/workingdir/abh/Math/IsolationForest/Resting.CSV")
data = data["Amplitude"]
# Define a threshold ..
# Compute mean and standard deviation of reconstruction errors
mean = np.mean(data)
std = np.std(data)
# Define threshold as multiple of standard deviation
#threshold = mean + 2 * std
threshold = 650
# Label the data based on the threshold
labels = np.where(data > threshold, 1, 0)
# Add the labels to the data
data = pd.DataFrame(data)
data['Labels'] = labels
# Create a subsequences (Tensor subsequence) from a 1d dataset
# Number of subsequences and Length of each subsequence
number_subsequences = 1200
length_subsequences = 1000
# Channel dimension (set to 1 bcz its a time series 1d)
channel = 1
# Number of columns in the dataset
number_columns = data.shape[1]
# Calculate total number of data points
total_data_points = len(data)
# Calculate the stride to slide the window for creating subsequences
stride = (total_data_points - length_subsequences + 1) // number_subsequences
# Initialize the tensor to store subsequences
x_shape = (number_subsequences, length_subsequences, number_columns, channel)
x = np.zeros(x_shape)
# Iterate over each subsequence
for i in range(number_subsequences):
start_index = i * stride
end_index = start_index + length_subsequences
subsequence = data.iloc[start_index:end_index, :].values
#print(subsequence[:,4])
# Reshape subsequence to fit into X tensor
x[i, :, :, 0] = subsequence
# X tensor shape
print("Shape of x tensor:", x.shape)
from sklearn.model_selection import train_test_split
X = x[:, :, 0:1, :].squeeze(axis=-1)
y = x[:, :, 1, :].squeeze(axis=-1)
# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Reshape X_train and X_test to 2D arrays
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)
# Calculate mean and standard deviation from X_train
mean = np.mean(X_train_flat, axis=0)
std_dev = np.std(X_train_flat, axis=0)
# Normalize X_train and X_test using Z-score normalization
X_train_normalized = (X_train_flat - mean) / std_dev
X_test_normalized = (X_test_flat - mean) / std_dev
# Reshape normalized data back to original shape
X_train = X_train_normalized.reshape(X_train.shape)
X_test = X_test_normalized.reshape(X_test.shape)
# Define the LSTM model
model = Sequential()
# Adding the 1st layer
model.add(LSTM(units=1100, return_sequences = True, input_shape=(length_subsequences,1)))
model.add(Dropout(0.2))
# Add 2nd Layer
model.add(LSTM(units=1000, return_sequences = True))
model.add(Dropout(0.2))
# Add 3rd Layer
model.add(LSTM(units=1000, return_sequences = True))
model.add(Dropout(0.2))
# Add 4th Layer
model.add(LSTM(units=1000, return_sequences = True))
model.add(Dropout(0.2))
# Add 5th Layer
model.add(LSTM(units=2000))
model.add(Dropout(0.2))
model.add(Dense(units=1000))
model.summary()
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Fit the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
I am getting error something like this:
Epoch 1/100
30/30 [==============================] - 79s 3s/step - loss: 1.8809 - accuracy: 0.0000e+00 - val_loss: 1.8551 - val_accuracy: 0.0000e+00
NOTE:
it works well with the older version of Tensorflow and Keras :
TensorFlow version: 1.14.0
Keras version: 2.3.1