I am working on uni-variate time series forecasting where I have multiple files in a folder each file (variable length) contains an average of 30 * 22100 (sampling rate = 22100) of samples.
I wrote a generator function which accepts time series x (inputs) , and y (output) files stored as Matlab .mat files and uses tf.data.Dataset.from_tensor_slices to create the batches for RNN model.
I want to use this generator in fit_generator(), however, I am getting RepeatDataset' object has no attribute 'shape' error
The full code implementation is given below
from __future__ import absolute_import, division, print_function, unicode_literals
try:
%tensorflow_version 2.x
except Exception:
pass
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
import
scipy.io as sio
def univariate_data(x_dataset, y_dataset, start_index, end_index, history_size, target_size):
data = []
labels = []
start_index = start_index + history_size
if end_index is None:
end_index = len(x_dataset) - target_size
for i in range(start_index, end_index):
indices = range(i-history_size, i)
# Reshape data from (history_size,) to (history_size, 1)
data.append(np.reshape(x_dataset[indices], (history_size, 1)))
labels.append(y_dataset[indices])
return np.array(data), np.array(labels)
def generate_data(setName='train', univariate_past_history = 100, univariate_future_target = 100, BATCH_SIZE = 256, BUFFER_SIZE = 10000):
dataSplit = {'train': 350, 'valid': 38, 'test': 50}
if (setName == 'train'):
rangeBeg = 0
rangeEnd = dataSplit[setName]
elif (setName == 'valid'):
rangeBeg = dataSplit['train']
rangeEnd = rangeBeg + dataSplit[setName]
elif (setName == 'test'):
rangeBeg = dataSplit['train'] + dataSplit['valid']
rangeEnd = rangeBeg + dataSplit['test']
dataSetDir1 = "../processed_data/x"
labelDir = "../processed_data/y"
while True:
for i in range(rangeBeg, rangeEnd):
print(setName + str(i))
wav = sio.loadmat(dataSetDir1 + '/' + '{}_{:01}_x.mat'.format(setName, i))
egg = sio.loadmat(labelDir + '/' + '{}_{:01}_x.mat'.format(setName, i))
wave_samples = wav['filteredX'][:, 0]
egg_samples = egg['hpfEgg'][:, 0]
x_train_uni, y_train_uni = univariate_data(wave_samples, egg_samples, 0, None,
univariate_past_history,
univariate_future_target)
if setName == 'train':
train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
train_univariate = train_univariate.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
else:
train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
train_univariate = train_univariate.batch(BATCH_SIZE).repeat()
yield train_univariate
train_set_name ='train'
valid_set_name ='valid'
past_history = 100
future_target = 100
BATCH_SIZE = 256
BUFFER_SIZE = 10000
train_data_gen = generate_data(train_set_name, past_history, future_target, BATCH_SIZE, BUFFER_SIZE)
valid_data_gen = generate_data(valid_set_name, past_history, future_target, BATCH_SIZE, BUFFER_SIZE)
tf.keras.backend.clear_session()
x = tf.keras.layers.Input(shape=[past_history, 1], name='input')
l1 = tf.keras.layers.LSTM(32, return_sequences=True, name='LSTM_1')(x)
l2 = tf.keras.layers.LSTM(16, name='LSTM_2')(l1)
d1 = tf.keras.layers.Dense(100)(l2)
model = tf.keras.models.Model(x, d1)
model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=0.1), loss='mae')
EVALUATION_INTERVAL = 10
EPOCHS = 10
model.fit_generator(train_data_gen, epochs=EPOCHS,
steps_per_epoch=EVALUATION_INTERVAL,
validation_data=valid_data_gen, validation_steps=50, verbose=2)
Thanks and regard
Gurunath