fit_generator() for multiple time series files

220 views

Skip to first unread message

Gurunathreddy M

unread,

Nov 18, 2019, 6:54:42 AM11/18/19

to Keras-users

I am working on uni-variate time series forecasting where I have multiple files in a folder each file (variable length) contains an average of 30 * 22100 (sampling rate = 22100) of samples.

I wrote a generator function which accepts time series x (inputs) , and y (output) files stored as Matlab .mat files and uses tf.data.Dataset.from_tensor_slices to create the batches for RNN model.

I want to use this generator in fit_generator(), however, I am getting RepeatDataset' object has no attribute 'shape' error

The full code implementation is given below

from __future__ import absolute_import, division, print_function, unicode_literals
try:
%tensorflow_version 2.x
except Exception:
pass
import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
import scipy.io as sio

def univariate_data(x_dataset, y_dataset, start_index, end_index, history_size, target_size):
data = []
labels = []

start_index = start_index + history_size
if end_index is None:
    end_index = len(x_dataset) - target_size

for i in range(start_index, end_index):
    indices = range(i-history_size, i)
    # Reshape data from (history_size,) to (history_size, 1)
    data.append(np.reshape(x_dataset[indices], (history_size, 1)))
    labels.append(y_dataset[indices])
return np.array(data), np.array(labels)

def generate_data(setName='train', univariate_past_history = 100, univariate_future_target = 100, BATCH_SIZE = 256, BUFFER_SIZE = 10000):

    dataSplit = {'train': 350, 'valid': 38, 'test': 50}

    if (setName == 'train'):
        rangeBeg = 0
        rangeEnd = dataSplit[setName]
    elif (setName == 'valid'):
        rangeBeg = dataSplit['train']
        rangeEnd = rangeBeg + dataSplit[setName]
    elif (setName == 'test'):
        rangeBeg = dataSplit['train'] + dataSplit['valid']
        rangeEnd = rangeBeg + dataSplit['test']

    dataSetDir1 = "../processed_data/x"
    labelDir = "../processed_data/y"

    while True:
        for i in range(rangeBeg, rangeEnd):
            print(setName + str(i))
            wav = sio.loadmat(dataSetDir1 + '/' + '{}_{:01}_x.mat'.format(setName, i))
            egg = sio.loadmat(labelDir + '/' + '{}_{:01}_x.mat'.format(setName, i))
            wave_samples = wav['filteredX'][:, 0]
            egg_samples = egg['hpfEgg'][:, 0]


            x_train_uni, y_train_uni = univariate_data(wave_samples, egg_samples, 0, None,
                                                       univariate_past_history,
                                                       univariate_future_target)
            if setName == 'train':
                train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
                train_univariate = train_univariate.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
            else:
                train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
                train_univariate = train_univariate.batch(BATCH_SIZE).repeat()

            yield train_univariate

train_set_name ='train'
valid_set_name ='valid'

past_history = 100
future_target = 100
BATCH_SIZE = 256
BUFFER_SIZE = 10000

train_data_gen = generate_data(train_set_name, past_history, future_target, BATCH_SIZE, BUFFER_SIZE)

valid_data_gen = generate_data(valid_set_name, past_history, future_target, BATCH_SIZE, BUFFER_SIZE)

tf.keras.backend.clear_session()
x = tf.keras.layers.Input(shape=[past_history, 1], name='input')
l1 = tf.keras.layers.LSTM(32, return_sequences=True, name='LSTM_1')(x)
l2 = tf.keras.layers.LSTM(16, name='LSTM_2')(l1)
d1 = tf.keras.layers.Dense(100)(l2)
model = tf.keras.models.Model(x, d1)
model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=0.1), loss='mae')

EVALUATION_INTERVAL = 10
EPOCHS = 10

model.fit_generator(train_data_gen, epochs=EPOCHS,
steps_per_epoch=EVALUATION_INTERVAL,
validation_data=valid_data_gen, validation_steps=50, verbose=2)

Thanks and regard

Gurunath

Lance Norskog

unread,

Nov 18, 2019, 5:14:23 PM11/18/19

to Gurunathreddy M, Keras-users

https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices

This page has this line:

# d = Dataset.from_tensor_slices([14.0, 15.0, 16.0])

I think you just give it one numpy array, not a Python tuple of 2 arrays.

--
You received this message because you are subscribed to the Google Groups "Keras-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to keras-users...@googlegroups.com.
To view this discussion on the web, visit https://groups.google.com/d/msgid/keras-users/18c36aea-2881-4728-a446-e3f10ae8ff66%40googlegroups.com.

Lance Norskog
lance....@gmail.com
Redwood City, CA

Reply all

Reply to author

Forward

0 new messages