I've successfully trained a convolutional auto-encoder on a 'smaller' dataset (~17.7k samples); however, I do not have enough memory to scale it up to a larger set (~1.2M samples).
Most sources suggest using the fit_generator function, but I have not been able to run it successfully because of a dimensionality mismatch at the final layer of the network. The final layer correctly outputs a (300, 300, 3) image, the same size as the input image, but the generator yields a full batch of shape (50, 300, 300, 3), which seems to be causing the error.
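To make the shapes concrete, here is a minimal sketch (dummy arrays only; the variable names are just for illustration) of the two shapes involved: the final layer produces a 4-D batch of images, while the second element my generator yields is only 2-D:

import numpy as np

batch_size = 50
decoded_batch = np.zeros((batch_size, 300, 300, 3))  # shape the final layer produces for one batch
label_batch = np.zeros((batch_size, 1))              # second element my generator currently yields

print(decoded_batch.shape)  # (50, 300, 300, 3)
print(label_batch.shape)    # (50, 1) -- the shape reported in the traceback below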
I am very new to generators, so there is a good chance the solution is trivial, but I have tried several variations with no success.
Thanks in advance.
CODE:
import tarfile
import random

import numpy as np
from PIL import Image

from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, UpSampling2D, Activation
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.optimizers import RMSprop

# FILE_PATH, batch_size, nb_filters, kernel_size, pool_size and nb_epoch are
# defined earlier in the script (omitted here).

def BatchGenerator(tarFile, batch_size):
    # Yields (images, labels) batches indefinitely, sampling members at random
    # from the tar archive.
    tar = tarfile.open(FILE_PATH + '/' + tarFile, "r:gz")
    members = tar.getnames()
    while True:
        batch_labels = np.zeros((batch_size, 1))
        batch_features = np.zeros((batch_size, 300, 300, 3))
        count = 0
        while count < batch_size:
            member = random.choice(members)
            f = tar.extractfile(member)
            if f:
                content = f.read()
                if content:
                    try:
                        img = Image.open(f)
                        if img.mode == 'RGBA':
                            img = img.convert('RGB')
                        imgArray = np.array(img)
                        batch_features[count] = imgArray
                        count += 1
                        print(count)
                    except:
                        # Skip members that cannot be opened as images.
                        continue
        yield batch_features, batch_labels

a = BatchGenerator('1.tar.gz', batch_size)
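# Quick sanity check (added for debugging): pull one batch from the generator
# and print the shapes that fit_generator will receive as (inputs, targets).
# In my runs batch_size = 50.
xb, yb = next(a)
print(xb.shape)  # (batch_size, 300, 300, 3)
print(yb.shape)  # (batch_size, 1)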
model = Sequential()
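# Encoder: Conv2D + MaxPooling2D blocks downsample the (300, 300, 3) input.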
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same', input_shape=(300, 300, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size, border_mode='same'))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), border_mode='same'))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size, border_mode='same'))
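# Decoder: Conv2D + UpSampling2D blocks restore the original (300, 300, 3) resolution.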
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same'))
model.add(Activation('relu'))
model.add(UpSampling2D(pool_size))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same'))
model.add(Activation('relu'))
model.add(UpSampling2D((3, 3)))
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], border_mode='same'))
model.add(Activation('relu'))
model.add(UpSampling2D(pool_size))
model.add(Convolution2D(3, kernel_size[0], kernel_size[1], border_mode='same'))
model.add(Activation('sigmoid'))
model.summary()
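# Note: model.summary() shows the final sigmoid Activation layer (activation_7
# in the traceback below) with output shape (None, 300, 300, 3), so Keras
# checks the model target against a 4-D array.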
checkpointer = ModelCheckpoint(filepath="./tmp/weights.hdf5", verbose=1, save_best_only=True)
logger = CSVLogger("./log.csv", separator=',', append=False)
model.compile(loss='mse', optimizer=RMSprop(), metrics=['accuracy'])
# history = model.fit(X_train, X_train, callbacks=[checkpointer, logger], batch_size=batch_size, nb_epoch=nb_epoch, verbose=1, validation_data=(X_train, X_train))
history = model.fit_generator(a, samples_per_epoch = 50, nb_epoch = nb_epoch, verbose=2, callbacks=[], validation_data=None, class_weight=None, nb_worker=1)
ERROR:
Traceback (most recent call last):
  File "/home/kre033/PycharmProjects/DeepLearning/RBM/icons/iconAutoencoder.py", line 241, in <module>
    history = model.fit_generator(a, samples_per_epoch = 50, nb_epoch = nb_epoch, verbose=2, callbacks=[], validation_data=None, class_weight=None, nb_worker=1)
  File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 907, in fit_generator
    pickle_safe=pickle_safe)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1451, in fit_generator
    class_weight=class_weight)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1220, in train_on_batch
    check_batch_dim=True)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 967, in _standardize_user_data
    exception_prefix='model target')
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 100, in standardize_input_data
    str(array.shape))
Exception: Error when checking model target: expected activation_7 to have 4 dimensions, but got array with shape (50, 1)