Multi-class classification of an image data set

48 views
Skip to first unread message

spuran yarram

unread,
Apr 27, 2016, 11:08:33 AM4/27/16
to Caffe Users
I have a multi-class image classification problem, and I want to use GoogLeNet for it. I resized my data and converted it to two LMDB files, one for the data and one for the labels, but I am unable to use these LMDB files for further processing. Can anyone help me? The following is the code I used for the LMDB conversion:
import lmdb
import sys

import re, fileinput, math
import numpy as np

# Make sure that caffe is on the python path:
# caffe_root = '/home/spuran/Caffe/caffe' # this file is expected to be in {caffe_root}/examples
# import sys
# sys.path.insert(0, caffe_root + 'python')

import caffe

# Command line to check created files:
# python -mlmdb stat --env=./Downloads/caffe-master/data/liris-accede/train_score_lmdb/

# Text file listing the images: one comma-separated record per line, with the
# image path in the first field.
data = 'test.txt'
# Output LMDB directories for the image data and for the labels.
lmdb_data_name = 'test_data_lmdb'
lmdb_label_name = 'train_score_lmdb'

Inputs = []             # image paths, in file order
Labels = []             # stays empty: label parsing below is commented out
error_catch_label = []  # only used by the (disabled) label writer

for line in fileinput.input(data):
    entries = re.split(',', line.strip())
    a = entries[0]
    # Skip blank lines (or records with an empty first field) so that an empty
    # path is never handed to the image loader later on.
    if not a:
        continue
    Inputs.append(a)
    # print(a[1:-1])
    # b= entries[1]
    # Labels.append(b[1:-1])

print('Writing labels')

# NOTE: the label-writing stage below is disabled (commented out), so the
# message above is printed but no label LMDB is actually written. `Labels` is
# also never populated in this script, because the lines that parse the label
# field are commented out as well.
#
# Disabled code, kept for reference. It writes one single-value datum per
# label into `lmdb_label_name`, 1000 labels per write transaction:
#
# # Size of buffer: 1000 elements to reduce memory consumption
# for idx in range(int(math.ceil(len(Labels)/1000.0))):
#     in_db_label = lmdb.open(lmdb_label_name, map_size=int(1e12))
#     with in_db_label.begin(write=True) as in_txn:
#         try:
#             for label_idx, label_ in enumerate(Labels[(1000*idx):(1000*(idx+1))]):
#                 im_dat = caffe.io.array_to_datum(np.array(label_).astype(float).reshape(1,1,1))
#                 in_txn.put('{:0>10d}'.format(1000*idx + label_idx), im_dat.SerializeToString())
#
#                 string_ = str(1000*idx+label_idx+1) + ' / ' + str(len(Labels))
#                 sys.stdout.write("\r%s" % string_)
#                 sys.stdout.flush()
#         except(ValueError,IOError,TypeError,AttributeError):
#             print("problem with")
#             print(label_idx)
#             error_catch_label.append(label_idx)
#             continue
#     in_db_label.close()
# print('')

print('Writing image data')
error_catch = []  # progress strings ("i / N") of the images that failed

# Number of images committed per LMDB write transaction; batching keeps the
# size of each transaction bounded.
batch_size = 1000

# Open the environment once for the whole run (instead of once per batch) and
# guarantee it is closed even if an unexpected error escapes the loop.
in_db_data = lmdb.open(lmdb_data_name, map_size=int(1e12))
try:
    for start in range(0, len(Inputs), batch_size):
        with in_db_data.begin(write=True) as in_txn:
            for in_idx, in_ in enumerate(Inputs[start:start + batch_size], start):
                # Built before the try so it is always defined in the handler.
                string_ = str(in_idx + 1) + ' / ' + str(len(Inputs))
                # Handle failures per image: one unreadable file must not
                # discard the remaining images of the batch.
                try:
                    # NOTE(review): caffe.io.load_image is documented to return
                    # an H x W x C float image; the transpose moves channels
                    # first (C x H x W) - confirm against your Caffe version.
                    im = caffe.io.load_image(in_)
                    im_dat = caffe.io.array_to_datum(
                        im.astype(float).transpose((2, 0, 1)))
                    # Zero-padded keys keep LMDB's sorted order equal to the
                    # input order. py-lmdb needs bytes keys under Python 3.
                    key = '{:0>10d}'.format(in_idx).encode('ascii')
                    in_txn.put(key, im_dat.SerializeToString())
                except (ValueError, IOError, TypeError, AttributeError):
                    print("problem with")
                    print(in_)  # report the offending image path
                    error_catch.append(string_)
                else:
                    sys.stdout.write("\r%s" % string_)
                    sys.stdout.flush()
finally:
    in_db_data.close()

print(error_catch)
print('')
Reply all
Reply to author
Forward
0 new messages