Multi-class classification of an image data set

48 views

caffe, classification, image, image_data, lmbd, python, ubuntu

Skip to first unread message

spuran yarram

unread,

Apr 27, 2016, 11:08:33 AM4/27/16

to Caffe Users

I have a multi-class image classification problem, and I wanted to use GoogLeNet for it. I resized my data and converted it to two LMDB files, one for the data and one for the labels, but I am unable to use these LMDB files for further processing. Can anyone help me? The following is the code I used for the LMDB conversion:

import lmdb
import sys

import re, fileinput, math
import numpy as np

# Make sure that caffe is on the python path:
# caffe_root = '/home/spuran/Caffe/caffe'  # this file is expected to be in {caffe_root}/examples
# import sys
# sys.path.insert(0, caffe_root + 'python')

import caffe

# Command line to check created files:
# python -mlmdb stat --env=./Downloads/caffe-master/data/liris-accede/train_score_lmdb/

# Listing file: one comma-separated record per line, image path in the first field.
data = 'test.txt'
# Output LMDB directories for the image data and for the labels.
lmdb_data_name = 'test_data_lmdb'
lmdb_label_name = 'train_score_lmdb'

Inputs = []             # image paths, in file order
Labels = []             # stays empty: label parsing below is disabled
error_catch_label = []

# Read the listing with a context manager so the file handle is always closed.
with open(data) as listing:
    for line in listing:
        line = line.strip()
        if not line:
            # Skip blank lines; otherwise an empty string would be queued as
            # an image path and fail later in the image loader.
            continue
        entries = line.split(',')
        Inputs.append(entries[0])
        # Label parsing is disabled; re-enable to populate Labels:
        # b = entries[1]
        # Labels.append(b[1:-1])

# Progress message only: the label-writing loop below is commented out, so
# this section does not create or modify any LMDB.
print('Writing labels')

# Disabled code, kept for reference. If re-enabled it would store every entry
# of Labels as a 1x1x1 float datum in lmdb_label_name, committing in batches
# of 1000 under zero-padded 10-digit keys.
# # Size of buffer: 1000 elements to reduce memory consumption
# for idx in range(int(math.ceil(len(Labels)/1000.0))):
#     in_db_label = lmdb.open(lmdb_label_name, map_size=int(1e12))
#     with in_db_label.begin(write=True) as in_txn:
#      try:
#         for label_idx, label_ in enumerate(Labels[(1000*idx):(1000*(idx+1))]):
#             im_dat = caffe.io.array_to_datum(np.array(label_).astype(float).reshape(1,1,1))
#             in_txn.put('{:0>10d}'.format(1000*idx + label_idx), im_dat.SerializeToString())
#
#             string_ = str(1000*idx+label_idx+1) + ' / ' + str(len(Labels))
#             sys.stdout.write("\r%s" % string_)
#             sys.stdout.flush()
#      except(ValueError,IOError,TypeError,AttributeError):
#          print("problem with")
#          print(label_idx)
#          error_catch_label.append(label_idx)
#          continue
#     in_db_label.close()
# print('')

print('Writing image data')
error_catch = []  # paths of the images that could not be converted/stored

# Number of images committed per write transaction (keeps memory bounded).
BATCH_SIZE = 1000
n_batches = int(math.ceil(len(Inputs) / float(BATCH_SIZE)))

# Open the environment once and always close it. The previous version opened
# it once per batch and skipped close() whenever a batch failed, leaking the
# handle and re-opening an environment that was still open.
in_db_data = lmdb.open(lmdb_data_name, map_size=int(1e12))
try:
    for idx in range(n_batches):
        start = BATCH_SIZE * idx
        with in_db_data.begin(write=True) as in_txn:
            # enumerate(..., start) yields the global image index directly.
            for in_idx, in_ in enumerate(Inputs[start:start + BATCH_SIZE], start):
                # Guard each image separately so one unreadable file does not
                # drop the remainder of its batch.
                try:
                    im = caffe.io.load_image(in_)
                    # H x W x C -> C x H x W, the layout array_to_datum expects.
                    # NOTE(review): load_image is understood to return RGB
                    # floats in [0, 1]; confirm the network's expected channel
                    # order and value range before training on this LMDB.
                    im_dat = caffe.io.array_to_datum(
                        im.astype(float).transpose((2, 0, 1)))
                    # LMDB keys must be bytes on Python 3; encoding is a no-op
                    # difference on Python 2.
                    key = '{:0>10d}'.format(in_idx).encode('ascii')
                    in_txn.put(key, im_dat.SerializeToString())
                except (ValueError, IOError, TypeError, AttributeError) as err:
                    print('')  # leave the "\r" progress line before reporting
                    print("problem with")
                    print(in_)
                    print(err)
                    error_catch.append(in_)
                    continue

                string_ = str(in_idx + 1) + ' / ' + str(len(Inputs))
                sys.stdout.write("\r%s" % string_)
                sys.stdout.flush()
finally:
    in_db_data.close()

# Terminate the progress line first so the failure list starts on its own line.
print('')
print(error_catch)

Reply all

Reply to author

Forward

0 new messages