import colorsys
import math
import random
import shutil
import sys

import caffe
import lmdb
import numpy as np
from PIL import Image
def detect(image_path_base, net_proto_path, net_model_path):
    """Run a Caffe network over every fully-valid window of an RGB-D image.

    Early draft: each window is fed through the network but the network
    output is not used yet.

    NOTE: a second ``detect`` with the same signature is defined later in
    this file and replaces this one when the module is loaded.

    Args:
        image_path_base: Path prefix of the image pair; "_rgb.png" and
            "_d.png" are appended to locate the colour and depth images.
        net_proto_path: Path to the network definition passed to caffe.Net.
        net_model_path: Path to the trained weights passed to caffe.Net.

    Returns:
        None.
    """
    net = caffe.Net(net_proto_path, net_model_path, caffe.TEST)
    window_size = net.blobs['data'].data.shape[2]
    print("Window size for network is %s" % window_size)
    # Floor division: the half-width is used for slicing and range bounds.
    halfws = window_size // 2

    # Images are transposed so the arrays are indexed [x][y][channel].
    image = np.array(Image.open(image_path_base + "_rgb.png")).transpose((1, 0, 2))
    image_depth = np.array(Image.open(image_path_base + "_d.png")).transpose((1, 0, 2))

    # Combine RGB and depth images, only keeping blue and alpha channels from
    # the depth image. Final array has form [x][y][R,G,B,Argb,D,Ad] where Argb
    # is the alpha channel from the RGB image and Ad is the alpha channel from
    # the depth image.
    # Assumes both PNGs decode to 4-channel arrays -- TODO confirm.
    image = np.append(image, image_depth[:, :, 2:], 2)

    # Every pixel far enough from the border to be the centre of a full window.
    inspect_pixels = [(x, y)
                      for x in range(halfws, image.shape[0] - halfws)
                      for y in range(halfws, image.shape[1] - halfws)]
    print("Inspecting %d pixels." % len(inspect_pixels))

    for x, y in inspect_pixels:
        window = image[x - halfws:x + halfws + 1, y - halfws:y + halfws + 1]

        # Skip windows containing any transparent (missing data) pixel in
        # either alpha channel. Checked on the channel axis directly: iterating
        # np.nditer with 'external_loop' yields 1-D chunks whose length depends
        # on memory layout, so indexing [3]/[5] on each chunk does not reliably
        # test every pixel.
        if (window[:, :, [3, 5]] != 255).any():
            continue

        # All pixels in the window have data, let's run detection on it.
        # Remove alpha channels.
        window = np.delete(window, [3, 5], 2)
        # Normalise so total sum is 0.
        window = window - window.mean()
        # Transpose to channels, height, width.
        window = window.transpose((2, 1, 0))

        # Feed data into network and run all of its layers.
        net.blobs['data'].data[...] = window
        net.forward()
        # TODO: the forward-pass output is currently discarded; nothing is
        # done with the prediction in this draft.
def detect(image_path_base, net_proto_path, net_model_path):
    """Classify every fully-valid window of an RGB-D image and save a class map.

    A window the size of the network input is centred on each pixel that is
    far enough from the image border. Windows with any non-opaque pixel in
    either alpha channel are skipped. For every other window the network is
    run, and the index of the largest 'prob' output is taken as the category
    of the centre pixel.

    Side effects:
        * Puts Caffe in CPU mode.
        * Prints the window size, the number of candidate pixels and, per
          category, the list of pixels assigned to it.
        * Writes image_path_base + "_predict.png", with each classified pixel
          painted in its category colour; other pixels keep the Image.new
          default fill.

    Args:
        image_path_base: Path prefix of the image pair; "_rgb.png" and
            "_d.png" are appended to locate the colour and depth images.
        net_proto_path: Path to the network definition passed to caffe.Net.
        net_model_path: Path to the trained weights passed to caffe.Net.

    Returns:
        None.
    """
    caffe.set_mode_cpu()

    net = caffe.Net(net_proto_path, net_model_path, caffe.TEST)
    window_size = net.blobs['data'].data.shape[2]
    print("Window size for network is %s" % window_size)
    # Floor division: the half-width is used for slicing and range bounds.
    halfws = window_size // 2

    # Reshape the net for single input (one 4-channel window per forward pass).
    net.blobs['data'].reshape(1, 4, window_size, window_size)

    # Images are transposed so the arrays are indexed [x][y][channel].
    image = np.array(Image.open(image_path_base + "_rgb.png")).transpose((1, 0, 2))
    image_depth = np.array(Image.open(image_path_base + "_d.png")).transpose((1, 0, 2))

    # Combine RGB and depth images, only keeping blue and alpha channels from
    # the depth image. Final array has form [x][y][R,G,B,Argb,D,Ad] where Argb
    # is the alpha channel from the RGB image and Ad is the alpha channel from
    # the depth image.
    # Assumes both PNGs decode to 4-channel arrays -- TODO confirm.
    image = np.append(image, image_depth[:, :, 2:], 2)

    # Every pixel far enough from the border to be the centre of a full window.
    inspect_pixels = [(x, y)
                      for x in range(halfws, image.shape[0] - halfws)
                      for y in range(halfws, image.shape[1] - halfws)]
    print("Inspecting %d pixels." % len(inspect_pixels))

    # Generate colours to build a classification image: one hue per class,
    # evenly spaced around the HSV colour wheel, converted to 0-255 RGB.
    class_image = Image.new("RGB", (image.shape[0], image.shape[1]))
    class_count = net.blobs['prob'].data.shape[1]
    unit_colours = [colorsys.hsv_to_rgb(index * 1.0 / class_count, 0.8, 0.6)
                    for index in range(class_count)]
    RGB_tuples = [tuple(int(round(component * 255)) for component in colour)
                  for colour in unit_colours]
    category_pixels = {}

    for x, y in inspect_pixels:
        window = image[x - halfws:x + halfws + 1, y - halfws:y + halfws + 1]

        # Skip windows containing any transparent (missing data) pixel in
        # either alpha channel. Checked on the channel axis directly: iterating
        # np.nditer with 'external_loop' yields 1-D chunks whose length depends
        # on memory layout, so indexing [3]/[5] on each chunk does not reliably
        # test every pixel.
        if (window[:, :, [3, 5]] != 255).any():
            continue

        # All pixels in the window have data, let's run detection on it.
        # Remove alpha channels.
        window = np.delete(window, [3, 5], 2)
        # Normalise so total sum is 0.
        window = window - window.mean()
        # Transpose to channels, height, width.
        window = window.transpose((2, 1, 0))
        # Scale by 1/255 (in training/testing this is performed by the data
        # input layer).
        window = window / 255.0

        net.blobs['data'].data[...] = window
        out = net.forward()

        # The batch holds a single window, so the flat argmax over 'prob' is
        # the index of the most probable class.
        category = int(out['prob'].argmax())
        category_pixels.setdefault(category, []).append((x, y))
        class_image.putpixel((x, y), RGB_tuples[category])

    # Report which pixels landed in each category.
    for category, pixels in category_pixels.items():
        print("%s %s" % (category, pixels))

    class_image.save(image_path_base + "_predict.png")