Classify single input using trained model in python

329 views
Skip to first unread message

Oliver Coleman

unread,
Jun 13, 2015, 11:10:57 PM6/13/15
to caffe...@googlegroups.com
I have what seems like it should be a simple task but I can't figure it out from the examples or API documentation. I've successfully trained and tested a network intended to be a pixel classifier (provide a context window of some rectangular section of an image, and the network should classify the pixel at the centre of the window). The input comes from an LMDB layer in which the items are generated from sections/windows of RGB+Depth image pairs, with each item being assigned a category/label. The problem is that when I try to use this trained network to directly classify input, I (think) I've got as far as correctly feeding the data into the trained network, but I don't know how to extract the output from it. I've attached the code I used to generate the DB (function generate_db in nnfd.py), and the network (net2.prototxt) and training (solver.prototxt) protobufs. My attempt at the code to use the trained network to classify windows (the same size and processed in the same way as the training examples) is below. Any assistance would be greatly appreciated. :)

import colorsys
import math
import random
import shutil
import sys

import caffe
import lmdb
import numpy as np
from PIL import Image

def detect(image_path_base, net_proto_path, net_model_path):
    net = caffe.Net(net_proto_path, net_model_path, caffe.TEST)
    
    window_size = net.blobs['data'].data.shape[2]
    print "Window size for network is", window_size
    halfws = window_size / 2
    
    image = np.array(Image.open(image_path_base + "_rgb.png")).transpose((1,0,2))
    image_depth = np.array(Image.open(image_path_base + "_d.png")).transpose((1,0,2))
    # Combine RGB and depth images, only keeping blue and alpha channels from depth image.
    # Final array has form [x][y][R,G,B,Argb,D,Ad] 
    # where Argb is the alpha channel from the RGB image and Ad is the alpha channel from the depth image.
    image = np.append(image, image_depth[:,:,2:], 2)
    
    inspect_pixels = [(x, y) for x in range(halfws, image.shape[0]-halfws) for y in range(halfws, image.shape[1]-halfws)]
    
    print "Inspecting", inspect_pixels, "pixels."
    
    for xy in inspect_pixels:
        window = image[xy[0]-halfws:xy[0]+halfws+1, xy[1]-halfws:xy[1]+halfws+1]
        
        # Check if any pixels are transparent (missing data)
        for wp in np.nditer(window, flags=['external_loop'], order='C'):
            if wp[3] != 255 or wp[5] != 255:
                break;
        else:
            # All pixels in the window have data, let's run detection on it.
            # Remove alpha channels.
            window = np.delete(window, [3, 5], 2)
            # Normalise so total sum is 0
            window = window - window.mean()
            # transpose to channels, height, width
            window = window.transpose((2,1,0))
           
            # Feed data into network.
            net.blobs['data'].data[...] = window

            # Run all layers of network and the output (?)
            out = net.forward()

            ###################### NOW WHAT... ##########################

            #print out
            #print xy, window, out #net.blobs['ip2'].data
            #print net.blobs['ip2'].data


solver.prototxt
net2.prototxt
nnfd.py

Oliver Coleman

unread,
Jun 15, 2015, 8:21:47 PM6/15/15
to caffe...@googlegroups.com
Figured it out. I had to create a "deploy" version of the prototxt description of the network layers, and slightly modify the code to supply data to the network and read the output from it. For others who are in the same predicament: I've attached the deploy prototxt (complete with change comments) so you can compare the test/train version with a deploy version, and the updated code is below with the critical changes in bold:



def detect(image_path_base, net_proto_path, net_model_path):
    caffe.set_mode_cpu()
    net = caffe.Net(net_proto_path, net_model_path, caffe.TEST)
    
    window_size = net.blobs['data'].data.shape[2]
    print "Window size for network is", window_size
    halfws = window_size / 2
    
    # Reshape the net for single input.
    net.blobs['data'].reshape(1, 4, window_size, window_size)
    
    image = np.array(Image.open(image_path_base + "_rgb.png")).transpose((1,0,2))
    image_depth = np.array(Image.open(image_path_base + "_d.png")).transpose((1,0,2))
    # Combine RGB and depth images, only keeping blue and alpha channels from depth image.
    # Final array has form [x][y][R,G,B,Argb,D,Ad] 
    # where Argb is the alpha channel from the RGB image and Ad is the alpha channel from the depth image.
    image = np.append(image, image_depth[:,:,2:], 2)
    
    inspect_pixels = [(x, y) for x in range(halfws, image.shape[0]-halfws) for y in range(halfws, image.shape[1]-halfws)]
    
    print "Inspecting", len(inspect_pixels), "pixels."
    
    # Generate colours to build a classification image.
    class_image = Image.new("RGB", (image.shape[0], image.shape[1]))
    class_count = net.blobs['prob'].data.shape[1]
    RGB_tuples = [colorsys.hsv_to_rgb(x*1.0/class_count, 0.8, 0.6) for x in range(class_count)]
    RGB_tuples = [(int(round(x[0]*255)), int(round(x[1]*255)), int(round(x[2]*255))) for x in RGB_tuples]
    
    category_pixels = {}
    for xy in inspect_pixels:
        window = image[xy[0]-halfws:xy[0]+halfws+1, xy[1]-halfws:xy[1]+halfws+1]
        
        # Check if any pixels are transparent (missing data)
        for wp in np.nditer(window, flags=['external_loop'], order='C'):
            if wp[3] != 255 or wp[5] != 255:
                break;
        else:
            # All pixels in the window have data, let's run detection on it.
            # Remove alpha channels.
            window = np.delete(window, [3, 5], 2)
            
            # Normalise so total sum is 0
            window = window - window.mean()
            # transpose to channels, height, width
            window = window.transpose((2,1,0))
            # Scale from [0,255] to [0,1] (in training/testing this is performed by the data input layer).
            window = window / 255
            
            net.blobs['data'].data[...] = window
            
            out = net.forward()
            
            category = out['prob'].argmax()
            category_pixels.setdefault(category, list()).append(xy)
            
            class_image.putpixel(xy, RGB_tuples[category])
    
    for (category, xy) in category_pixels.viewitems():
        print category, xy
        
    class_image.save(image_path_base + "_predict.png")

net2_deploy.prototxt
Reply all
Reply to author
Forward
0 new messages