How to interpret the output probabilities of a net

86 views
Skip to first unread message

Benedetta Savelli

unread,
Mar 18, 2016, 11:04:17 AM3/18/16
to Caffe Users
Hi everybody. I tried AlexNet on my own data for a two-class classification problem.
This is my "net".prototxt:

# AlexNet-style network with a two-way output: five convolution layers
# (conv1-conv5), three fully connected layers (fc6-fc8) and a Softmax on top.
name: "AlexNet"

# Input: batches of 64 read from an LMDB. Only a "data" top is declared (no
# "label" top), and the layer has no phase restriction, so it is also active
# when this file is loaded for testing: each forward() refills "data" with a
# batch of 64 from the LMDB.
layer {
  name: "data"
  type: "Data"
  top: "data"
  transform_param {
    # Per caffe.proto: mirror randomly flips the input, the mean file is
    # subtracted, and the result is then multiplied by scale.
    mirror: true
    mean_file: "/home/deepcuda/Scrivania/caffe/caffe/data/ilsvrc12/imagenet_mean_65_completo_new_label.binaryproto" 
    scale:0.0417226898880303
  }
  data_param {
    source: "/home/deepcuda/Scrivania/caffe/caffe/dati_caffe/image_train_lmdb_65_new_not_skeleton"
    batch_size: 64
    backend: LMDB
  }
}
# conv1: 96 filters of 11x11, stride 1, no padding specified.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # First param block applies to the weights, second to the biases (Caffe
  # convention; the same pattern is repeated for every learnable layer below).
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# In-place ReLU on conv1 (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# Local response normalisation over 5 neighbouring channels.
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# 3x3 max pooling, stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# conv2: 256 filters of 5x5, pad 2, split into 2 groups.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "xavier"
      
    }
    bias_filler {
      type: "constant"
      
    }
  }
}
# In-place ReLU on conv2.
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# Local response normalisation, same settings as norm1.
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# 3x3 max pooling, stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# conv3: 384 filters of 3x3, pad 1, no grouping.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
      # NOTE(review): "std" is the field used by the gaussian filler; it is
      # presumably ignored by "xavier" - confirm, and drop it if so. No other
      # xavier filler in this file sets it.
      std: 0.01
    }
    bias_filler {
      type: "constant"
    }
  }
}
# In-place ReLU on conv3.
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# conv4: 384 filters of 3x3, pad 1, split into 2 groups.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "xavier"
      
    }
    bias_filler {
      type: "constant"
      
    }
  }
}
# In-place ReLU on conv4.
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# conv5: 256 filters of 3x3, pad 1, split into 2 groups.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "xavier"
      
    }
    bias_filler {
      type: "constant"
      
    }
  }
}
# In-place ReLU on conv5.
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# 3x3 max pooling, stride 2; feeds the fully connected layers.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# fc6: fully connected, 1024 outputs, biases initialised to 0.1.
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1024
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# In-place ReLU on fc6.
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
# In-place dropout on fc6 with ratio 0.5.
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# fc7: fully connected, 512 outputs, biases initialised to 0.1.
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 512
    weight_filler {
      type: "xavier"
    
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
# In-place ReLU on fc7.
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
# In-place dropout on fc7 with ratio 0.5.
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
# fc8: final fully connected layer, one output per class (2).
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# Despite its name, this is a plain Softmax (not SoftmaxWithLoss) and it takes
# no label input. Its top "loss" holds the class probabilities, one row of 2
# values per image in the batch, i.e. a (64, 2) array with the Data layer above.
layer {
  name: "loss"
  type: "Softmax"
  bottom: "fc8"
  top: "loss"
}

and this is my solver.prototxt

# The train/test net protocol buffer definition

net: "/home/deepcuda/Scrivania/caffe/caffe/codice_retina/modifiche_riproduzione_esempio/prova3_new.prototxt"


base_lr: 0.001     # begin training at a learning rate of 0.001 = 1e-3

lr_policy: "step" # learning rate policy: drop the learning rate in "steps"
                  # by a factor of gamma every stepsize iterations

gamma: 0.1        # drop the learning rate by a factor of 10
                  # (i.e., multiply it by a factor of gamma = 0.1)

stepsize: 10000  # drop the learning rate every 10K iterations
                 # (so at 10K, 20K and 30K within this run)

# train for 35K iterations total
max_iter: 35000

momentum: 0.9

# accumulate gradients over 2 batches before each weight update; with the
# net's batch_size of 64 that is 128 images per update
iter_size: 2

# snapshot intermediate results every 5K iterations; the prefix ends in "/",
# so the files are written into that directory as "_iter_<N>.caffemodel"

snapshot: 5000
snapshot_prefix: "/home/deepcuda/Scrivania/caffe/caffe/codice_retina/modifiche_riproduzione_esempio/snapshot_prova3_new_not_skeleton/"

# solver mode: CPU or GPU

solver_mode: GPU

Now I'm trying to test the network: I want to obtain the output probabilities for an image (one that is not in the training set). I'm using this Python code:

# Test script: load the trained net, feed it one 65x65 single-channel crop of
# a test image and print the softmax output.
import scipy
import numpy
from scipy import misc
# NOTE(review): caffe is imported here, before caffe_root/python is added to
# sys.path below, so that insert cannot influence this import.
import caffe
import sys
caffe_root = '/home/deepcuda/Scrivania/caffe/caffe/'  
sys.path.insert(0, caffe_root + 'python')




#################################### load path names ################################
pathdir1="/home/deepcuda/Scrivania/dataset2/"
pathdir2="/home/deepcuda/Scrivania/dataset2/"
pathdir3="/home/deepcuda/Scrivania/dataset2/"
test_imgName=[]
test_maskName=[]
test_grtName=[]

# Build the file names 1_image.jpg and 2_image.jpg. All three lists receive
# the same path (only pathdir1 is used).
for num in range(1,3): 
    string1=str(num)
    string2="_image.jpg"
    string=pathdir1+string1+string2     
    test_imgName.append(string)
    test_maskName.append(string)
    test_grtName.append(string)
    print(test_imgName[num-1])

# Storage for two 584x565 images per category.
test_original_images=numpy.zeros((2,584,565))
test_grt_images=numpy.zeros((2,584,565))
test_mask_images=numpy.zeros((2,584,565))

# NOTE(review): the grt array is filled from the mask name list and vice
# versa; it makes no difference here because all three lists hold the same
# paths, and only test_original_images is used below.
for num in range(1,3):
    test_original_images[num-1]=misc.imread(test_imgName[num-1])
    test_grt_images[num-1]=misc.imread(test_maskName[num-1])
    test_mask_images[num-1]=misc.imread(test_grtName[num-1])
    
print('test_original_images.shape')    
print(test_original_images.shape)    

caffe.set_mode_gpu()


# NOTE: this is the same prototxt the solver trains with, i.e. the one whose
# first layer is the LMDB Data layer, not a deploy definition.
model_def = caffe_root + 'codice_retina/modifiche_riproduzione_esempio/prova3_new.prototxt'
model_weights = caffe_root + 'codice_retina/modifiche_riproduzione_esempio/snapshot_prova3_new_not_skeleton/_iter_30000.caffemodel'



net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)
                

# Ask for a 1 x 1 x 65 x 65 input blob.
net.blobs['data'].reshape(1,        # batch size
                          1,         # single channel
                          65, 65)  # image size is 65x65
                          

               
mu = numpy.load(caffe_root + 'data/ilsvrc12/imagenet_mean_65_new_label.npy')
mu = mu.mean(1).mean(1)  # average over pixels to obtain one mean value per channel
print('mu.shape')
print(mu.shape)
mu=mu[0:1]  # keep only the first channel's mean, as a length-1 array

transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

#transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
#transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
#transformer.set_channel_swap('data', (2,1,0))  # swap channels from RGB to BGR        


print('net.blobs[data].data.shape')
print(net.blobs['data'].data.shape)

                          
                
# Top-left 65x65 crop of the first test image, kept as a (1, 65, 65) array.
b=test_original_images[0:1,0:65,0:65]
print('b.shape')
print(b.shape)                

transformed_image = transformer.preprocess('data', b)

print('shape immagine trasformata')
print(transformed_image.shape)
transformed_image=transformed_image[0]  # drop the leading axis

print('shape immagine trasformata')
print(transformed_image.shape)

# copy the image data into the memory allocated for the net
net.blobs['data'].data[...] = transformed_image


### perform classification
# NOTE: model_def still contains the Data layer, so forward() refills 'data'
# from the LMDB with a batch of 64; the reshape and the copy above are
# overwritten, which is why the printed output has shape (64, 2).
output = net.forward()

# 'loss' is the name of the Softmax top in the prototxt.
output_prob = output['loss']
print(output_prob)
print(output_prob.shape)




The output values I obtain are:
[[ 0.57784313  0.42215687]
 [ 0.49116096  0.50883901]
 [ 0.53727967  0.4627203 ]
 [ 0.54450911  0.45549095]
 [ 0.56068432  0.43931565]
 [ 0.53982878  0.46017125]
 [ 0.49977863  0.50022143]
 [ 0.5512988   0.44870117]
 [ 0.54546332  0.45453668]
 [ 0.48363432  0.51636565]
 [ 0.49135649  0.50864351]
 [ 0.55865842  0.44134155]
 [ 0.65845817  0.3415418 ]
 [ 0.58295047  0.41704956]
 [ 0.57029867  0.42970127]
 [ 0.523287    0.47671294]
 [ 0.50206202  0.49793801]
 [ 0.56554979  0.43445024]
 [ 0.52870554  0.47129443]
 [ 0.51294541  0.48705459]
 [ 0.55717754  0.44282249]
 [ 0.47260985  0.52739012]
 [ 0.50372535  0.49627468]
 [ 0.55172133  0.4482787 ]
 [ 0.48262683  0.5173732 ]
 [ 0.58251905  0.41748092]
 [ 0.49053943  0.50946057]
 [ 0.49907571  0.50092435]
 [ 0.4999122   0.5000878 ]
 [ 0.56121379  0.43878621]
 [ 0.48556128  0.51443875]
 [ 0.5404231   0.4595769 ]
 [ 0.60585636  0.39414361]
 [ 0.48810562  0.5118944 ]
 [ 0.53850931  0.46149069]
 [ 0.54466194  0.45533806]
 [ 0.48128659  0.51871341]
 [ 0.51208222  0.48791778]
 [ 0.52174073  0.47825924]
 [ 0.49407539  0.50592458]
 [ 0.52621222  0.47378781]
 [ 0.59144926  0.40855068]
 [ 0.53040445  0.46959561]
 [ 0.4654443   0.53455573]
 [ 0.50830644  0.4916935 ]
 [ 0.48784629  0.51215369]
 [ 0.48705718  0.51294279]
 [ 0.60839272  0.39160728]
 [ 0.57995975  0.42004025]
 [ 0.52602094  0.47397906]
 [ 0.53539646  0.46460348]
 [ 0.53561234  0.46438771]
 [ 0.5237124   0.4762876 ]
 [ 0.54550368  0.45449629]
 [ 0.52077532  0.47922465]
 [ 0.47821584  0.52178413]
 [ 0.48000407  0.51999587]
 [ 0.52162433  0.47837567]
 [ 0.47795764  0.52204239]
 [ 0.48434082  0.51565921]
 [ 0.55128402  0.44871598]
 [ 0.55382776  0.44617224]
 [ 0.49549878  0.50450122]
 [ 0.54070073  0.45929927]]
(64, 2)

Why are there 64 (x2) values? I want to obtain the output probabilities of a single image (the one I gave to the net in the forward pass). How can I do that?





 




Jan

unread,
Mar 18, 2016, 11:18:08 AM3/18/16
to Caffe Users
That is actually quite easy: for what you want to do you cannot use the same network config as for training. You need a "deploy" config for that. In short, remove the data layer and replace it by "input" and "input_shape" parameters, in your case

input: "data"
input_shape {
  dim: 1
  dim: 1
  dim: 65
  dim: 65
}

if your images are 65x65 pixels. This tells Caffe to create an input blob called "data" whose contents are filled through the API. If you instead keep the data layer used for training, all the manual reshaping and data filling you did is lost when calling forward(), because the data layer's output overwrites everything. You therefore still get a batch size of 64, as in training, which is why you see 64 rows of output.

Jan

Ahmed Ibrahim

unread,
Mar 19, 2016, 9:14:52 AM3/19/16
to Caffe Users
I agree with Jan, and I have a question: how can you train your network without labels?

Jan

unread,
Mar 21, 2016, 4:20:11 AM3/21/16
to Caffe Users
You need to look into "unsupervised learning". Regular CNNs don't work without labels, but you can have a look into autoencoders, deep belief networks and similar network models. Most of them you cannot train with caffe I'm afraid.

Jan
Reply all
Reply to author
Forward
0 new messages