Training with 13 classes


Antonio Paes

Jun 14, 2015, 7:03:00 PM
to caffe...@googlegroups.com
Hi guys, I'm trying to train my network, which is based on ImageNet. I have 13 classes (0, 1, 3, 7....13), with 360K images for training and 180K for testing; all images are 54x54. Here is my architecture:

name: "ImagoNet"
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: false
    crop_size: 54
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {
    source: "examples/imagenet/ilsvrc12_train_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 54
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {
    source: "examples/imagenet/ilsvrc12_val_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 6
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    pad: 0
    kernel_size: 4
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
  }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "pool4"
  top: "fc1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "fc1"
  top: "fc1"
}
#layer {
#  name: "drop1"
#  type: "Dropout"
#  bottom: "fc1"
#  top: "fc1"
#  dropout_param {
#    dropout_ratio: 0.5
#  }
#}
layer {
  name: "fc2"
  type: "InnerProduct"
  bottom: "fc1"
  top: "fc2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1000
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc2"
  top: "fc2"
}
layer {
  name: "fc3_13"
  type: "InnerProduct"
  bottom: "fc2"
  top: "fc3_13"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 13
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}

layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc3_13"
  top: "fc3_13"
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc3_13"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc3_13"
  bottom: "label"
  top: "loss"
}


My accuracy doesn't go above 14%. Any ideas?

thanks guys.

npit

Jun 15, 2015, 7:46:57 AM
to caffe...@googlegroups.com
Have you also tested your net elsewhere?
I fine-tuned on a 60-label dataset. Accuracy saturated around 45% according to the testing phase, but testing the network on another set I got < 1% accuracy.

npit

Jun 15, 2015, 7:48:37 AM
to caffe...@googlegroups.com
So you are using a subset of the ImageNet classes?
Do the training/testing DBs contain images from those classes only?
I ask because they have the same names as the example ones.

Antonio Paes

Jun 15, 2015, 12:00:47 PM
to caffe...@googlegroups.com
First of all, thanks for the answer, npit.

I'm using the Multi-PIE dataset, not the ImageNet classes, and I use only Multi-PIE for training and testing, with one constraint: subjects who appear in the training set do not appear in the test set.

I'm thinking about using fine-tuning, but when I use a pre-trained model from ImageNet the architectures don't match. Do you have experience with fine-tuning?

thanks.

npit

Jun 15, 2015, 3:47:25 PM
to caffe...@googlegroups.com
The data layers of the net you posted take input from the ImageNet databases, though.


"I'm using the Multi-PIE dataset, not the ImageNet classes, and I use only Multi-PIE for training and testing, with one constraint: subjects who appear in the training set do not appear in the test set."
I'm afraid I did not understand that.
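To train on Multi-PIE rather than ImageNet, the data layers would need to point at LMDBs built from the Multi-PIE lists instead of the ILSVRC12 databases. A minimal sketch of the TRAIN-phase layer, assuming hypothetical LMDB and mean-file paths:

```
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TRAIN }
  transform_param {
    crop_size: 54
    # mean file computed over the Multi-PIE training images (hypothetical path)
    mean_file: "examples/multipie/multipie_mean.binaryproto"
  }
  data_param {
    # hypothetical path to an LMDB built from the Multi-PIE training list
    source: "examples/multipie/multipie_train_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
```

The TEST-phase layer would point at a corresponding validation LMDB in the same way.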

Antonio Paes

Jun 16, 2015, 1:10:39 PM
to caffe...@googlegroups.com
I'll try to run more tests with fine-tuning and report back later.

Thanks.

Uzair Ahmed

Jan 27, 2016, 3:02:15 AM
to Caffe Users
Hi npit,

When you fine-tuned your model for a 60-class problem, did you change the last fc layer output from 1000 to 60?

I have been trying to modify AlexNet to address a 200-class problem, but the network does not converge when I change the last fc layer output from 1000 to 200. Any hints?

Thanks.

npit

Jan 28, 2016, 2:23:24 PM
to Caffe Users
Hi Uzair.

You also have to change the name of that layer. And is your fine-tuning data shuffled, and does it consist of exactly 200 classes (labeled 0 to 199)?
What's the behaviour of the training/test loss?
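The renaming npit describes can be sketched like this for a 200-class problem. Giving the replaced layer a fresh name means Caffe initializes it from the fillers instead of trying to copy the 1000-way weights from the pre-trained .caffemodel, and raising its lr_mult is a common way to let the new layer learn faster than the copied ones. The name "fc8_200" is hypothetical; any name not present in the pre-trained model works:

```
# Last layer of an AlexNet-style net, renamed for fine-tuning.
layer {
  name: "fc8_200"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8_200"
  # higher lr_mult than the copied layers so the new weights catch up
  param { lr_mult: 10 decay_mult: 1 }
  param { lr_mult: 20 decay_mult: 0 }
  inner_product_param {
    num_output: 200   # one output per class, labels 0..199
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
```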