Finetuning Caffe Imagenet on CIFAR10 data

3,394 views
Skip to first unread message

Gil Levi

unread,
Oct 7, 2014, 2:21:41 PM10/7/14
to caffe...@googlegroups.com
Hi,

Following the flickr fine-tune and the pascal fine-tune examples, I am trying to fine-tune the imagenet network on the CIFAR10 dataset (I resized the images to 256x256).

However, the loss doesn't seem to be reducing. 

Can someone please help me to detect the problem?

Thanks,

Gil

Implementation details:

Here is my train_val prototxt file:

name: "FlickrStyleCaffeNet"
layers {
  name: "data"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "/home/michael/CIFAR10/data_256_resized/resized_train_leveldb"
    mean_file: "/home/michael/CIFAR10/data_256_resized/imagenet_mean_cifar_resized.binaryproto"
    batch_size: 50
    crop_size: 227
    mirror: true
  }
  include: { phase: TRAIN }
}
layers {
  name: "data"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "/home/michael/CIFAR10/data_256_resized/resized_val_leveldb"
    mean_file: "/home/michael/CIFAR10/data_256_resized/imagenet_mean_cifar_resized.binaryproto"
    batch_size: 50
    crop_size: 227
    mirror: false
  }
  include: { phase: TEST }
}
layers {
  name: "conv1"
  type: CONVOLUTION
  bottom: "data"
  top: "conv1"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu1"
  type: RELU
  bottom: "conv1"
  top: "conv1"
}
layers {
  name: "pool1"
  type: POOLING
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm1"
  type: LRN
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv2"
  type: CONVOLUTION
  bottom: "norm1"
  top: "conv2"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu2"
  type: RELU
  bottom: "conv2"
  top: "conv2"
}
layers {
  name: "pool2"
  type: POOLING
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "norm2"
  type: LRN
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layers {
  name: "conv3"
  type: CONVOLUTION
  bottom: "norm2"
  top: "conv3"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "relu3"
  type: RELU
  bottom: "conv3"
  top: "conv3"
}
layers {
  name: "conv4"
  type: CONVOLUTION
  bottom: "conv3"
  top: "conv4"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu4"
  type: RELU
  bottom: "conv4"
  top: "conv4"
}
layers {
  name: "conv5"
  type: CONVOLUTION
  bottom: "conv4"
  top: "conv5"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu5"
  type: RELU
  bottom: "conv5"
  top: "conv5"
}
layers {
  name: "pool5"
  type: POOLING
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layers {
  name: "fc6"
  type: INNER_PRODUCT
  bottom: "pool5"
  top: "fc6"
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu6"
  type: RELU
  bottom: "fc6"
  top: "fc6"
}
layers {
  name: "drop6"
  type: DROPOUT
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc7"
  type: INNER_PRODUCT
  bottom: "fc6"
  top: "fc7"
  # Note that blobs_lr can be set to 0 to disable any fine-tuning of this, and any other, layer
  blobs_lr: 1
  blobs_lr: 2
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layers {
  name: "relu7"
  type: RELU
  bottom: "fc7"
  top: "fc7"
}
layers {
  name: "drop7"
  type: DROPOUT
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layers {
  name: "fc8_cifar"
  type: INNER_PRODUCT
  bottom: "fc7"
  top: "fc8_cifar"
  # blobs_lr is set to higher than for other layers, because this layer is starting from random while the others are already trained
  blobs_lr: 10
  blobs_lr: 20
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc8_cifar"
  bottom: "label"
}
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "fc8_flickr"
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}




And here is the solver file:

net: "finetune_train_val_prototxt.txt"
test_iter: 100
test_interval: 1000
# lr for fine-tuning should be lower than when starting from scratch
base_lr: 0.001
lr_policy: "step"
gamma: 0.1
# stepsize should also be lower, as we're closer to being done
stepsize: 20000
display: 20
max_iter: 100000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "finetune_cifar"
# uncomment the following to default to CPU mode solving
solver_mode: CPU

Yangqing Jia

unread,
Oct 7, 2014, 2:25:38 PM10/7/14
to Gil Levi, caffe...@googlegroups.com
It most likely won't work. Imagine what the input would look like: they are 32x32 images resized to 256x256. The Imagenet model takes 7x7 patches, which only correspond to 0.875 pixels in the original image. As a result, they won't see anything interesting but an almost constant-colored patch.

Yangqing

--
You received this message because you are subscribed to the Google Groups "Caffe Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to caffe-users...@googlegroups.com.
To post to this group, send email to caffe...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/caffe-users/daf919fb-ba00-43c9-9c8e-e5b767e3ebac%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.

Gil Levi

unread,
Oct 7, 2014, 2:32:19 PM10/7/14
to caffe...@googlegroups.com, gil.l...@gmail.com
Thanks, I actually didn't feed the "train" binary with the initial weights, so I was training from scratch. 

Still, I'm taking your comment into consideration, but I will give it a shot and see what happens. 


Thanks again,
Gil 

deepcnn

unread,
Oct 7, 2014, 10:26:01 PM10/7/14
to caffe...@googlegroups.com, gil.l...@gmail.com
Does caffe provide an Imagenet model that takes 7x7 patches? Because I remember the Alex network takes 11*11 patches in the first convolutional layer. If it does, where could I get the model? Thanks very much!

Yangqing Jia於 2014年10月8日星期三UTC+8上午2時25分38秒寫道:

deepcnn

unread,
Oct 7, 2014, 10:35:28 PM10/7/14
to caffe...@googlegroups.com, gil.l...@gmail.com
A little modification I want to suggest: the bottom blob of the accuracy layer should be "fc8_cifar". I am not sure whether this is really what is causing the problem, but for the sake of naming consistency, I think it needs to be changed.
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "fc8_cifar"
  bottom: "label"
}
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "fc8_flickr"  #---> fc8_cifar
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}


Gil Levi於 2014年10月8日星期三UTC+8上午2時32分19秒寫道:

Gil Levi

unread,
Oct 8, 2014, 5:32:57 AM10/8/14
to caffe...@googlegroups.com, gil.l...@gmail.com
Thanks for your comment. 

Actually I got it working and currently the accuracy on the validation set is 87% after 3K iterations. Hope I don't have a bug somewhere. 

Regarding your question, the size of the filters in the first layer of AlexNet and Caffe Imagenet is indeed 11x11, but there are new deeper models in the "Model Zoo" where the size of the filters is 3x3. 


Thanks again!

Gil. 

Venkatesh N

unread,
Jan 1, 2015, 6:27:30 PM1/1/15
to caffe...@googlegroups.com, gil.l...@gmail.com
Hi,


 
How did you manage to resolve this issue? Did you just resize 32x32 to 256x256 and use fine-tuning on the Caffe imagenet network? Were you able to find out why the loss function didn't decrease with your initial settings?

Thanks,
Venky

Gil Levi

unread,
Jan 2, 2015, 4:22:22 AM1/2/15
to caffe...@googlegroups.com, gil.l...@gmail.com
Hi,

I resized the images to 256x256 when creating the leveldb. 

I had a bug somewhere — I don't really remember where — but I finally got it working.

Gil

Venkatesh N

unread,
Jan 3, 2015, 2:51:04 AM1/3/15
to caffe...@googlegroups.com, gil.l...@gmail.com
Thanks, Gil. 

Martin Yu

unread,
Jan 6, 2015, 11:30:27 PM1/6/15
to caffe...@googlegroups.com, gil.l...@gmail.com
Hi, Gil
I'm trying to resize the cifar10 dataset and then feed the images into the imagenet network. However, the images after resizing are weird. For example:

Top: 32x32 cifar10 image

Bottom: ( resized ) 256x256 image

I use caffe.io.resize_image to do resizing. How do you resize your images? Do you oversample them to obtain ten 227x227 crops? Thank you very much.

Gil Levi

unread,
Jan 13, 2015, 8:37:39 AM1/13/15
to caffe...@googlegroups.com, gil.l...@gmail.com
Hi,

Sorry for the late response.

I'm not resizing my images. When I created the leveldb, Caffe resized them automatically (I passed it the resize flag).

When testing the net, I just fed the 32x32x3 images to Caffe. 

I can send you the prediction scripts (in python). Keep in mind though that I'm using a somewhat old version of Caffe.

Gil. 

H KK

unread,
Jan 15, 2015, 3:07:51 AM1/15/15
to caffe...@googlegroups.com, gil.l...@gmail.com
How do I feed the train binary with the initial weights? finetune-net.cpp seems to have been removed... Thank you!

在 2014年10月8日星期三 UTC+8上午2:32:19,Gil Levi写道:

Gil Levi

unread,
Jan 15, 2015, 6:28:03 AM1/15/15
to caffe...@googlegroups.com, gil.l...@gmail.com
Here:

TOOLS=/home/caffe-master/build/tools
TXT_DIR=/home//prototxt_files

$TOOLS/caffe train --solver=$TXT_DIR/finetune_solver_prototxt.txt -weights /home/reference_net/caffe_reference_imagenet_model



Gil

李組賢

unread,
Apr 6, 2015, 10:45:42 AM4/6/15
to caffe...@googlegroups.com
HI~
How did you resize the cifar10 data when creating the lmdb? What do I need to do for that? Thanks!

Gil Levi

unread,
Apr 6, 2015, 11:18:24 AM4/6/15
to caffe...@googlegroups.com
I resized it to 256x256. 

Nikiforos Pittaras

unread,
Apr 6, 2015, 11:25:58 AM4/6/15
to caffe...@googlegroups.com
Googlenet feature extraction works ok for me, after fine-tuning on 38 concepts.
I renamed and changed the output sizes of the fc and loss layers.

李組賢

unread,
Apr 7, 2015, 3:18:05 AM4/7/15
to caffe...@googlegroups.com
Thanks for your response. But I mean, what command should I add to convert 32x32 to 256x256? Thanks — I am a new learner.

Gil Levi於 2015年4月6日星期一 UTC+8下午11時18分24秒寫道:

Gil Levi

unread,
Apr 7, 2015, 9:01:18 AM4/7/15
to caffe...@googlegroups.com
Here's an example to the shell script that I'm using for creating the leveldb:

TOOLS=/home/ubuntu/repositories/caffe/build/tools
DATA=/
DEF_FILES=/mnt/CASIA/CasiaWebFace/train_val_txt_files
OUT=/mnt/CASIA/CasiaWebFace/leveldb

# Set RESIZE=true to resize the images to 256x256. Leave as false if images have
# already been resized using another tool.
RESIZE=true
if $RESIZE; then
  RESIZE_HEIGHT=256
  RESIZE_WIDTH=256
else
  RESIZE_HEIGHT=0
  RESIZE_WIDTH=0
fi

if [ ! -d "$DATA" ]; then
  echo "Error: DATA is not a path to a directory: $DATA"
  echo "Set the DATA variable in create_imagenet.sh to the path" \
       "where the ImageNet training data is stored."
  exit 1
fi



echo "Creating  train leveldb..."
GLOG_logtostderr=1 $TOOLS/convert_imageset.bin --resize_height=$RESIZE_HEIGHT --resize_width=$RESIZE_WIDTH --shuffle  $DATA $DEF_FILES/train.txt $OUT/train_leveldb

李組賢

unread,
Apr 7, 2015, 10:02:22 AM4/7/15
to caffe...@googlegroups.com
Can this do with .bin Data ? Datasets download from website is name .bin(binary)              THank you ~~~~~~~: ))))  

Gil Levi於 2015年4月7日星期二 UTC+8下午9時01分18秒寫道:

Gil Levi

unread,
Apr 7, 2015, 11:59:03 AM4/7/15
to caffe...@googlegroups.com
I'm not sure. 

李組賢

unread,
Apr 7, 2015, 11:35:12 PM4/7/15
to caffe...@googlegroups.com
Thank you. Can you share the URL where you downloaded that datasets?  : )

Nazil Perveen

unread,
Apr 27, 2016, 12:23:03 AM4/27/16
to Caffe Users
Hi Gil,

I am new to fine-tuning in caffe, and after going through different documentation I just want to ask you one question about what Yangqing suggested: what will the images look like if you resize them from 32x32 to 256x256? Is that the correct way to fine-tune the model on a small-image-size dataset?

Gil Levi

unread,
Apr 27, 2016, 9:24:32 AM4/27/16
to Caffe Users
Hi,

Yes, it actually worked. I got nice results (above 90%).

Best,
Gil 
Reply all
Reply to author
Forward
0 new messages