Hi everyone.
I'm working on a project about image tagging and I'm new to Caffe. I have the Caffe code from one of the papers I read about image tagging, and the dataset it uses is CIFAR100.
According to the code and the layer structure, I have to feed the image features in LMDB format and the labels in HDF5 format into the network separately. I converted the features to LMDB (roughly as in the sketch below) and the labels to HDF5, but I still can't run the code. I think the labels need some preprocessing before being fed into the network, but I couldn't figure out how. I emailed the paper's authors, but unfortunately they haven't responded yet.
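For the features, this is roughly the script I used (a sketch from memory; the feature dimension, the key format, and the map_size are just what I picked, not anything from the paper):

import caffe
import lmdb
import numpy as np

def write_features_to_lmdb(features, db_path):
    # features: (num_images, feature_dim) float array, stored in the same
    # order as the label HDF5 files so image i lines up with sequence i
    env = lmdb.open(db_path, map_size=int(features.nbytes * 10))
    with env.begin(write=True) as txn:
        for i, feat in enumerate(features):
            # Datum wants a 3-D (channels, height, width) array
            datum = caffe.io.array_to_datum(feat.reshape(-1, 1, 1))
            txn.put('{:08d}'.format(i).encode('ascii'), datum.SerializeToString())
    env.close()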
As the paper's author told me, the dataset should have a hierarchical structure, so I'm trying to use CIFAR100 with its two-level hierarchy: every image has two labels, a coarse label and a fine label.
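My best guess so far at the label preprocessing, modeled on Caffe's coco_caption (LRCN) example, is below. Everything in it is an assumption on my part: reading the prototxt, the LMDB batch_size of 100 looks like the number of parallel streams N and the HDF5 batch_size of 10 like the timesteps T per batch, so I store each array time-major as (T, N); the token layout (BOS = 0, then the 20 coarse classes, then the 100 fine classes) and the -1 padding for ignored targets are also just my guesses:

import h5py
import numpy as np

T, N = 10, 100   # timesteps and parallel streams, read off the prototxt batch sizes
BOS = 0          # assumed start-of-sequence token

def write_label_hdf5(coarse, fine, h5_path):
    # coarse, fine: integer label arrays of shape (N,) for one batch of N images
    cont = np.zeros((T, N), dtype=np.float32)
    inp = np.zeros((T, N), dtype=np.float32)
    tgt = -np.ones((T, N), dtype=np.float32)   # -1 everywhere = ignore_label padding
    cont[1:3, :] = 1            # timesteps 1-2 continue the sequence; t=0 starts it
    inp[0, :] = BOS             # t=0: start token
    inp[1, :] = 1 + coarse      # t=1: coarse label (offset past BOS)
    inp[2, :] = 1 + 20 + fine   # t=2: fine label (offset past the coarse classes)
    tgt[0, :] = 1 + coarse      # predict the coarse label first,
    tgt[1, :] = 1 + 20 + fine   # then the fine label; the rest stays -1
    with h5py.File(h5_path, 'w') as f:
        f.create_dataset('cont_sentence', data=cont)
        f.create_dataset('input_sentence', data=inp)
        f.create_dataset('target_sentence', data=tgt)

If that layout is right, then for CIFAR100 the Embed input_dim and the predict num_output should presumably shrink from 1861 to 1 + 20 + 100 = 121, and the hdf5_chunk_list*.txt files would just list these .h5 paths one per line (that part, at least, is standard HDF5Data behavior). But I'm not sure this is what the authors did.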
I've put the code here, so if anybody can run it with the CIFAR100 dataset, please show me how.
This is the proto.txt file:
name: "res_to_lstm"
layer {
name: "image_feature"
type: "Data"
top: "global_pool"
include { phase: TRAIN }
data_param {
source: "./examples/coarse-to-fine/Imagenet/ResNet/feature/global_pool_train"
batch_size: 100
backend: LMDB
}
}
layer {
name: "data"
type: "HDF5Data"
top: "cont_sentence"
top: "input_sentence"
top: "target_sentence"
include { phase: TRAIN }
hdf5_data_param {
source: "/home/destiny/Datasets/Imagenet/train_h5_caffe/hdf5_chunk_list_shuffle.txt"
batch_size: 10
}
}
layer {
name: "image_feature"
type: "Data"
top: "global_pool"
include { phase: TEST }
data_param {
source: "./examples/coarse-to-fine/Imagenet/ResNet/feature/global_pool_val"
batch_size: 100
backend: LMDB
}
}
layer {
name: "data"
type: "HDF5Data"
top: "cont_sentence"
top: "input_sentence"
top: "target_sentence"
include { phase: TEST }
hdf5_data_param {
source: "/home/destiny/Datasets/Imagenet/val_h5_caffe/hdf5_chunk_list.txt"
batch_size: 10
}
}
layer {
name: "embedding"
type: "Embed"
bottom: "input_sentence"
top: "embedded_input_sentence"
param {
lr_mult: 1
}
embed_param {
bias_term: false
input_dim: 1861
num_output: 1000
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
}
}
layer {
name: "lstm1"
type: "LSTM"
bottom: "embedded_input_sentence"
bottom: "cont_sentence"
bottom: "global_pool"
top: "lstm1"
recurrent_param {
num_output: 1000
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "predict"
type: "InnerProduct"
bottom: "lstm1"
top: "predict"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1861
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
bias_filler {
type: "constant"
value: 0
}
axis: 2
}
}
layer {
name: "cross_entropy_loss"
type: "SoftmaxWithLoss"
bottom: "predict"
bottom: "target_sentence"
top: "cross_entropy_loss"
loss_weight: 10
loss_param {
ignore_label: -1
}
softmax_param {
axis: 2
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "predict"
bottom: "target_sentence"
top: "accuracy"
include { phase: TEST }
accuracy_param {
axis: 2
ignore_label: -1
}
}
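One thing that helped me see the blob shapes (and where the two data layers might disagree) is just loading the net in pycaffe; a minimal sketch, assuming pycaffe is built and the data sources above exist:

import caffe
caffe.set_mode_cpu()
# Loading the net runs shape setup for every layer, so any mismatch between
# the LMDB batch (N streams) and the HDF5 batch (T timesteps) shows up here.
net = caffe.Net('./examples/coarse-to-fine/Imagenet/ResNet-BN/ResNet_train_lstm.prototxt', caffe.TEST)
for name, blob in net.blobs.items():
    print(name, blob.data.shape)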
--------------------------------------------------------
And this is the solver.prototxt file:
net: "./examples/coarse-to-fine/Imagenet/ResNet-BN/ResNet_train_lstm.prototxt"
test_iter: 500
test_interval: 1000
test_initialization: true
base_lr: 0.1
lr_policy: "step"
gamma: 0.5
stepsize: 60000
display: 200
max_iter: 260000
momentum: 0.9
weight_decay: 0.0000
snapshot: 10000
snapshot_prefix: "./examples/coarse-to-fine/Imagenet/ResNet/models/global_pool_lstm"
solver_mode: GPU
random_seed: 1701
average_loss: 100
clip_gradients: 10
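For reference, this is how I've been trying to run it from pycaffe, one iteration at a time, to localize the failure (a sketch; the solver path is just where I keep the file):

import caffe
caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.get_solver('./examples/coarse-to-fine/solver.prototxt')  # path assumed
solver.step(1)   # a single forward/backward pass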