Hi all,
First, let me admit I do not know nearly as much about deep learning as I should, as a student, I am still learning. If I say something that makes no sense, pointers would be greatly appreciated.
I am trying to use Caffe to build a CNN for facial keypoint extraction
(the Kaggle competition). I started with
this tutorial which, with a lot of reading, trying and cursing (i.e. learning), I almost got it working. Almost, because the network outputs the same data for any input. To figure out why, I started rewriting the code to use the pyCaffe interface. (Has always been the goal and should provide more information for troubleshooting).
The data consists of images (96x96 greyscale pixels) for which 15 keypoints need to be found.
So, the label data consists of 30 values representing the 15 keypoint coordinates.
Unfortunately, I cannot figure out how to set up the MemoryDataLayer properly to accept the label data. I keep on getting errors like the following out of my network.
euclidean_loss_layer.cpp:14] Check failed: bottom[0]->count(1) == bottom[1]->count(1) (30 vs. 1) Inputs must have the same dimension.
Obviously, I need to set the dimensions of the label data somewhere. But how? By now I am quite sure I cannot set this up in the layer parameters itself and am out of leads.
Some pointers would be very much appreciated.
N.B. I think my final code could be a nice addition to the Notebook Examples. I'd be more than willing to share it.
My full network setup (currently mostly the same as the tutorial):
name: "VER1"
layers {
name: "i_data"
top: "data"
top: "label"
type: MEMORY_DATA
memory_data_param {
batch_size: 64
channels: 1
height: 96
width: 96
}
}
layers {
name: "conv1"
type: CONVOLUTION
bottom: "data"
top: "conv1"
convolution_param {
num_output: 32
kernel_size: 11
stride: 2
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu2"
type: RELU
bottom: "conv1"
top: "conv1"
}
layers {
name: "pool1"
type: POOLING
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
name: "conv2"
type: CONVOLUTION
bottom: "pool1"
top: "conv2"
convolution_param {
num_output: 64
pad: 2
kernel_size: 7
group: 2
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu2"
type: RELU
bottom: "conv2"
top: "conv2"
}
layers {
name: "pool2"
type: POOLING
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layers {
name: "norm2"
type: LRN
bottom: "pool2"
top: "norm2"
lrn_param {
norm_region: WITHIN_CHANNEL
local_size: 3
alpha: 5e-05
beta: 0.75
}
}
layers {
name: "conv3"
type: CONVOLUTION
bottom: "norm2"
top: "conv3"
convolution_param {
num_output: 32
pad: 1
kernel_size: 5
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu3"
type: RELU
bottom: "conv3"
top: "conv3"
}
layers {
name: "conv4"
type: CONVOLUTION
bottom: "conv3"
top: "conv4"
convolution_param {
num_output: 64
pad: 1
kernel_size: 5
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu4"
type: RELU
bottom: "conv4"
top: "conv4"
}
layers {
name: "conv5"
type: CONVOLUTION
bottom: "conv4"
top: "conv5"
convolution_param {
num_output: 32
pad: 1
kernel_size: 5
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu5"
type: RELU
bottom: "conv5"
top: "conv5"
}
layers {
name: "pool5"
type: POOLING
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 4
stride: 2
}
}
layers {
name: "drop0"
type: DROPOUT
bottom: "pool5"
top: "pool5"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "ip1"
type: INNER_PRODUCT
bottom: "pool5"
top: "ip1"
inner_product_param {
num_output: 100
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu4"
type: RELU
bottom: "ip1"
top: "ip1"
}
layers {
name: "drop1"
type: DROPOUT
bottom: "ip1"
top: "ip1"
dropout_param {
dropout_ratio: 0.5
}
}
layers {
name: "ip2"
type: INNER_PRODUCT
bottom: "ip1"
top: "ip2"
inner_product_param {
num_output: 30
bias_filler {
type: "constant"
value: 0.1
}
}
}
layers {
name: "relu22"
type: RELU
bottom: "ip2"
top: "ip2"
}
layers {
name: "loss"
type: EUCLIDEAN_LOSS
bottom: "ip2"
bottom: "label"
top: "loss"
}