Hi, I am attempting to build a siamese network that, similarly to "Siamese Neural Networks for One-Shot Image Recognition" by Gregory Koch, 2015 (http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf), Chapter 3, page 8, uses a sigmoid cross-entropy objective instead of a contrastive loss.
The distance function is computed as
p = sigmoid{ SUM_j[ a_j * abs( feat_j - featp_j ) ]}
which is then used in the cross-entropy loss together with the label sim: 1 if the pair is genuine, 0 if it is an impostor.
I have defined the network the following way:
name: "mnist_siamese_train_test"
layer {
name: "pair_data"
type: "Data"
top: "pair_data"
top: "sim"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "/home/imaging/new_caffe/examples/siamese/mnist_siamese_train_leveldb"
batch_size: 64
}
}
layer {
name: "pair_data"
type: "Data"
top: "pair_data"
top: "sim"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "/home/imaging/new_caffe/examples/siamese/mnist_siamese_test_leveldb"
batch_size: 100
}
}
layer {
name: "slice_pair"
type: "Slice"
bottom: "pair_data"
top: "data"
top: "data_p"
slice_param {
slice_dim: 1
slice_point: 1
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
name: "conv1_w"
lr_mult: 1
}
param {
name: "conv1_b"
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
name: "conv2_w"
lr_mult: 1
}
param {
name: "conv2_b"
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
param {
name: "ip1_w"
lr_mult: 1
}
param {
name: "ip1_b"
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
name: "ip2_w"
lr_mult: 1
}
param {
name: "ip2_b"
lr_mult: 2
}
inner_product_param {
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "feat"
type: "InnerProduct"
bottom: "ip2"
top: "feat"
param {
name: "feat_w"
lr_mult: 1
}
param {
name: "feat_b"
lr_mult: 2
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "conv1_p"
type: "Convolution"
bottom: "data_p"
top: "conv1_p"
param {
name: "conv1_w"
lr_mult: 1
}
param {
name: "conv1_b"
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1_p"
type: "Pooling"
bottom: "conv1_p"
top: "pool1_p"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_p"
type: "Convolution"
bottom: "pool1_p"
top: "conv2_p"
param {
name: "conv2_w"
lr_mult: 1
}
param {
name: "conv2_b"
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool2_p"
type: "Pooling"
bottom: "conv2_p"
top: "pool2_p"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1_p"
type: "InnerProduct"
bottom: "pool2_p"
top: "ip1_p"
param {
name: "ip1_w"
lr_mult: 1
}
param {
name: "ip1_b"
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1_p"
type: "ReLU"
bottom: "ip1_p"
top: "ip1_p"
}
layer {
name: "ip2_p"
type: "InnerProduct"
bottom: "ip1_p"
top: "ip2_p"
param {
name: "ip2_w"
lr_mult: 1
}
param {
name: "ip2_b"
lr_mult: 2
}
inner_product_param {
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "feat_p"
type: "InnerProduct"
bottom: "ip2_p"
top: "feat_p"
param {
name: "feat_w"
lr_mult: 1
}
param {
name: "feat_b"
lr_mult: 2
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "diff"
type: "Eltwise"
bottom: "feat"
bottom: "feat_p"
top: "diff"
eltwise_param {
operation: SUM
coeff: 1
coeff: -1
}
}
layer {
name: "absdiff"
bottom: "diff"
top: "absdiff"
type: "AbsVal"
}
layer {
name: "dist"
type: "InnerProduct"
bottom: "absdiff"
top: "dist"
param {
name: "distfeat_w"
lr_mult: 1
}
param {
name: "distfeat_b"
lr_mult: 0
}
inner_product_param {
num_output: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SigmoidCrossEntropyLoss"
bottom: "sim"
bottom: "dist"
top: "loss"
}
However, I get this error:
sigmoid_cross_entropy_loss_layer.cu:13] SigmoidCrossEntropyLoss Layer cannot backpropagate to label inputs.
Does anybody have any hints on how to fix it?
Thank you