Siamese with Sigmoid Cross-Entropy Loss instead of Contrastive Loss


Alessandro Ferrari

Feb 3, 2016, 10:46:27 AM
to Caffe Users
Hi, I am attempting to build a siamese network that, similarly to "Siamese Neural Networks for One-Shot Image Recognition" by Gregory Koch, 2015 (http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf), Chapter 3, page 8, uses a sigmoid cross-entropy objective instead of the contrastive loss.

The distance function is computed as
p = sigmoid( SUM_j [ a_j * abs( feat_j - featp_j ) ] )

which is then plugged into the cross-entropy loss with the label sim: 1 if the pair is genuine, 0 if it is an impostor.
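
For reference (standard form, my notation, not from the original post): with p as above and label sim in {0, 1}, the per-pair sigmoid cross-entropy loss is

L = -[ sim * log(p) + (1 - sim) * log(1 - p) ]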

I have defined the network the following way:

name: "mnist_siamese_train_test"
layer {
  name: "pair_data"
  type: "Data"
  top: "pair_data"
  top: "sim"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "/home/imaging/new_caffe/examples/siamese/mnist_siamese_train_leveldb"
    batch_size: 64
  }
}
layer {
  name: "pair_data"
  type: "Data"
  top: "pair_data"
  top: "sim"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "/home/imaging/new_caffe/examples/siamese/mnist_siamese_test_leveldb"
    batch_size: 100
  }
}
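# split the 2-channel pair_data blob along the channel axis into the two branch inputs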
layer {
  name: "slice_pair"
  type: "Slice"
  bottom: "pair_data"
  top: "data"
  top: "data_p"
  slice_param {
    slice_dim: 1
    slice_point: 1
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "feat"
  type: "InnerProduct"
  bottom: "ip2"
  top: "feat"
  param {
    name: "feat_w"
    lr_mult: 1
  }
  param {
    name: "feat_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
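# second branch of the siamese network: weights are tied to the first branch
# via the shared param names (conv1_w, conv1_b, ..., feat_w, feat_b)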
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "data_p"
  top: "conv1_p"
  param {
    name: "conv1_w"
    lr_mult: 1
  }
  param {
    name: "conv1_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "pool1_p"
  top: "conv2_p"
  param {
    name: "conv2_w"
    lr_mult: 1
  }
  param {
    name: "conv2_b"
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1_p"
  type: "InnerProduct"
  bottom: "pool2_p"
  top: "ip1_p"
  param {
    name: "ip1_w"
    lr_mult: 1
  }
  param {
    name: "ip1_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "ip1_p"
  top: "ip1_p"
}
layer {
  name: "ip2_p"
  type: "InnerProduct"
  bottom: "ip1_p"
  top: "ip2_p"
  param {
    name: "ip2_w"
    lr_mult: 1
  }
  param {
    name: "ip2_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "feat_p"
  type: "InnerProduct"
  bottom: "ip2_p"
  top: "feat_p"
  param {
    name: "feat_w"
    lr_mult: 1
  }
  param {
    name: "feat_b"
    lr_mult: 2
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
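# distance head: computes SUM_j [ a_j * abs( feat_j - featp_j ) ] via Eltwise
# (coeffs +1/-1), AbsVal, and a 1-output InnerProduct; the sigmoid itself is
# applied inside the loss layer below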
layer {
  name: "diff"
  type: "Eltwise"
  bottom: "feat"
  bottom: "feat_p"
  top: "diff"
  eltwise_param {
    operation: SUM
    coeff: 1
    coeff: -1
  }
}
layer {
  name: "absdiff"
  bottom: "diff"
  top: "absdiff"
  type: "AbsVal"
}
layer {
  name: "dist"
  type: "InnerProduct"
  bottom: "absdiff"
  top: "dist"
  param {
    name: "distfeat_w"
    lr_mult: 1
  }
  param {
    name: "distfeat_b"
    lr_mult: 0
  }
  inner_product_param {
    num_output: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "loss"
  type: "SigmoidCrossEntropyLoss"
  bottom: "sim"
  bottom: "dist"
  top: "loss"
}

However, I get this error:

sigmoid_cross_entropy_loss_layer.cu:13] SigmoidCrossEntropyLoss Layer cannot backpropagate to label inputs.

Does anybody have any hints on how to fix it?

Thank you

mehdi Noroozi

Jun 2, 2016, 2:14:18 PM
to Caffe Users
Hi,

I have the same problem. I would be grateful if you could let me know in case you have found a solution.

zhiqiang Tao

Jul 7, 2016, 11:42:16 AM
to Caffe Users

Hi Alessandro,

I also have this problem, and it seems that shutting down the "propagate_down" of the label bottom works for me; see the sketch below for more details.
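
In Caffe, SigmoidCrossEntropyLoss expects bottom[0] to be the predictions (logits) and bottom[1] to be the targets, and it can only backpropagate to bottom[0]. In the net above, "sim" (the label) is fed first, so the layer tries to send gradients into the label input. A minimal sketch of the corrected loss layer, with the bottoms swapped (untested against your exact setup):

layer {
  name: "loss"
  type: "SigmoidCrossEntropyLoss"
  bottom: "dist"  # predictions (logits) must come first
  bottom: "sim"   # labels second; no gradient flows into this input
  top: "loss"
}

Alternatively, LayerParameter accepts one propagate_down flag per bottom, so you can disable backpropagation into the label input explicitly; but with the original bottom order that would also cut the gradient to "dist", so swapping the bottoms looks like the cleaner fix.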

best