Batch Size impact on Batch Normalisation Layer


MKR

Nov 21, 2016, 3:50:57 PM11/21/16
to Caffe Users
Hello,

I have to train a network with a batch size of 1. Should I set "use_global_stats" in my Batch Normalization layers to true or false during training and testing?

And whichever it is, do I also have to set parameters like "lr", "moving_average_fraction", or "eps"?

My guess is:
- Training: True
- Testing: True

But my first experiments showed that
- Training: False
- Testing: False
worked much better.
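For comparison, the conventional Caffe pattern is to compute batch statistics during training and switch to the accumulated running averages at test time. A sketch (layer and blob names are placeholders; whether this behaves well at batch size 1 is exactly what is in question here):

```protobuf
# TRAIN phase: normalise with the statistics of the current mini-batch
layer {
  name: "bn1"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1"
  include { phase: TRAIN }
  batch_norm_param { use_global_stats: false }
}
# TEST phase: normalise with the running mean/variance accumulated in training
layer {
  name: "bn1"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1"
  include { phase: TEST }
  batch_norm_param { use_global_stats: true }
}
```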

Thanks for your help!

MKR

Nov 22, 2016, 3:42:21 AM11/22/16
to Caffe Users

This is my Network:

name: "ResNet-50"
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param {
    shape {
      dim: 1
      dim: 3
      dim: 224
      dim: 224
    }
  }
  include {
    phase: TRAIN
  }
}

layer {
  name: "label"
  type: "Input"
  top: "label"
  input_param {
    shape {
      dim: 1
      dim: 1
      dim: 1
      dim: 1
    }
  }
  include {
      phase: TRAIN
  }
}

layer {
  name: "res_conv1"
  type: "Convolution"
  bottom: "data"
  top: "res_conv1"
  param { lr_mult: 0 decay_mult: 0 }
  param { lr_mult: 0 decay_mult: 0 }
  convolution_param {
    num_output: 1
    pad: 0
    kernel_size: 224
    stride: 1 
    weight_filler {
      type: "constant"    
      value: 1
    }
    bias_filler {
      type: "constant" 
      value: 0
    }
  }
}

layer {
  name: "bn_conv1"
  type: "BatchNorm"
  bottom: "res_conv1"
  top: "res_conv1"
  include {
    phase: TRAIN
  }
  batch_norm_param {
    use_global_stats: false
  }
}

layer {
  name: "bn_conv1"
  type: "BatchNorm"
  bottom: "res_conv1"
  top: "res_conv1"
  include {
    phase: TEST
  }
  batch_norm_param {
    use_global_stats: false
  }
}

layer {
  name: "scale_conv1"
  type: "Scale"
  bottom: "res_conv1"
  top: "res_conv1"
  scale_param {
    filler {
      value: 1 
    }
    bias_term: true
    bias_filler {
      value: 0
    }
  }
  param { lr_mult: 0 decay_mult: 0 }
  param { lr_mult: 0 decay_mult: 0 }
}

layer {
  name: "fc1000"
  type: "InnerProduct"
  bottom: "res_conv1"
  top: "fc1000"
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
  name: "prob"
  type: "Softmax"
  bottom: "fc1000"
  top: "prob"
  include {
    phase: TEST
  }
}

layer {
  name:"argmax"
  type: "ArgMax"
  bottom: "prob"
  top: "argmax"
  include {
    phase: TEST
  }
}

layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc1000"
  bottom: "label"
  top: "loss"
  include {
    phase: TRAIN
  }
}

The input is all ones with shape (224, 224, 3). I set the weights and biases to these constant values to analyze the problem and see what is happening (attached file). I cannot explain it ...
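One thing worth noting with this setup (a NumPy sketch of batch-norm mathematics, not Caffe's actual code): with batch size 1 and a 1x1 spatial output, the per-channel batch statistics are computed from a single value, so the batch mean equals that value, the batch variance is zero, and the normalised output collapses to exactly 0 regardless of the input.

```python
import numpy as np

def batch_norm_train(x, eps=1e-5):
    """Normalise with per-channel batch statistics over the (N, H, W) axes,
    as BatchNorm does during training (use_global_stats: false)."""
    mean = x.mean(axis=(0, 2, 3), keepdims=True)
    var = x.var(axis=(0, 2, 3), keepdims=True)
    return (x - mean) / np.sqrt(var + eps)

# Batch size 1, one channel, 1x1 spatial map: the situation after the
# 224x224 convolution above (all-ones input, constant weight 1, bias 0).
x = np.full((1, 1, 1, 1), 224 * 224 * 3.0)
y = batch_norm_train(x)
print(y)  # the single value is its own mean, so it normalises to 0
```

This degeneracy is one reason batch statistics at batch size 1 behave so differently from accumulated global statistics.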


iter_BN.png