Weight Filler gaussian: Define right std ??

602 views

Initilizationdeviationfillerinitstandardstdweight

Skip to first unread message

Julian Kolarz

unread,

Dec 17, 2016, 4:13:53 PM12/17/16

to Caffe Users

Hi all,
i am wondering how to define the right gaussian std for intial weights. So i have this situation:
I took a net prototxt from one paper and trained it with the trainings data they used in their paper. so everything went good and the loss converged. In the paper they explained that when they use the "xavier" weight filler, that their training does not converge. So to check if this is the case, i changed all the gaussian weight filler to xavier. And indeed the net then didnt converged. So now i am asking myself how they knew that they have to use 0.01, 0.2 and 0.05 as std for gaussian filler? (just for the case if it helps to answer my question: The training dataset that was used, was the USCD pedestrian dataset)

Because now i want to use this net for my own training data, which is similar to the training data they used. But the net does not converge, so i think that i have to change the initial weight filler. But i have no clue how should i change them. Is there some kind of heuristic i can use?

Here is the net:

name: "UCSD_CNN"
input: "data_s0"
input_dim: 1
input_dim: 1
input_dim: 72
input_dim: 72

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data_s0"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 3
    kernel_size: 7
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01000
    }

    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 3
    kernel_size: 7
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.01000
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 5
    stride: 1
    pad: 2
    weight_filler {
      type: "gaussian"
      std: 0.20000
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}

layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 1000
    kernel_size: 1
    stride: 1
    pad: 0
    weight_filler {
      type: "gaussian"
      std: 0.20000
    }
    bias_filler {
      type: "constant"
#      value: 1
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}

layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 400
    kernel_size: 1
    stride: 1
    pad: 0
    weight_filler {
      type: "gaussian"
      std: 0.05000
    }
    bias_filler {
      type: "constant"
#      value: 1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}

layer {
  name: "conv6"
  type: "Convolution"
  bottom: "conv5"
  top: "conv6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 1
    kernel_size: 1
    stride: 1
    pad: 0
    weight_filler {
      type: "constant"
      value: 0
    }
    bias_filler {
      type: "constant"
#      value: 1
    }
  }
}

Reply all

Reply to author

Forward

0 new messages