Loss stuck at 8.2 and does not decrease during training

41 views
Skip to first unread message

kai

unread,
Oct 30, 2017, 11:10:53 PM10/30/17
to Caffe Users
I am training a classifier for printed Chinese characters. My dataset includes 3755 categories, and for each category I have 512 images. I tried training on my data with both AlexNet and GoogLeNet. The weird thing is that when I train the model, the loss always gets stuck at 8.2 with both networks. I tried setting the lr_rate to a smaller value, 0.0000001; the loss then starts at around 15 and begins to decrease, but once it gets close to 8 it gets stuck there again... I cannot figure out where the problem is. I trained and tested the networks on another dataset and found that the loss does decrease to a small number, so I do not know whether the cause is my training data. All of my data is handmade by image augmentation (rotation, blur, added noise, color changes, etc.) from one original image. Below are my network, my training log, and some samples of my data.


# Network header: the net is named "GoogleNet".
name: "GoogleNet"
# Training-phase input layer: emits the "data" and "label" blobs read from
# the LMDB at "new_train_lmdb", 10 samples per batch.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    # Mirroring is disabled; presumably because a flipped character is not the
    # same glyph -- confirm with the author.
    mirror: false
    # Per-channel mean values subtracted from the input.
    # NOTE(review): these look like generic ImageNet-style means rather than
    # values computed from this character dataset -- confirm.
    mean_value: 104
    mean_value: 117
    mean_value: 123
  }
  data_param {
    # NOTE(review): verify that this LMDB was built with shuffling and that
    # its labels span 0..3754; if samples are stored class by class, each
    # small batch would presumably contain a single class -- TODO confirm.
    source: "new_train_lmdb"
    # NOTE(review): 10 samples per batch is small relative to the 3755-way
    # output of "cls3_fc2"; consider whether a larger batch is feasible.
    batch_size: 10
    backend: LMDB
  }
}
# Test-phase input layer: same tops ("data", "label") and the same
# transform settings as the TRAIN-phase "data" layer, but reading from the
# LMDB at "new_val_lmdb" with 20 samples per batch.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    # Mirroring disabled and the same three per-channel means subtracted as
    # in the TRAIN-phase layer.
    mirror: false
    mean_value: 104
    mean_value: 117
    mean_value: 123
  }
  data_param {
    source: "new_val_lmdb"
    batch_size: 20
    backend: LMDB
  }
}

# Stem, stage 1: 7x7 stride-2 convolution (64 outputs) -> in-place ReLU ->
# 3x3 stride-2 max pooling -> local response normalization.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Two param blocks; in Caffe these are applied in blob order
  # (first: weights, second: bias).
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }

  convolution_param {
    num_output: 64
    kernel_size: 7
    stride: 2
    pad: 0
    weight_filler {
      type: "xavier"
      #std: 0.015
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "conv1" (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
# 3x3 max pooling with stride 2 and no padding.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
    pad: 0
  }
}
# Local response normalization over a window of 5 (alpha 1e-4, beta 0.75).
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Stem, stage 2: 1x1 reduction convolution (64 outputs) -> ReLU ->
# 3x3 convolution (192 outputs, pad 1) -> ReLU -> LRN -> 3x3 stride-2 max pool.
layer {
  name: "reduction2"
  type: "Convolution"
  bottom: "norm1"
  top: "reduction2"
  # Two param blocks; in Caffe these are applied in blob order
  # (first: weights, second: bias).
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 0
    kernel_size: 1
    group: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "reduction2".
layer {
  name: "relu_reduction2"
  type: "ReLU"
  bottom: "reduction2"
  top: "reduction2"
}
# 3x3 convolution with pad 1 producing 192 outputs.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "reduction2"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    group: 1
    weight_filler {
      type: "xavier"
      #std: 0.02
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "conv2".
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# Local response normalization over a window of 5 (alpha 1e-4, beta 0.75).
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# 3x3 max pooling with stride 2; its output "pool2" feeds inception module 1.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
    pad: 0
  }
}
# Inception module 1 ***************
# Input: "pool2". Four parallel branches, concatenated into "icp2_in":
#   icp1_out0: 1x1 conv (64)
#   icp1_out1: 1x1 reduction (96)  -> 3x3 conv (128)
#   icp1_out2: 1x1 reduction (16)  -> 5x5 conv (32)
#   icp1_out3: 3x3 max pool        -> 1x1 conv (32)
# Every convolution is followed by an in-place ReLU. In each convolution the
# two param blocks apply, in Caffe's blob order, to weights and then bias.

# 1x1 reduction feeding the 3x3 branch.
layer {
  name: "icp1_reduction1"
  type: "Convolution"
  bottom: "pool2"
  top: "icp1_reduction1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp1_reduction1"
  type: "ReLU"
  bottom: "icp1_reduction1"
  top: "icp1_reduction1"
}
# 1x1 reduction feeding the 5x5 branch.
layer {
  name: "icp1_reduction2"
  type: "Convolution"
  bottom: "pool2"
  top: "icp1_reduction2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 16
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp1_reduction2"
  type: "ReLU"
  bottom: "icp1_reduction2"
  top: "icp1_reduction2"
}
# 3x3 max pool with stride 1 and pad 1, feeding the pooling branch.
layer {
  name: "icp1_pool"
  type: "Pooling"
  bottom: "pool2"
  top: "icp1_pool"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# ***********
# Branch 0: direct 1x1 convolution on the module input.
layer {
  name: "icp1_out0"
  type: "Convolution"
  bottom: "pool2"
  top: "icp1_out0"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp1_out0"
  type: "ReLU"
  bottom: "icp1_out0"
  top: "icp1_out0"
}
# Branch 1: 3x3 convolution (pad 1) on the first reduction.
layer {
  name: "icp1_out1"
  type: "Convolution"
  bottom: "icp1_reduction1"
  top: "icp1_out1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
      #std: 0.04
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp1_out1"
  type: "ReLU"
  bottom: "icp1_out1"
  top: "icp1_out1"
}
# Branch 2: 5x5 convolution (pad 2) on the second reduction.
layer {
  name: "icp1_out2"
  type: "Convolution"
  bottom: "icp1_reduction2"
  top: "icp1_out2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "xavier"
      #std: 0.08
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp1_out2"
  type: "ReLU"
  bottom: "icp1_out2"
  top: "icp1_out2"
}
# Branch 3: 1x1 convolution on the pooled input.
layer {
  name: "icp1_out3"
  type: "Convolution"
  bottom: "icp1_pool"
  top: "icp1_out3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp1_out3"
  type: "ReLU"
  bottom: "icp1_out3"
  top: "icp1_out3"
}
# Concat them together
# No concat_param is given, so Caffe's default axis is used (presumably the
# channel axis, giving 64 + 128 + 32 + 32 = 256 channels -- confirm).
layer {
  name: "icp2_in"
  type: "Concat"
  bottom: "icp1_out0"
  bottom: "icp1_out1"
  bottom: "icp1_out2"
  bottom: "icp1_out3"
  top: "icp2_in"
}
# Inception module 2 ***************
# Input: "icp2_in". Four parallel branches, concatenated into "icp2_out" and
# then max-pooled (3x3, stride 2) into "icp3_in":
#   icp2_out0: 1x1 conv (128)
#   icp2_out1: 1x1 reduction (128) -> 3x3 conv (192)
#   icp2_out2: 1x1 reduction (32)  -> 5x5 conv (96)
#   icp2_out3: 3x3 max pool        -> 1x1 conv (64)
# Every convolution is followed by an in-place ReLU. In each convolution the
# two param blocks apply, in Caffe's blob order, to weights and then bias.

# 1x1 reduction feeding the 3x3 branch.
layer {
  name: "icp2_reduction1"
  type: "Convolution"
  bottom: "icp2_in"
  top: "icp2_reduction1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp2_reduction1"
  type: "ReLU"
  bottom: "icp2_reduction1"
  top: "icp2_reduction1"
}
# 1x1 reduction feeding the 5x5 branch.
layer {
  name: "icp2_reduction2"
  type: "Convolution"
  bottom: "icp2_in"
  top: "icp2_reduction2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp2_reduction2"
  type: "ReLU"
  bottom: "icp2_reduction2"
  top: "icp2_reduction2"
}
# 3x3 max pool with stride 1 and pad 1, feeding the pooling branch.
layer {
  name: "icp2_pool"
  type: "Pooling"
  bottom: "icp2_in"
  top: "icp2_pool"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# ***********
# Branch 0: direct 1x1 convolution on the module input.
layer {
  name: "icp2_out0"
  type: "Convolution"
  bottom: "icp2_in"
  top: "icp2_out0"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp2_out0"
  type: "ReLU"
  bottom: "icp2_out0"
  top: "icp2_out0"
}
# Branch 1: 3x3 convolution (pad 1) on the first reduction.
layer {
  name: "icp2_out1"
  type: "Convolution"
  bottom: "icp2_reduction1"
  top: "icp2_out1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 192
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
      #std: 0.04
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp2_out1"
  type: "ReLU"
  bottom: "icp2_out1"
  top: "icp2_out1"
}
# Branch 2: 5x5 convolution (pad 2) on the second reduction.
layer {
  name: "icp2_out2"
  type: "Convolution"
  bottom: "icp2_reduction2"
  top: "icp2_out2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "xavier"
      #std: 0.08
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp2_out2"
  type: "ReLU"
  bottom: "icp2_out2"
  top: "icp2_out2"
}
# Branch 3: 1x1 convolution on the pooled input.
layer {
  name: "icp2_out3"
  type: "Convolution"
  bottom: "icp2_pool"
  top: "icp2_out3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp2_out3"
  type: "ReLU"
  bottom: "icp2_out3"
  top: "icp2_out3"
}
# Concat them together
# No concat_param is given, so Caffe's default axis is used (presumably the
# channel axis, giving 128 + 192 + 96 + 64 = 480 channels -- confirm).
layer {
  name: "icp2_out"
  type: "Concat"
  bottom: "icp2_out0"
  bottom: "icp2_out1"
  bottom: "icp2_out2"
  bottom: "icp2_out3"
  top: "icp2_out"
}
# 3x3 max pooling with stride 2 between modules 2 and 3; despite its name,
# "icp3_in" is a Pooling layer, not a Concat.
layer {
  name: "icp3_in"
  type: "Pooling"
  bottom: "icp2_out"
  top: "icp3_in"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
    pad: 0
  }
}
# Inception module 3 ***************
# Input: "icp3_in". Four parallel branches, concatenated into "icp3_out":
#   icp3_out0: 1x1 conv (160)
#   icp3_out1: 1x1 reduction (112) -> 3x3 conv (224)
#   icp3_out2: 1x1 reduction (24)  -> 5x5 conv (64)
#   icp3_out3: 3x3 max pool        -> 1x1 conv (64)
# Every convolution is followed by an in-place ReLU. In each convolution the
# two param blocks apply, in Caffe's blob order, to weights and then bias.

# 1x1 reduction feeding the 3x3 branch.
layer {
  name: "icp3_reduction1"
  type: "Convolution"
  bottom: "icp3_in"
  top: "icp3_reduction1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 112
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp3_reduction1"
  type: "ReLU"
  bottom: "icp3_reduction1"
  top: "icp3_reduction1"
}
# 1x1 reduction feeding the 5x5 branch.
layer {
  name: "icp3_reduction2"
  type: "Convolution"
  bottom: "icp3_in"
  top: "icp3_reduction2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 24
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp3_reduction2"
  type: "ReLU"
  bottom: "icp3_reduction2"
  top: "icp3_reduction2"
}
# 3x3 max pool with stride 1 and pad 1, feeding the pooling branch.
layer {
  name: "icp3_pool"
  type: "Pooling"
  bottom: "icp3_in"
  top: "icp3_pool"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# ***********
# Branch 0: direct 1x1 convolution on the module input.
layer {
  name: "icp3_out0"
  type: "Convolution"
  bottom: "icp3_in"
  top: "icp3_out0"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 160
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp3_out0"
  type: "ReLU"
  bottom: "icp3_out0"
  top: "icp3_out0"
}
# Branch 1: 3x3 convolution (pad 1) on the first reduction.
layer {
  name: "icp3_out1"
  type: "Convolution"
  bottom: "icp3_reduction1"
  top: "icp3_out1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 224
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
      #std: 0.04
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp3_out1"
  type: "ReLU"
  bottom: "icp3_out1"
  top: "icp3_out1"
}
# Branch 2: 5x5 convolution (pad 2) on the second reduction.
layer {
  name: "icp3_out2"
  type: "Convolution"
  bottom: "icp3_reduction2"
  top: "icp3_out2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "xavier"
      #std: 0.08
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp3_out2"
  type: "ReLU"
  bottom: "icp3_out2"
  top: "icp3_out2"
}
# Branch 3: 1x1 convolution on the pooled input.
layer {
  name: "icp3_out3"
  type: "Convolution"
  bottom: "icp3_pool"
  top: "icp3_out3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp3_out3"
  type: "ReLU"
  bottom: "icp3_out3"
  top: "icp3_out3"
}
# Concat them together
# No concat_param is given, so Caffe's default axis is used (presumably the
# channel axis, giving 160 + 224 + 64 + 64 = 512 channels -- confirm).
layer {
  name: "icp3_out"
  type: "Concat"
  bottom: "icp3_out0"
  bottom: "icp3_out1"
  bottom: "icp3_out2"
  bottom: "icp3_out3"
  top: "icp3_out"
}
# Inception module 4 ***************
# Input: "icp3_out" (no pooling between modules 3 and 4). Four parallel
# branches, concatenated into "icp4_out":
#   icp4_out0: 1x1 conv (256)
#   icp4_out1: 1x1 reduction (160) -> 3x3 conv (320)
#   icp4_out2: 1x1 reduction (32)  -> 5x5 conv (128)
#   icp4_out3: 3x3 max pool        -> 1x1 conv (128)
# Every convolution is followed by an in-place ReLU. In each convolution the
# two param blocks apply, in Caffe's blob order, to weights and then bias.

# 1x1 reduction feeding the 3x3 branch.
layer {
  name: "icp4_reduction1"
  type: "Convolution"
  bottom: "icp3_out"
  top: "icp4_reduction1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 160
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp4_reduction1"
  type: "ReLU"
  bottom: "icp4_reduction1"
  top: "icp4_reduction1"
}
# 1x1 reduction feeding the 5x5 branch.
layer {
  name: "icp4_reduction2"
  type: "Convolution"
  bottom: "icp3_out"
  top: "icp4_reduction2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp4_reduction2"
  type: "ReLU"
  bottom: "icp4_reduction2"
  top: "icp4_reduction2"
}
# 3x3 max pool with stride 1 and pad 1, feeding the pooling branch.
layer {
  name: "icp4_pool"
  type: "Pooling"
  bottom: "icp3_out"
  top: "icp4_pool"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# ***********
# Branch 0: direct 1x1 convolution on the module input.
layer {
  name: "icp4_out0"
  type: "Convolution"
  bottom: "icp3_out"
  top: "icp4_out0"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp4_out0"
  type: "ReLU"
  bottom: "icp4_out0"
  top: "icp4_out0"
}
# Branch 1: 3x3 convolution (pad 1) on the first reduction.
layer {
  name: "icp4_out1"
  type: "Convolution"
  bottom: "icp4_reduction1"
  top: "icp4_out1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 320
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
      #std: 0.04
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp4_out1"
  type: "ReLU"
  bottom: "icp4_out1"
  top: "icp4_out1"
}
# Branch 2: 5x5 convolution (pad 2) on the second reduction.
layer {
  name: "icp4_out2"
  type: "Convolution"
  bottom: "icp4_reduction2"
  top: "icp4_out2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "xavier"
      #std: 0.08
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp4_out2"
  type: "ReLU"
  bottom: "icp4_out2"
  top: "icp4_out2"
}
# Branch 3: 1x1 convolution on the pooled input.
layer {
  name: "icp4_out3"
  type: "Convolution"
  bottom: "icp4_pool"
  top: "icp4_out3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu_icp4_out3"
  type: "ReLU"
  bottom: "icp4_out3"
  top: "icp4_out3"
}
# Concat them together
# No concat_param is given, so Caffe's default axis is used (presumably the
# channel axis, giving 256 + 320 + 128 + 128 = 832 channels -- confirm).
layer {
  name: "icp4_out"
  type: "Concat"
  bottom: "icp4_out0"
  bottom: "icp4_out1"
  bottom: "icp4_out2"
  bottom: "icp4_out3"
  top: "icp4_out"
}
# classification branch
# "icp4_out" -> 5x5 stride-3 average pool -> 1x1 conv (128) -> ReLU ->
# fully connected (1024) -> ReLU -> dropout (0.4) -> fully connected (3755).
# This is the only classifier head defined in the visible network.
layer {
  name: "cls3_pool"
  type: "Pooling"
  bottom: "icp4_out"
  top: "cls3_pool"
  pooling_param {
    pool: AVE
    kernel_size: 5
    stride: 3
    pad: 0
    # This padding is somewhat special
  }
}
# 1x1 convolution reducing the pooled features to 128 outputs. The two param
# blocks apply, in Caffe's blob order, to weights and then bias.
layer {
  name: "cls3_reduction"
  type: "Convolution"
  bottom: "cls3_pool"
  top: "cls3_reduction"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "cls3_reduction".
layer {
  name: "relu_cls3_reduction"
  type: "ReLU"
  bottom: "cls3_reduction"
  top: "cls3_reduction"
}
# Fully connected layer with 1024 outputs.
layer {
  name: "cls3_fc1"
  type: "InnerProduct"
  bottom: "cls3_reduction"
  top: "cls3_fc1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1024
    weight_filler {
      type: "xavier"
      #std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# In-place ReLU on "cls3_fc1".
layer {
  name: "relu_cls3_fc1"
  type: "ReLU"
  bottom: "cls3_fc1"
  top: "cls3_fc1"
}
# In-place dropout on "cls3_fc1" with ratio 0.4.
layer {
  name: "cls3_drop"
  type: "Dropout"
  bottom: "cls3_fc1"
  top: "cls3_fc1"
  dropout_param {
    dropout_ratio: 0.4
  }
}
# Final fully connected layer: 3755 outputs, matching the 3755 character
# categories described in the post; its output feeds the loss and accuracy
# layers.
layer {
  name: "cls3_fc2"
  type: "InnerProduct"
  bottom: "cls3_fc1"
  top: "cls3_fc2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 3755
    weight_filler {
      type: "xavier"
      #std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

# Softmax cross-entropy loss over the 3755 outputs of "cls3_fc2" against
# "label". This layer has no phase restriction, unlike the accuracy layers
# below.
# NOTE(review): ln(3755) ~= 8.23, so a loss plateau near 8.2 is the value
# this loss takes when the predicted distribution is uniform over all 3755
# classes, i.e. the net is presumably predicting at chance level -- check the
# labels, data ordering and solver settings.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "cls3_fc2"
  bottom: "label"
  top: "loss"
  loss_weight: 1
}
# Accuracy on the TEST phase only; no accuracy_param is given, so Caffe's
# default top_k applies (presumably 1, as the layer name suggests -- confirm).
layer {
  name: "top-1"
  type: "Accuracy"
  bottom: "cls3_fc2"
  bottom: "label"
  top: "top-1"
  include {
    phase: TEST
  }
}
# Top-5 accuracy on the TEST phase only (top_k: 5).
layer {
  name: "top-5"
  type: "Accuracy"
  bottom: "cls3_fc2"
  bottom: "label"
  top: "top-5"
  include {
    phase: TEST
  }
  accuracy_param {
    top_k: 5
  }
}
Reply all
Reply to author
Forward
0 new messages