Accuracy stays at 0.5 when using AlexNet on a binary classification dataset

Taylor

Jun 29, 2016, 1:08:00 AM
to Caffe Users
My dataset contains images of 800*600 pixels, which were resized to 256*256 when converting to LMDB. I have 10 images in the training set and 10 in the validation set, and each image has a label of 1 or 0.
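
For reference, the conversion step looked roughly like this (a minimal Python sketch, assuming OpenCV for the resize and Caffe's Datum record format; the image names and labels here are placeholders for my actual file list):

import cv2
import lmdb
from caffe.proto import caffe_pb2

# Write (image, label) pairs into an LMDB, resizing to 256x256 on the way in.
# The paths and labels below stand in for the real training file list.
env = lmdb.open('testWithAlexNet/test_train_lmdb', map_size=1 << 30)
with env.begin(write=True) as txn:
    for i, (path, label) in enumerate([('cat0.jpg', 0), ('dog0.jpg', 1)]):
        img = cv2.resize(cv2.imread(path), (256, 256))   # BGR, HxWxC uint8
        datum = caffe_pb2.Datum()
        datum.channels, datum.height, datum.width = 3, 256, 256
        datum.data = img.transpose(2, 0, 1).tobytes()    # Caffe stores CxHxW
        datum.label = int(label)
        txn.put('{:08d}'.format(i).encode('ascii'), datum.SerializeToString())
env.close()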

After running AlexNet on the dataset for 1000 iterations, the accuracy was 0.5 and the loss was 8.60332, far above the ~0.693 chance-level softmax loss for two classes, so the net seems to be confidently wrong rather than just guessing.
I think there is something wrong with the configuration, since this is a binary classification problem; it is so weird that the accuracy sits exactly at 0.5.
Any help is appreciated. Thanks in advance.

Taylor

Jun 29, 2016, 1:09:26 AM
to Caffe Users
name: "AlexNet"
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "testWithAlexNet/alexnet_mean.binaryproto"
  }
  data_param {
    source: "testWithAlexNet/test_train_lmdb"
    batch_size: 256
    backend: LMDB
  }
}
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "testWithAlexNet/alexnet_val_mean.binaryproto"
  }
  data_param {
    source: "testWithAlexNet/test_val_lmdb"
    batch_size: 50
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc8-test"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param {
    lr_mult: 0.2
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  top: "loss"
}

Vijay Kumar

Jun 29, 2016, 1:41:18 AM
to Caffe Users
Your last three layers are incorrect. They should be as below.

layer {
  name: "fc8-test"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8-test"
  param {
    lr_mult: 0.2
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8-test"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8-test"
  bottom: "label"
  top: "loss"

Taylor

Jun 29, 2016, 8:56:57 AM
to Caffe Users
Thanks for the reply. I fixed the error, but I still get 0.5 accuracy. Below is part of the iteration log.

I0629 01:46:06.479586 2090364928 net.cpp:219] data does not need backward computation.
I0629 01:46:06.479591 2090364928 net.cpp:261] This network produces output accuracy
I0629 01:46:06.479598 2090364928 net.cpp:261] This network produces output loss
I0629 01:46:06.479719 2090364928 net.cpp:274] Network initialization done.
I0629 01:46:06.479893 2090364928 solver.cpp:60] Solver scaffolding done.
I0629 01:46:06.479986 2090364928 caffe.cpp:219] Starting Optimization
I0629 01:46:06.480036 2090364928 solver.cpp:279] Solving AlexNet
I0629 01:46:06.480064 2090364928 solver.cpp:280] Learning Rate Policy: step
I0629 01:46:06.611563 2090364928 solver.cpp:337] Iteration 0, Testing net (#0)
I0629 01:47:47.621381 2090364928 solver.cpp:404]     Test net output #0: accuracy = 0.5
I0629 01:47:47.621454 2090364928 solver.cpp:404]     Test net output #1: loss = 0.696434 (* 1 = 0.696434 loss)
I0629 01:48:00.851289 2090364928 solver.cpp:228] Iteration 0, loss = 0.701379
I0629 01:48:00.851326 2090364928 solver.cpp:244]     Train net output #0: loss = 0.701379 (* 1 = 0.701379 loss)
I0629 01:48:00.851351 2090364928 sgd_solver.cpp:106] Iteration 0, lr = 0.001
I0629 02:22:59.889930 2090364928 solver.cpp:337] Iteration 100, Testing net (#0)
I0629 02:24:38.849248 2090364928 solver.cpp:404]     Test net output #0: accuracy = 0.599999
I0629 02:24:38.851076 2090364928 solver.cpp:404]     Test net output #1: loss = 0.71359 (* 1 = 0.71359 loss)
I0629 02:24:50.243051 2090364928 solver.cpp:228] Iteration 100, loss = 0.23183
I0629 02:24:50.243088 2090364928 solver.cpp:244]     Train net output #0: loss = 0.23183 (* 1 = 0.23183 loss)
I0629 02:24:50.243098 2090364928 sgd_solver.cpp:106] Iteration 100, lr = 0.001
I0629 07:32:12.592731 2090364928 solver.cpp:337] Iteration 200, Testing net (#0)
I0629 07:33:57.453852 2090364928 solver.cpp:404]     Test net output #0: accuracy = 0.5
I0629 07:33:57.454047 2090364928 solver.cpp:404]     Test net output #1: loss = 2.90009 (* 1 = 2.90009 loss)
I0629 07:34:09.543332 2090364928 solver.cpp:228] Iteration 200, loss = 0.00113564
I0629 07:34:09.543375 2090364928 solver.cpp:244]     Train net output #0: loss = 0.00113564 (* 1 = 0.00113564 loss)
I0629 07:34:09.543386 2090364928 sgd_solver.cpp:106] Iteration 200, lr = 0.001
I0629 08:49:23.774435 2090364928 solver.cpp:337] Iteration 300, Testing net (#0)
I0629 08:50:56.419193 2090364928 solver.cpp:404]     Test net output #0: accuracy = 0.5
I0629 08:50:56.419250 2090364928 solver.cpp:404]     Test net output #1: loss = 3.36429 (* 1 = 3.36429 loss)
I0629 08:51:06.988178 2090364928 solver.cpp:228] Iteration 300, loss = 0.000547573
I0629 08:51:06.988265 2090364928 solver.cpp:244]     Train net output #0: loss = 0.000547573 (* 1 = 0.000547573 loss)

Vijay Kumar

Jun 30, 2016, 2:05:28 AM
to Caffe Users
You should use the same mean file for testing that you used for training:

 mean_file: "testWithAlexNet/alexnet_mean.binaryproto"

This should be the same in both the train and test phases.
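
If you want to confirm that your two binaryprotos actually differ, here is a minimal pycaffe sketch (the paths are the ones from your prototxt):

import numpy as np
import caffe
from caffe.proto import caffe_pb2

def load_mean(path):
    # Parse a .binaryproto mean file into a (channels, height, width) array.
    blob = caffe_pb2.BlobProto()
    with open(path, 'rb') as f:
        blob.ParseFromString(f.read())
    return caffe.io.blobproto_to_array(blob)[0]

train_mean = load_mean('testWithAlexNet/alexnet_mean.binaryproto')
val_mean = load_mean('testWithAlexNet/alexnet_val_mean.binaryproto')
# Any nonzero difference means train and test inputs are normalized
# differently, so test-time activations no longer match what the net learned.
print(np.abs(train_mean - val_mean).mean())

Then regenerate one mean from the training LMDB (for example with Caffe's compute_image_mean tool) and point the mean_file of both data layers at that single file.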

Taylor

Jul 1, 2016, 11:53:51 AM
to Caffe Users
Thanks for the reply. So I need to use the mean file computed from the training dataset for the testing dataset as well, right? I will try that later. Could you please explain why I should use the same mean file instead of a separate mean file computed from each dataset? Thank you so much.