Hi,
here is my solver.prototxt:
# SGD solver configuration.
# NOTE(review): the training log below shows the loss rising (19.9 -> 73.0)
# instead of falling before it reaches NaN — a classic sign of divergence;
# see the notes on the individual settings.
net: "models/testmodel/train_val.prototxt"
test_iter: 100          # test batches per test pass (100 x batch_size 96 = 9600 images)
test_interval: 100      # run a full test pass every 100 training iterations
base_lr:0.0001          # starting learning rate; if the loss still diverges, try lowering this further
lr_policy: "step"       # multiply the learning rate by `gamma` every `stepsize` iterations
gamma: 0.1
stepsize: 5000
display: 100            # log the training loss every 100 iterations
max_iter: 30000
momentum: 0.2           # NOTE(review): unusually low; 0.9 is the common SGD default
weight_decay: 0.0005
snapshot: 1000          # write model weights + solver state every 1000 iterations
snapshot_prefix: "models/testmodel/caffe5_train"
solver_mode: CPU
And here is my train_val.prototxt:
name: "CaffeNet"
# Training-phase input layer: reads image/label pairs from an LMDB database,
# takes random 227x227 crops with horizontal mirroring, and subtracts the
# ImageNet mean image.
# NOTE(review): mean_file is the full-ImageNet mean — confirm it matches the
# statistics of the 5-class training LMDB.
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 227
mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
}
# mean pixel / channel-wise mean instead of mean image
# transform_param {
# crop_size: 227
# mean_value: 104
# mean_value: 117
# mean_value: 123
# mirror: true
# }
data_param {
source: "/home/elahe/Downloads/caffe-master/examples/imagenet/5train_lmdb"
batch_size: 256
backend: LMDB
}
}
# Test-phase input layer: same preprocessing as training but with center
# crops only (no mirroring) and a smaller batch size, reading from the
# validation LMDB.
layer {
name: "data"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 227
mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
}
# mean pixel / channel-wise mean instead of mean image
# transform_param {
# crop_size: 227
# mean_value: 104
# mean_value: 117
# mean_value: 123
# mirror: false
# }
data_param {
source: "/home/elahe/Downloads/caffe-master/examples/imagenet/5val_lmdb"
batch_size: 96
backend: LMDB
}
}
# First (and only) convolution: 96 filters of 11x11 with stride 4,
# AlexNet-style. Weights get lr_mult 1 with weight decay; biases get a
# doubled learning rate and no decay (the usual Caffe convention).
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
# In-place ReLU after conv1.
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
# 3x3 max pooling with stride 2 (overlapping pooling).
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# Local response normalization across 5 neighboring channels
# (AlexNet defaults: alpha 1e-4, beta 0.75).
layer {
name: "norm1"
type: "LRN"
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
# Fully connected layer attached directly to norm1, followed by an in-place
# ReLU. NOTE(review): with 227x227 crops, conv1 (11x11/4) gives 55x55 and
# pool1 (3x3/2) gives 27x27, so this layer's fan-in is about 96*27*27 ~= 70k
# inputs feeding 4096 outputs (~287M weights). Such a large FC layer with
# gaussian init and no dropout is a plausible contributor to the divergence
# seen in the log — TODO confirm (e.g. by shrinking num_output or adding
# intermediate conv/pool stages).
layer {
name: "fc7_t"
type: "InnerProduct"
bottom: "norm1"
top: "fc7_t"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7_t"
top: "fc7_t"
}
# Final classifier: 5-way fully connected layer producing the class scores
# (logits) consumed by the accuracy and loss layers below.
layer {
name: "fc8_t"
type: "InnerProduct"
bottom: "fc7_t"
top: "fc8_t"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 1
}
inner_product_param {
num_output: 5
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
# Top-1 accuracy, computed only during the TEST phase.
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc8_t"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
# Softmax + multinomial logistic loss on the 5-class logits.
# NOTE(review): at iteration 0 the loss is ~19.9, far above the
# ln(5) ~= 1.61 expected from random 5-way guessing — suggests the initial
# logits are already very large; worth confirming the init/scale of the
# preceding FC layers.
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8_t"
bottom: "label"
top: "loss"
}
Training log output:
I0112 20:19:36.843014 3667 solver.cpp:408] Test net output #0: accuracy = 0.166667
I0112 20:19:36.843159 3667 solver.cpp:408] Test net output #1: loss = 23.2279 (* 1 = 23.2279 loss)
I0112 20:19:44.464568 3667 solver.cpp:236] Iteration 0, loss = 19.8601
I0112 20:19:44.464617 3667 solver.cpp:252] Train net output #0: loss = 19.8601 (* 1 = 19.8601 loss)
I0112 20:19:44.464637 3667 sgd_solver.cpp:106] Iteration 0, lr = 0.0001
I0112 20:38:12.537981 3667 solver.cpp:340] Iteration 100, Testing net (#0)
I0112 20:41:32.721359 3667 solver.cpp:408] Test net output #0: accuracy = 0.722083
I0112 20:41:32.721444 3667 solver.cpp:408] Test net output #1: loss = 7.48023 (* 1 = 7.48023 loss)
I0112 20:41:42.956892 3667 solver.cpp:236] Iteration 100, loss = 12.3962
I0112 20:41:42.956938 3667 solver.cpp:252] Train net output #0: loss = 12.3962 (* 1 = 12.3962 loss)
I0112 20:41:42.956950 3667 sgd_solver.cpp:106] Iteration 100, lr = 0.0001
I0112 21:02:22.146353 3667 solver.cpp:340] Iteration 200, Testing net (#0)
I0112 21:05:43.417537 3667 solver.cpp:408] Test net output #0: accuracy = 0.388854
I0112 21:05:43.417616 3667 solver.cpp:408] Test net output #1: loss = 50.6208 (* 1 = 50.6208 loss)
I0112 21:05:52.962345 3667 solver.cpp:236] Iteration 200, loss = 58.1768
I0112 21:05:52.962393 3667 solver.cpp:252] Train net output #0: loss = 58.1768 (* 1 = 58.1768 loss)
I0112 21:05:52.962404 3667 sgd_solver.cpp:106] Iteration 200, lr = 0.0001
I0112 21:25:20.889557 3667 solver.cpp:340] Iteration 300, Testing net (#0)
I0112 21:28:47.969341 3667 solver.cpp:408] Test net output #0: accuracy = 0.444479
I0112 21:28:47.969409 3667 solver.cpp:408] Test net output #1: loss = 45.0209 (* 1 = 45.0209 loss)
I0112 21:28:58.119763 3667 solver.cpp:236] Iteration 300, loss = 48.0553
I0112 21:28:58.119810 3667 solver.cpp:252] Train net output #0: loss = 48.0553 (* 1 = 48.0553 loss)
I0112 21:28:58.119823 3667 sgd_solver.cpp:106] Iteration 300, lr = 0.0001
I0112 21:48:42.978188 3667 solver.cpp:340] Iteration 400, Testing net (#0)
I0112 21:52:09.260004 3667 solver.cpp:408] Test net output #0: accuracy = 0.277708
I0112 21:52:09.260082 3667 solver.cpp:408] Test net output #1: loss = 60.1215 (* 1 = 60.1215 loss)
I0112 21:52:19.721530 3667 solver.cpp:236] Iteration 400, loss = 59.2108
I0112 21:52:19.721578 3667 solver.cpp:252] Train net output #0: loss = 59.2108 (* 1 = 59.2108 loss)
I0112 21:52:19.721590 3667 sgd_solver.cpp:106] Iteration 400, lr = 0.0001
I0112 22:10:31.664820 3667 solver.cpp:340] Iteration 500, Testing net (#0)
I0112 22:13:14.781256 3667 solver.cpp:408] Test net output #0: accuracy = 0.277917
I0112 22:13:14.781415 3667 solver.cpp:408] Test net output #1: loss = 63.0641 (* 1 = 63.0641 loss)
I0112 22:13:22.490547 3667 solver.cpp:236] Iteration 500, loss = 66.8671
I0112 22:13:22.490595 3667 solver.cpp:252] Train net output #0: loss = 66.8671 (* 1 = 66.8671 loss)
I0112 22:13:22.490607 3667 sgd_solver.cpp:106] Iteration 500, lr = 0.0001
I0112 22:28:21.003844 3667 solver.cpp:340] Iteration 600, Testing net (#0)
I0112 22:30:50.384950 3667 solver.cpp:408] Test net output #0: accuracy = 0.500104
I0112 22:30:50.385020 3667 solver.cpp:408] Test net output #1: loss = 43.6592 (* 1 = 43.6592 loss)
I0112 22:30:57.693332 3667 solver.cpp:236] Iteration 600, loss = 68.9141
I0112 22:30:57.693379 3667 solver.cpp:252] Train net output #0: loss = 68.9141 (* 1 = 68.9141 loss)
I0112 22:30:57.693392 3667 sgd_solver.cpp:106] Iteration 600, lr = 0.0001
I0112 22:45:46.741204 3667 solver.cpp:340] Iteration 700, Testing net (#0)
I0112 22:48:22.395297 3667 solver.cpp:408] Test net output #0: accuracy = 0.222292
I0112 22:48:22.395411 3667 solver.cpp:408] Test net output #1: loss = 67.9223 (* 1 = 67.9223 loss)
I0112 22:48:29.634990 3667 solver.cpp:236] Iteration 700, loss = 69.2606
I0112 22:48:29.635037 3667 solver.cpp:252] Train net output #0: loss = 69.2606 (* 1 = 69.2606 loss)
I0112 22:48:29.635051 3667 sgd_solver.cpp:106] Iteration 700, lr = 0.0001
I0112 23:03:28.678174 3667 solver.cpp:340] Iteration 800, Testing net (#0)
I0112 23:05:56.843686 3667 solver.cpp:408] Test net output #0: accuracy = 0.166667
I0112 23:05:56.843754 3667 solver.cpp:408] Test net output #1: loss = 72.7803 (* 1 = 72.7803 loss)
I0112 23:06:04.018985 3667 solver.cpp:236] Iteration 800, loss = 73.0079
I0112 23:06:04.019027 3667 solver.cpp:252] Train net output #0: loss = 73.0079 (* 1 = 73.0079 loss)
I have tried several learning rates and momentum values, and I also deleted the dropout layer, but in every case the loss becomes NaN after some number of iterations.
Why does this happen?
:((