I'm trying to formulate a very simple regression problem with Caffe, but unfortunately the loss on the training set does not decrease. I was wondering if anyone can help me figure out what is wrong.
Many thanks in advance.
net: "train_small_linear.prototxt"
# Carry out testing every 500 training iterations.
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.0001
momentum: 0.9
weight_decay: 0
#0.000005
# The learning rate policy
lr_policy: "fixed"
gamma: 0.001
power: 0.75
# Display every 1000 iterations
display: 1000
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
#snapshot: 5000
snapshot_prefix: "models/train_small_0"
# solver mode: CPU or GPU
solver_mode: GPU
#type: "Nesterov"
I1224 17:08:52.024641 22993 solver.cpp:288] Solving KaggleLinear
I1224 17:08:52.024646 22993 solver.cpp:289] Learning Rate Policy: fixed
I1224 17:08:52.026695 22993 solver.cpp:237] Iteration 0, loss = 0.669407
I1224 17:08:52.026710 22993 solver.cpp:253] Train net output #0: loss = 0.669407 (* 1 = 0.669407 loss)
I1224 17:08:52.026716 22993 sgd_solver.cpp:106] Iteration 0, lr = 0.0001
I1224 17:08:53.618754 22993 solver.cpp:237] Iteration 1000, loss = 0.102132
I1224 17:08:53.618768 22993 solver.cpp:253] Train net output #0: loss = 0.102132 (* 1 = 0.102132 loss)
I1224 17:08:53.618774 22993 sgd_solver.cpp:106] Iteration 1000, lr = 0.0001
I1224 17:08:55.192927 22993 solver.cpp:237] Iteration 2000, loss = 0.0323169
I1224 17:08:55.192940 22993 solver.cpp:253] Train net output #0: loss = 0.0323169 (* 1 = 0.0323169 loss)
I1224 17:08:55.192946 22993 sgd_solver.cpp:106] Iteration 2000, lr = 0.0001
I1224 17:08:56.766247 22993 solver.cpp:237] Iteration 3000, loss = 0.0281559
I1224 17:08:56.766260 22993 solver.cpp:253] Train net output #0: loss = 0.0281559 (* 1 = 0.0281559 loss)
I1224 17:08:56.766265 22993 sgd_solver.cpp:106] Iteration 3000, lr = 0.0001
I1224 17:08:58.338812 22993 solver.cpp:237] Iteration 4000, loss = 0.0265256
I1224 17:08:58.338825 22993 solver.cpp:253] Train net output #0: loss = 0.0265256 (* 1 = 0.0265256 loss)
I1224 17:08:58.338830 22993 sgd_solver.cpp:106] Iteration 4000, lr = 0.0001
I1224 17:08:59.911969 22993 solver.cpp:237] Iteration 5000, loss = 0.032174
I1224 17:08:59.911983 22993 solver.cpp:253] Train net output #0: loss = 0.032174 (* 1 = 0.032174 loss)
I1224 17:08:59.911988 22993 sgd_solver.cpp:106] Iteration 5000, lr = 0.0001
I1224 17:09:01.484633 22993 solver.cpp:237] Iteration 6000, loss = 0.0211191
I1224 17:09:01.484647 22993 solver.cpp:253] Train net output #0: loss = 0.0211191 (* 1 = 0.0211191 loss)
I1224 17:09:01.484652 22993 sgd_solver.cpp:106] Iteration 6000, lr = 0.0001
I1224 17:09:03.057622 22993 solver.cpp:237] Iteration 7000, loss = 0.0266609
I1224 17:09:03.057636 22993 solver.cpp:253] Train net output #0: loss = 0.0266609 (* 1 = 0.0266609 loss)
I1224 17:09:03.057641 22993 sgd_solver.cpp:106] Iteration 7000, lr = 0.0001
I1224 17:09:04.631146 22993 solver.cpp:237] Iteration 8000, loss = 0.0285837
I1224 17:09:04.631160 22993 solver.cpp:253] Train net output #0: loss = 0.0285837 (* 1 = 0.0285837 loss)
I1224 17:09:04.631165 22993 sgd_solver.cpp:106] Iteration 8000, lr = 0.0001
I1224 17:09:06.204957 22993 solver.cpp:237] Iteration 9000, loss = 0.0166937
I1224 17:09:06.204970 22993 solver.cpp:253] Train net output #0: loss = 0.0166937 (* 1 = 0.0166937 loss)
I1224 17:09:06.204975 22993 sgd_solver.cpp:106] Iteration 9000, lr = 0.0001
I1224 17:09:07.775774 22993 solver.cpp:459] Snapshotting to binary proto file models/train_small_0_iter_10000.caffemodel
I1224 17:09:07.776536 22993 sgd_solver.cpp:269] Snapshotting solver state to binary proto file models/train_small_0_iter_10000.solverstate
I1224 17:09:07.778307 22993 solver.cpp:321] Iteration 10000, loss = 0.0192395
I1224 17:09:07.778319 22993 solver.cpp:326] Optimization Done.
I1224 17:09:07.778323 22993 caffe.cpp:215] Optimization Done.