name: "AE_6_layers"
layers {
top: "data"
top: "label"
name: "data"
type: HDF5_DATA
hdf5_data_param {
source: "examples/data/train.txt"
batch_size: 100
}
include: { phase: TRAIN}
}
layers {
top: "data"
top: "label"
name: "data"
type: HDF5_DATA
hdf5_data_param {
source: "examples/data/train.txt"
batch_size: 100
}
include: { phase: TEST
stage: 'test-on-train'
}
}
layers {
bottom: "data"
top: "encode1"
name: "encode1"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 2048
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "encode1"
top: "encode1neuron"
name: "encode1neuron"
type: RELU
}
layers {
bottom: "encode1neuron"
top: "encode2"
name: "encode2"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 1024
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "encode2"
top: "encode2neuron"
name: "encode2neuron"
type: RELU
}
layers {
bottom: "encode2neuron"
top: "encode3"
name: "encode3"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 512
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "encode3"
top: "encode3neuron"
name: "encode3neuron"
type: RELU
}
layers {
bottom: "encode3neuron"
top: "encode4"
name: "encode4"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 256
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "encode4"
top: "encode4neuron"
name: "encode4neuron"
type: RELU
}
layers {
bottom: "encode4neuron"
top: "encode5"
name: "encode5"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 128
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "encode5"
top: "encode5neuron"
name: "encode5neuron"
type: RELU
}
layers {
bottom: "encode5neuron"
top: "encode6"
name: "encode6"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 64
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "encode6"
top: "encode6neuron"
name: "encode6neuron"
type: RELU
}
#######DECODER#######
layers {
bottom: "encode6neuron"
top: "decode6"
name: "decode6"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 128
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "decode6"
top: "decode6neuron"
name: "decode6neuron"
type: RELU
}
layers {
bottom: "decode6"
top: "decode5"
name: "decode5"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 256
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "decode5"
top: "decode5neuron"
name: "decode5neuron"
type: RELU
}
layers {
bottom: "decode5"
top: "decode4"
name: "decode4"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 512
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "decode4"
top: "decode4neuron"
name: "decode4neuron"
type: RELU
}
layers {
bottom: "decode4"
top: "decode3"
name: "decode3"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 1024
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "decode3"
top: "decode3neuron"
name: "decode3neuron"
type: RELU
}
layers {
bottom: "decode3"
top: "decode2"
name: "decode2"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 2048
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "decode2"
top: "decode2neuron"
name: "decode2neuron"
type: RELU
}
layers {
bottom: "decode2"
top: "decode1"
name: "decode1"
type: INNER_PRODUCT
blobs_lr: 1
blobs_lr: 1
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 1
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
bottom: "decode1"
top: "decode1neuron"
name: "decode1neuron"
type: RELU
}
layers {
bottom: "decode1neuron"
bottom: "data"
top: "cross_entropy_loss"
name: "loss"
type: SIGMOID_CROSS_ENTROPY_LOSS
loss_weight: 1
}
layers {
bottom: "decode1neuron"
bottom: "data"
top: "l2_error"
name: "loss"
type: EUCLIDEAN_LOSS
loss_weight: 0
}
Here is my solver;
net: "examples/data/autoencoder.prototxt"
test_state: { stage: 'test-on-train' }
test_iter: 500
test_interval: 250
test_compute_loss: true
base_lr: 0.01
lr_policy: "fixed"
display: 100
max_iter: 65000
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "examples/data/models/autoencoder_adagrad_train"
# solver mode: CPU or GPU
solver_mode: GPU
solver_type: ADAGRAD
My HDF5 file is created by python h5py including 2 datasets "data" with [22500,4096] matrix and "label" [22500, 1] with all 0 values.
Caffe can run in this setting but the problem is, I cannot observe any loss computation in the training. Instead I observe many lines of
...
I1201 17:55:11.683280 17094 solver.cpp:206] Train net output #396778: decode6neuron = 2.91948e+12
I1201 17:55:11.683292 17094 solver.cpp:206] Train net output #396779: decode6neuron = 4.074e+11
I1201 17:55:11.683305 17094 solver.cpp:206] Train net output #396780: decode6neuron = -0
I1201 17:55:11.683315 17094 solver.cpp:206] Train net output #396781: decode6neuron = -0
...
I1201 17:55:11.684697 17094 solver.cpp:206] Train net output #396896: label = 0
I1201 17:55:11.684708 17094 solver.cpp:206] Train net output #396897: label = 0
I1201 17:55:11.684720 17094 solver.cpp:206] Train net output #396898: label = 0
I1201 17:55:11.684730 17094 solver.cpp:206] Train net output #396899: label = 0
I1201 17:55:11.684741 17094 solver.cpp:206] Train net output #396900: label = 0
I1201 17:55:11.684751 17094 solver.cpp:206] Train net output #396901: label = 0
I1201 17:55:11.684764 17094 solver.cpp:632] Iteration 400, lr = 0.01
...
same again
I guess there is something wrong but cannot see it by myself. Any help?