I try to train 6 layers AE with Caffe. How should I structure H5DF data and prototxt for this?

343 views

auto-encoderhdf5training

Skip to first unread message

Eren Gölge

unread,

Dec 1, 2014, 11:04:10 AM12/1/14

to caffe...@googlegroups.com

Here is my model.prototxt (sorry this is pretty long)

layers {

top: "data"

top: "label"

type: HDF5_DATA

hdf5_data_param {

source: "examples/data/train.txt"

batch_size: 100

}

include: { phase: TRAIN}

}

layers {

top: "data"

top: "label"

type: HDF5_DATA

hdf5_data_param {

source: "examples/data/train.txt"

batch_size: 100

}

include: { phase: TEST

stage: 'test-on-train'

}

layers {

bottom: "data"

top: "encode1"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 2048

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "encode1"

top: "encode1neuron"

type: RELU

}

layers {

bottom: "encode1neuron"

top: "encode2"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 1024

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "encode2"

top: "encode2neuron"

type: RELU

}

layers {

bottom: "encode2neuron"

top: "encode3"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 512

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "encode3"

top: "encode3neuron"

type: RELU

}

layers {

bottom: "encode3neuron"

top: "encode4"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 256

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "encode4"

top: "encode4neuron"

type: RELU

}

layers {

bottom: "encode4neuron"

top: "encode5"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 128

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "encode5"

top: "encode5neuron"

type: RELU

}

layers {

bottom: "encode5neuron"

top: "encode6"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 64

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "encode6"

top: "encode6neuron"

type: RELU

}

#######DECODER#######

layers {

bottom: "encode6neuron"

top: "decode6"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 128

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "decode6"

top: "decode6neuron"

type: RELU

}

layers {

bottom: "decode6"

top: "decode5"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 256

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "decode5"

top: "decode5neuron"

type: RELU

}

layers {

bottom: "decode5"

top: "decode4"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 512

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "decode4"

top: "decode4neuron"

type: RELU

}

layers {

bottom: "decode4"

top: "decode3"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 1024

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "decode3"

top: "decode3neuron"

type: RELU

}

layers {

bottom: "decode3"

top: "decode2"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 2048

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "decode2"

top: "decode2neuron"

type: RELU

}

layers {

bottom: "decode2"

top: "decode1"

type: INNER_PRODUCT

blobs_lr: 1

weight_decay: 1

weight_decay: 0

inner_product_param {

num_output: 4096

weight_filler {

type: "gaussian"

std: 1

}

bias_filler {

type: "constant"

value: 0

}

layers {

bottom: "decode1"

top: "decode1neuron"

type: RELU

}

layers {

bottom: "decode1neuron"

bottom: "data"

top: "cross_entropy_loss"

type: SIGMOID_CROSS_ENTROPY_LOSS

loss_weight: 1

}

layers {

bottom: "decode1neuron"

bottom: "data"

top: "l2_error"

type: EUCLIDEAN_LOSS

loss_weight: 0

}

Here is my solver;

net: "examples/data/autoencoder.prototxt"

test_state: { stage: 'test-on-train' }

test_iter: 500

test_interval: 250

test_compute_loss: true

base_lr: 0.01

lr_policy: "fixed"

display: 100

max_iter: 65000

weight_decay: 0.0005

snapshot: 10000

snapshot_prefix: "examples/data/models/autoencoder_adagrad_train"

# solver mode: CPU or GPU

solver_mode: GPU

solver_type: ADAGRAD

My HDF5 file is created by python h5py including 2 datasets "data" with [22500,4096] matrix and "label" [22500, 1] with all 0 values.

Caffe can run in this setting but the problem is, I cannot observe any loss computation in the training. Instead I observe many lines of

...

I1201 17:55:11.683280 17094 solver.cpp:206] Train net output #396778: decode6neuron = 2.91948e+12

I1201 17:55:11.683292 17094 solver.cpp:206] Train net output #396779: decode6neuron = 4.074e+11

I1201 17:55:11.683305 17094 solver.cpp:206] Train net output #396780: decode6neuron = -0

I1201 17:55:11.683315 17094 solver.cpp:206] Train net output #396781: decode6neuron = -0

...

I1201 17:55:11.684697 17094 solver.cpp:206] Train net output #396896: label = 0

I1201 17:55:11.684708 17094 solver.cpp:206] Train net output #396897: label = 0

I1201 17:55:11.684720 17094 solver.cpp:206] Train net output #396898: label = 0

I1201 17:55:11.684730 17094 solver.cpp:206] Train net output #396899: label = 0

I1201 17:55:11.684741 17094 solver.cpp:206] Train net output #396900: label = 0

I1201 17:55:11.684751 17094 solver.cpp:206] Train net output #396901: label = 0

I1201 17:55:11.684764 17094 solver.cpp:632] Iteration 400, lr = 0.01

...

same again

I guess there is something wrong but cannot see it by myself. Any help?

Reply all

Reply to author

Forward

0 new messages