I'm trying to train a 6-layer autoencoder (AE) with Caffe. How should I structure the HDF5 data and the prototxt for this?


Eren Gölge

Dec 1, 2014, 11:04:10 AM
to caffe...@googlegroups.com
Here is my model.prototxt (sorry, it's pretty long):

name: "AE_6_layers"
layers {
  top: "data"
  top: "label"
  name: "data"
  type: HDF5_DATA
  hdf5_data_param {
    source: "examples/data/train.txt"
    batch_size: 100
  }
  include: { phase: TRAIN }
}

layers {
  top: "data"
  top: "label"
  name: "data"
  type: HDF5_DATA
  hdf5_data_param {
    source: "examples/data/train.txt"
    batch_size: 100
  }
  include: { phase: TEST
             stage: 'test-on-train'
            }
}

layers {
  bottom: "data"
  top: "encode1"
  name: "encode1"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "encode1"
  top: "encode1neuron"
  name: "encode1neuron"
  type: RELU
}

layers {
  bottom: "encode1neuron"
  top: "encode2"
  name: "encode2"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 1024
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "encode2"
  top: "encode2neuron"
  name: "encode2neuron"
  type: RELU
}

layers {
  bottom: "encode2neuron"
  top: "encode3"
  name: "encode3"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 512
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "encode3"
  top: "encode3neuron"
  name: "encode3neuron"
  type: RELU
}

layers {
  bottom: "encode3neuron"
  top: "encode4"
  name: "encode4"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "encode4"
  top: "encode4neuron"
  name: "encode4neuron"
  type: RELU
}

layers {
  bottom: "encode4neuron"
  top: "encode5"
  name: "encode5"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 128
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "encode5"
  top: "encode5neuron"
  name: "encode5neuron"
  type: RELU
}

layers {
  bottom: "encode5neuron"
  top: "encode6"
  name: "encode6"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 64
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "encode6"
  top: "encode6neuron"
  name: "encode6neuron"
  type: RELU
}


#######DECODER#######


layers {
  bottom: "encode6neuron"
  top: "decode6"
  name: "decode6"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 128
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "decode6"
  top: "decode6neuron"
  name: "decode6neuron"
  type: RELU
}

layers {
  bottom: "decode6"
  top: "decode5"
  name: "decode5"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 256
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "decode5"
  top: "decode5neuron"
  name: "decode5neuron"
  type: RELU
}

layers {
  bottom: "decode5"
  top: "decode4"
  name: "decode4"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 512
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "decode4"
  top: "decode4neuron"
  name: "decode4neuron"
  type: RELU
}

layers {
  bottom: "decode4"
  top: "decode3"
  name: "decode3"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 1024
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "decode3"
  top: "decode3neuron"
  name: "decode3neuron"
  type: RELU
}

layers {
  bottom: "decode3"
  top: "decode2"
  name: "decode2"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 2048
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "decode2"
  top: "decode2neuron"
  name: "decode2neuron"
  type: RELU
}

layers {
  bottom: "decode2"
  top: "decode1"
  name: "decode1"
  type: INNER_PRODUCT
  blobs_lr: 1
  blobs_lr: 1
  weight_decay: 1
  weight_decay: 0
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 1
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layers {
  bottom: "decode1"
  top: "decode1neuron"
  name: "decode1neuron"
  type: RELU
}

layers {
  bottom: "decode1neuron"
  bottom: "data"
  top: "cross_entropy_loss"
  name: "loss"
  type: SIGMOID_CROSS_ENTROPY_LOSS
  loss_weight: 1
}

layers {
  bottom: "decode1neuron"
  bottom: "data"
  top: "l2_error"
  name: "loss"
  type: EUCLIDEAN_LOSS
  loss_weight: 0
}

Here is my solver:

net: "examples/data/autoencoder.prototxt"

test_state: { stage: 'test-on-train' }
test_iter: 500
test_interval: 250 
test_compute_loss: true

base_lr: 0.01
lr_policy: "fixed"
display: 100
max_iter: 65000
weight_decay: 0.0005

snapshot: 10000
snapshot_prefix: "examples/data/models/autoencoder_adagrad_train"

# solver mode: CPU or GPU
solver_mode: GPU
solver_type: ADAGRAD
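
For reference, I launch training with the stock caffe tool (assuming the solver above is saved as examples/data/solver.prototxt; that path is just my guess at a sensible location):

./build/tools/caffe train --solver=examples/data/solver.prototxt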

My HDF5 file is created with Python's h5py and contains two datasets: "data", a [22500, 4096] matrix, and "label", a [22500, 1] matrix of all 0 values.
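
Roughly how I create it (a minimal sketch; the random values just stand in for my real features, and the .h5 filename is only an assumed example of what train.txt points to):

import h5py
import numpy as np

# Random values stand in for the real 4096-dim features.
data = np.random.rand(22500, 4096).astype(np.float32)  # Caffe's HDF5 layer expects float32
label = np.zeros((22500, 1), dtype=np.float32)

with h5py.File('examples/data/train.h5', 'w') as f:  # assumed filename
    f.create_dataset('data', data=data)
    f.create_dataset('label', data=label)

# train.txt (the hdf5_data_param source) lists HDF5 file paths, one per line:
with open('examples/data/train.txt', 'w') as f:
    f.write('examples/data/train.h5\n')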

Caffe runs with this setup, but the problem is that I cannot see any loss being computed during training. Instead I see many lines like:

...
I1201 17:55:11.683280 17094 solver.cpp:206]     Train net output #396778: decode6neuron = 2.91948e+12
I1201 17:55:11.683292 17094 solver.cpp:206]     Train net output #396779: decode6neuron = 4.074e+11
I1201 17:55:11.683305 17094 solver.cpp:206]     Train net output #396780: decode6neuron = -0
I1201 17:55:11.683315 17094 solver.cpp:206]     Train net output #396781: decode6neuron = -0
...
I1201 17:55:11.684697 17094 solver.cpp:206]     Train net output #396896: label = 0
I1201 17:55:11.684708 17094 solver.cpp:206]     Train net output #396897: label = 0
I1201 17:55:11.684720 17094 solver.cpp:206]     Train net output #396898: label = 0
I1201 17:55:11.684730 17094 solver.cpp:206]     Train net output #396899: label = 0
I1201 17:55:11.684741 17094 solver.cpp:206]     Train net output #396900: label = 0
I1201 17:55:11.684751 17094 solver.cpp:206]     Train net output #396901: label = 0
I1201 17:55:11.684764 17094 solver.cpp:632] Iteration 400, lr = 0.01
...
and then the same pattern repeats.

I guess something is wrong, but I can't see it myself. Any help?

