The code for a single LSTM cell is below (copied from Caffe src/layers).
My question is: which of the top outputs is connected to the next layer (typically an embedding or a SoftMax layer)?
Is it the C-top or the H-top at time t?
From the code, here are the definitions of C and H.
Dtype* C = top[0]->mutable_cpu_data();  // C: buffer of top[0]; receives c = f * c_prev + i * g
Dtype* H = top[1]->mutable_cpu_data();  // H: buffer of top[1]; receives h = o * tanh(c)
The LSTM cell code
// CPU forward pass of the LSTM unit for a single time step.
//
// Inputs (bottom):
//   bottom[0] - previous cell state, hidden_dim_ values per sample.
//   bottom[1] - gate pre-activations, 4 * hidden_dim_ values per sample, read
//               as four consecutive slices: input, forget, output, candidate.
//   bottom[2] - one continuation value per sample; a value of 0 forces the
//               forget gate to 0, any other value scales the forget gate.
// Outputs (top):
//   top[0] - new cell state:    c = f * c_prev + i * g
//   top[1] - new hidden output: h = o * tanh(c)
//
// The number of samples is taken from bottom[0]->shape(1). sigmoid() is a
// helper that is not part of this block.
template <typename Dtype>
void LSTMUnitLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int batch_size = bottom[0]->shape(1);
  const int gate_stride = hidden_dim_ * 4;
  const Dtype* prev_cell = bottom[0]->cpu_data();
  const Dtype* gate_input = bottom[1]->cpu_data();
  const Dtype* keep = bottom[2]->cpu_data();
  Dtype* next_cell = top[0]->mutable_cpu_data();
  Dtype* next_hidden = top[1]->mutable_cpu_data();

  for (int sample = 0; sample < batch_size; ++sample) {
    // Slice this sample's pre-activations into the four gate segments.
    const Dtype* in_pre = gate_input;
    const Dtype* forget_pre = in_pre + hidden_dim_;
    const Dtype* out_pre = forget_pre + hidden_dim_;
    const Dtype* cand_pre = out_pre + hidden_dim_;

    for (int unit = 0; unit < hidden_dim_; ++unit) {
      const Dtype in_gate = sigmoid(in_pre[unit]);
      // A zero continuation value drops the previous cell state entirely.
      Dtype forget_gate = 0;
      if (*keep != 0) {
        forget_gate = *keep * sigmoid(forget_pre[unit]);
      }
      const Dtype out_gate = sigmoid(out_pre[unit]);
      const Dtype candidate = tanh(cand_pre[unit]);

      const Dtype cell = forget_gate * prev_cell[unit] + in_gate * candidate;
      next_cell[unit] = cell;
      next_hidden[unit] = out_gate * tanh(cell);
    }

    // Step every cursor forward to the next sample.
    prev_cell += hidden_dim_;
    gate_input += gate_stride;
    next_cell += hidden_dim_;
    next_hidden += hidden_dim_;
    ++keep;
  }
}
Question
So if we define a layer like the one below, which output does its top (named `lstm1`) refer to: C or H?
# Layer of type "LSTM" named "lstm1": two bottoms, one top.
layer {
name: "lstm1"
type: "LSTM"
# First bottom: presumably the input sequence features - confirm against the net definition.
bottom: "fc6-reshape"
# Second bottom: presumably the sequence continuation markers - confirm against the net definition.
bottom: "reshape-cm"
# Only one top is declared here.
# NOTE(review): whether this top carries the hidden output H or the cell state C
# is not determined by this snippet - check Caffe's LSTMLayer implementation.
top: "lstm1"
recurrent_param {
# Presumably the output / hidden-state dimension - confirm against caffe.proto.
num_output: 8
# Weights are initialised uniformly in [-0.01, 0.01].
weight_filler {
type: "uniform"
min: -0.01
max: 0.01
}
# Biases are initialised to the constant 0.
bias_filler {
type: "constant"
value: 0
}
}
}