template <typename Dtype> | |
void SoftmaxWithLossLayer<Dtype>::Forward_gpu( | |
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { | |
softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); | |
const Dtype* prob_data = prob_.gpu_data(); | |
const Dtype* label = bottom[1]->gpu_data(); | |
const int dim = prob_.count() / outer_num_; | |
const int nthreads = outer_num_ * inner_num_; | |
// Since this memory is not used for anything until it is overwritten | |
// on the backward pass, we use it here to avoid having to allocate new GPU | |
// memory to accumulate intermediate results in the kernel. | |
Dtype* loss_data = bottom[0]->mutable_gpu_diff(); | |
// Similarly, this memory is never used elsewhere, and thus we can use it | |
// to avoid having to allocate additional GPU memory. | |
Dtype* counts = prob_.mutable_gpu_diff(); | |
// NOLINT_NEXT_LINE(whitespace/operators) | |
SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads), | |
CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data, | |
outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); | |
Dtype loss; | |
caffe_gpu_asum(nthreads, loss_data, &loss); //Fails here | |
if (normalize_) { | |
Dtype count; | |
caffe_gpu_asum(nthreads, counts, &count); | |
loss /= count; | |
} else { | |
loss /= outer_num_; | |
} | |
top[0]->mutable_cpu_data()[0] = loss; | |
if (top.size() == 2) { | |
top[1]->ShareData(prob_); | |
} | |
} |
This may be inconsistent with your description, but I encountered the same CUBLAS_STATUS_INTERNAL_ERROR when the number of label classes did not match the "num_output" setting in my train_val.prototxt. For example, with eight classes plus background, "num_output" should be nine.
--
You received this message because you are subscribed to the Google Groups "Caffe Users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to caffe-users...@googlegroups.com.
To post to this group, send email to caffe...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/caffe-users/436c6a7c-2fcf-4ead-90d9-635d54250498%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
To view this discussion on the web visit https://groups.google.com/d/msgid/caffe-users/49026e5a-f815-4f3d-a1e7-db24d88a8280%40googlegroups.com.
This may be inconsistent with your description, but I encountered the same CUBLAS_STATUS_INTERNAL_ERROR when the number of label classes did not match the "num_output" setting in my train_val.prototxt. For example, with eight classes plus background, "num_output" should be nine.