// Print "Hello World" preceded by a newline. The line break is written as the
// "\n" escape because a string literal may not span a raw line break.
std::cout << "\nHello World";
I've fine-tuned FCN-8s for semantic segmentation on my own dataset, and the results look adequate
when inference is done in Python. I've converted the Python code to C++, using the same
trained network and deploy.prototxt. However, with the C++ version the
results are not identical and are somewhat less accurate (~90% per-class
accuracy in Python vs. ~80% per-class accuracy in C++; many pixels are misclassified between classes, but the output is still close).
I suspect that one of the following
highlighted sections could be the reason. Any ideas or experience regarding what is happening here, or what to do about it, would be appreciated.
Suspect 1:
python code: this code is similar to infer.py found here........
# Preprocess the image `im` and run one forward pass through the network.
in_ = np.array(im, dtype=np.float32)  # keep the data in float32 from the start
in_ = in_[:,:,::-1] # RGB2BGR, channel shift
in_ -= np.array((92.9630, 95.9630, 89.5160))  # subtract the per-channel mean values (applied after the channel flip)
in_ = in_.transpose((2,0,1)) #convert to CxHxW
net = caffe.Net('/..../deploy.prototxt', '..../train__iter_5000.caffemodel', caffe.TEST)
net.blobs['data'].reshape(1, *in_.shape)  # resize the input blob to 1 x C x H x W
net.blobs['data'].data[...] = in_ # here the input is still np.float32 type
# The forward call had been fused into the comment above; it is a separate statement.
net.forward()
..............................................
Equivalent C++ code..........................
// Convert the mean-subtracted image into a Caffe blob and run a forward pass.
Mat BGR; // BGR is the mean subtracted 3 channel Mat

// NOTE(review): NORM_MINMAX stretches this image's own min/max onto [0, 255],
// so the mean-subtracted values are rescaled per image. The Python snippet
// feeds the mean-subtracted float32 values to the net without any rescaling,
// so the two pipelines do not present the same input to the network.
cv::normalize(BGR, BGR, 0, 255, NORM_MINMAX, CV_8UC3);
// Need to convert to CV_8U because if you don't then you get the error
// "[io.cpp:213] Check failed: cv_img.depth() == CV_8U Image data type must be unsigned"
caffe::Datum datum;
CVMatToDatum(BGR, &datum);

// The input is now 8-bit unsigned (CV_8U).
caffe::Blob<float>* blob = new Blob<float>(1,datum.channels(),datum.height(),datum.width());

// Describe the blob shape in a BlobProto that mirrors the datum.
caffe::BlobProto blob_proto;
blob_proto.set_num(1);
blob_proto.set_channels(datum.channels());
blob_proto.set_height(datum.height());
blob_proto.set_width(datum.width());

// Reserve one float slot per datum element, initialised to zero.
int size_in_datum = std::max<int>(datum.data().size(),datum.float_data_size());
for (int i = 0; i < size_in_datum; ++i) {
  blob_proto.add_data(0.);
}

// Copy the byte data into the float slots; the cast to uint8_t keeps bytes
// above 127 from being read as negative chars.
const string& data = datum.data();
if (data.size() != 0) {
  for (int i = 0; i < size_in_datum; ++i) {
    blob_proto.set_data(i, blob_proto.data(i) + static_cast<uint8_t>(data[i]));
  }
}
blob->FromProto(blob_proto); // the blob is always in float!!

// NOTE(review): this scales the input to [0, 1]. The Python snippet does not
// divide by 255, so this is a second difference between the two pipelines.
float* input_data = blob->mutable_cpu_data();
for (int i = 0; i < blob->channels()*blob->height()*blob->width(); i++) {
  input_data[i] = input_data[i]/255;
}

// Run the forward pass with the blob as the only bottom input.
vector<Blob<float>*> bottom_vec;
bottom_vec.push_back(blob);
float iter_loss;
const vector<Blob<float>*>& result = my_caffe.Forward(bottom_vec, &iter_loss);
=============================================================================================
Suspect 2:
Python code:
...........................
# Per-pixel class label: index of the largest score along the channel axis.
out = net.blobs['score'].data[0].argmax(axis=0) # here argmax is done so easily
# The cast below had been fused into the comment above; the label map must be
# uint8 before it is wrapped as an 8-bit ('L') image.
out = np.array(out, np.uint8)
img = Image.fromarray(out, 'L')
...........................................
Equivalent C++ code
...........................................
Blob<float>* input_layer = avcs_caffe.output_blobs()[0];
int width = input_layer->width();
int height = input_layer->height();
int channels = input_layer->channels();
const float* result_vec = result[0]->cpu_data();
result_vec is a flat vector holding row x col x channels values (the code
below reads it channel by channel). I reshaped it into a matrix of size
[row*col] x channels, so that each row holds the predictions for all
channels (as columns). Then I computed the maximum of each row and recorded
its index (which I assume is equivalent to argmax). Finally, I reshaped the
index vector into a row x col matrix, which gave me the predicted label
image.
// Per-pixel argmax over the class scores in result_vec.
// Step 1: copy the scores into a channels x (width*height) matrix, reading
// result_vec sequentially, one channel after another.
int index = 0;
cv::Mat class_each_row(channels, width*height, CV_32FC1);
for (int i = 0; i < channels; i++) {
  for (int j = 0; j < width*height; j++) {
    class_each_row.at<float>(i, j) = result_vec[index];
    index++;
  }
}

// Step 2: transpose so that each row holds all class scores of one pixel.
class_each_row = class_each_row.t(); // transpose to make each row with all probabilities

// Step 3: for every pixel row, find the column of the largest score; that
// column is the class index and becomes the pixel's label.
cv::Point maxId;  // point [x,y] values for index of max
double maxValue;  // the holy max value itself
cv::Mat label(height, width, CV_8UC1);
for (int i = 0; i < class_each_row.rows; i++) {
  cv::minMaxLoc(class_each_row.row(i), 0, &maxValue, 0, &maxId);
  // maxId.x is the column within the 1 x channels row; i is the linear pixel index.
  label.at<uchar>(i) = static_cast<uchar>(maxId.x);
}
==============================================
Please let me know if anyone has succeeded in getting exactly the same results in Python and C++ for semantic segmentation.