// --backend: which key/value DB backend holds the input images
// ("leveldb" or "lmdb"); read below via FLAGS_backend when opening the DB.
DEFINE_string(backend, "lmdb",
"The backend {leveldb, lmdb} containing the images");
// compute_image_mean: accumulate a per-pixel sum over every Datum in a
// leveldb/lmdb database, divide by the image count, and optionally write
// the mean image to OUTPUT_FILE as a BlobProto. Also logs per-channel
// mean values.
//
// Usage: compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]
// Returns 0 on success, 1 on bad usage; CHECK failures abort.
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);

#ifdef USE_OPENCV
#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Compute the mean_image of a set of images given by"
        " a leveldb/lmdb\n"
        "Usage:\n"
        "    compute_image_mean [FLAGS] INPUT_DB [OUTPUT_FILE]\n");
  // Parse the real command line. The previous debug hack hard-coded
  // argc = 3 and assigned string literals to argv[1]/argv[2] (ill-formed:
  // string literal -> char* conversion) and skipped flag parsing entirely,
  // so FLAGS_backend could never be set by the user.
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 2 || argc > 3) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/compute_image_mean");
    return 1;
  }

  // Open the input DB read-only and walk it with a cursor.
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[1], db::READ);
  scoped_ptr<db::Cursor> cursor(db->NewCursor());

  BlobProto sum_blob;  // running per-pixel sums, later divided by count
  int count = 0;
  // Load the first datum only to discover the image geometry.
  Datum datum;
  datum.ParseFromString(cursor->value());

  if (DecodeDatumNative(&datum)) {
    LOG(INFO) << "Decoding Datum";
  }

  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  // A Datum stores pixels either in the byte `data` field or in
  // `float_data`; exactly one is populated, so take the larger size.
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }

  LOG(INFO) << "Starting Iteration";
  while (cursor->valid()) {
    Datum datum;
    datum.ParseFromString(cursor->value());
    DecodeDatumNative(&datum);

    const std::string& data = datum.data();
    size_in_datum = std::max<int>(datum.data().size(),
                                  datum.float_data_size());
    CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
        size_in_datum;
    if (data.size() != 0) {
      // Byte-encoded pixels: widen each byte as unsigned before summing.
      CHECK_EQ(data.size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
      }
    } else {
      // Float-encoded pixels.
      CHECK_EQ(datum.float_data_size(), size_in_datum);
      for (int i = 0; i < size_in_datum; ++i) {
        sum_blob.set_data(i, sum_blob.data(i) +
            static_cast<float>(datum.float_data(i)));
      }
    }
    ++count;
    if (count % 10000 == 0) {
      LOG(INFO) << "Processed " << count << " files.";
    }
    cursor->Next();
  }

  if (count % 10000 != 0) {
    LOG(INFO) << "Processed " << count << " files.";
  }

  // Guard the division below: an empty DB would otherwise divide by zero
  // and silently fill the mean with NaN/inf.
  CHECK_GT(count, 0) << "No datum found in " << argv[1];
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }

  // Write to disk only when an OUTPUT_FILE argument was given.
  if (argc == 3) {
    LOG(INFO) << "Write to " << argv[2];
    WriteProtoToBinaryFile(sum_blob, argv[2]);
  }

  // Report the per-channel means (useful for mean_value preprocessing).
  const int channels = sum_blob.channels();
  const int dim = sum_blob.height() * sum_blob.width();
  std::vector<float> mean_values(channels, 0.0);
  LOG(INFO) << "Number of channels: " << channels;
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) {
      mean_values[c] += sum_blob.data(dim * c + i);
    }
    LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim;
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
// ---------------------------------------------------------------------------
// Command-line flags for the annotated-dataset conversion tool
// (convert_annoset): image decoding, DB backend, annotation format, and
// resize/encode options.
// NOTE(review): --backend is re-defined here while an identical flag name is
// already registered at the top of this file for the mean tool; gflags
// rejects duplicate flag registration, so these two tools look like they
// belong in separate translation units — confirm intended file layout.
// ---------------------------------------------------------------------------
// Treat input images as single-channel grayscale instead of BGR.
DEFINE_bool(gray, false,
"When this option is on, treat images as grayscale ones");
// Shuffle the (image, label) list before writing to the DB.
DEFINE_bool(shuffle, false,
"Randomly shuffle the order of images and their labels");
// DB backend used for the OUTPUT database.
DEFINE_string(backend, "lmdb",
"The backend {lmdb, leveldb} for storing the result");
// Kind of annotation carried alongside each image.
DEFINE_string(anno_type, "detection",
"The type of annotation {classification, detection}.");
// File format of the per-image annotation files (e.g. VOC-style xml).
DEFINE_string(label_type, "xml",
"The type of annotation file format.");
// NOTE(review): machine-specific absolute path baked in as the default;
// should default to "" and be supplied on the command line — TODO confirm.
DEFINE_string(label_map_file, "D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/VOC0712/labelmap_voc.prototxt","A file with LabelMap protobuf message.");
//"D:\\Deep Learning\\caffe_ssd_cuda8.0_vs2013\\caffe\\00_datas\\car_1306\\car.prototxt"
// Reject duplicated names/labels in the label map.
DEFINE_bool(check_label, true,
"When this option is on, check that there is no duplicated name/label.");
// Resize constraints: min/max dimension keep aspect ratio; 0 disables.
DEFINE_int32(min_dim, 0,
"Minimum dimension images are resized to (keep same aspect ratio)");
DEFINE_int32(max_dim, 0,
"Maximum dimension images are resized to (keep same aspect ratio)");
// Fixed output size (SSD-style 512x512 by default here).
DEFINE_int32(resize_width, 512, "Width images are resized to");
DEFINE_int32(resize_height, 512, "Height images are resized to");
// Verify every datum written has identical dimensions.
DEFINE_bool(check_size, true,
"When this option is on, check that all the datum have the same size");
// Store the compressed image bytes rather than raw decoded pixels.
DEFINE_bool(encoded, true,
"When this option is on, the encoded image will be save in datum");
// Codec used when --encoded is on.
DEFINE_string(encode_type, "jpg",
"Optional: What type should we encode the image as ('png','jpg',...).");
// Entry point for the annotation/image -> DB conversion tool
// (convert_annoset). NOTE(review): the function body is truncated in this
// view — the remainder continues past the pasted notes below; only the
// setup shown here is documented.
int main(int argc, char** argv) {
#ifdef USE_OPENCV
::google::InitGoogleLogging(argv[0]);
// Print output to stderr (while still logging)
FLAGS_alsologtostderr = 1;
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
// NOTE(review): debug hack — argc/argv are overwritten with hard-coded
// Windows paths (ROOT_DIR, LISTFILE, OUT_DB) instead of parsing the real
// command line; string-literal-to-char* assignment is also ill-formed in
// C++11+. Restore gflags::ParseCommandLineFlags and drop these overrides
// before shipping.
argc = 4;
argv[1] = "D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/";
argv[2] = "D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/train_test.txt";
argv[3] = "D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/train_test_lmdb";
/* Example session notes (pasted shell/log output, not code — fenced in a
 * comment so the file still compiles):
 *
 * train_test.txt
 * VOC2007/JPEGImages/000257.jpg VOC2007/Annotations/000257.xml
 * D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/
 * D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/train_test.txt
 * D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/train_test_lmdb
 * I0418 15:37:15.999755  2340 convert_annoset.cpp:143] A total of 1 images.
 * I0418 15:37:16.016299  2340 db_lmdb.cpp:40] Opened lmdb D:/Deep Learning/caffe_ssd_cuda8.0_vs2013/caffe/train_test_lmdb
 * I0418 15:37:16.062636  2340 convert_annoset.cpp:222] Processed 1 files.
 */