I have what I believe is a simple task: write multiple protocol buffer
messages into a single binary file and then read them back
sequentially. When reading, I should not have to load the entire file
in memory but read each protobuf object one at a time (with some
buffering). An example application would be to store a large number of
documents in a file, where each document is a single protobuf message.
Is this functionality available in C++?
Zia
Please see my sample below:
1. The file that describes the message I want to serialize:
// One person record; every instance is serialized as a single message.
message Person {
  // Allowed values for the `gendre` field. Numbering starts at 1, so code
  // that maps a value onto a zero-based table must subtract one.
  enum GendreType {
    M = 1;
    F = 2;
  }

  // Numeric identifier of the person.
  required int32 id = 1;
  // Name of the person.
  required string name = 2;
  // E-mail address; may be left unset.
  optional string email = 3;
  // One of the GendreType values declared above.
  required GendreType gendre = 4;
}
// Container message that groups any number of Person records so that they
// can be serialized and parsed together as one message.
message Catalog {
// The persons stored in this catalog.
repeated Person persons = 1;
}
2. The code that loads the messages one by one (please see the
functions saveMultipleMessages and loadMultipleMessages) or all at once:
#include <cassert>
#include <iostream>
#include <fstream>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/io/coded_stream.h>
#include "person.pb.h"
using namespace std;
// Printable labels for Person::GendreType. The enum values start at 1
// (M = 1, F = 2), so callers index this table with (value - 1).
static const char *gendres[] = {"M", "F"};
//
// Writes two sample Person messages, each preceded by its size, into the
// file `fileName`. Returns false when the file cannot be opened.
bool saveMultipleMessages(const char *fileName);
// Reads the messages stored by saveMultipleMessages back from `fileName`,
// one at a time, and prints them. Returns false when the file cannot be
// opened.
bool loadMultipleMessages(const char *fileName);
//
int main()
{
GOOGLE_PROTOBUF_VERIFY_VERSION;
fstream out("person.pb", ios::out | ios::binary | ios::trunc);
// the container of person objects
Catalog catalog;
// add person #1
Person *p = catalog.add_persons();
p->set_id(23);
p->set_name("Bob");
p->set_email("b...@mail.com");
p->set_gendre(Person::M);
// add person #2
p = catalog.add_persons();
p->set_id(24);
p->set_name("Foo");
p->set_gendre(Person::F);
// serialize the container of the person objects
catalog.SerializeToOstream(&out);
out.close();
// deserialize the container of person objects
Catalog cat1;
fstream in("person.pb", ios::in | ios::binary);
if (!cat1.ParseFromIstream(&in))
{
cerr << "Failed to parse person.pb" << endl;
exit(1);
}
for (int i = 0; i < cat1.persons_size(); ++i)
{
Person p = cat1.persons(i);
cout << "ID: " << p.id() << endl;
cout << "name: " << p.name() << endl;
cout << "gendre: " << gendres[p.gendre()-1] << endl;
if (p.has_email())
{
cout << "e-mail: " << p.email() << endl;
}
cout << endl;
}
in.close();
//
if (saveMultipleMessages("person1.pb"))
{
loadMultipleMessages("person1.pb");
}
google::protobuf::ShutdownProtobufLibrary();
return 0;
}
//
bool saveMultipleMessages(const char *fileName)
{
fstream out(fileName, ios::out | ios::trunc | ios::binary);
if (!out)
{
cerr << "failed to open " << fileName << endl;
return false;
}
::google::protobuf::io::ZeroCopyOutputStream *raw_out =
new ::google::protobuf::io::OstreamOutputStream(&out);
::google::protobuf::io::CodedOutputStream *coded_out =
new ::google::protobuf::io::CodedOutputStream(raw_out);
// save the number of the messages to be serialized into the
output file
coded_out->WriteVarint32(2);
std::string s;
// message #1
Person p;
p.set_id(1);
p.set_name("Bob");
p.set_gendre(Person::M);
p.SerializeToString(&s);
coded_out->WriteVarint32(s.size());
coded_out->WriteRaw(s.data(), s.size()); // ->WriteString(s)
//
// message #2
p.set_id(2);
p.set_name("Jane");
p.set_email("ja...@kinky.com");
p.set_gendre(Person::F);
p.SerializeToString(&s);
coded_out->WriteVarint32(s.size());
coded_out->WriteString(s);
//
delete coded_out;
delete raw_out;
out.close();
return true;
}
//
// Reads the Person messages stored in `fileName` one at a time and prints
// each of them to cout.
//
// Expected file layout: a varint holding the number of messages, then for
// every message a varint with its size followed by its serialized bytes.
// Only one message is held in memory at any time.
//
// Returns true when every announced message was read and parsed; false
// (with a diagnostic on cerr) when the file cannot be opened, ends early,
// or contains a record that does not parse as a Person.
bool loadMultipleMessages(const char *fileName)
{
    fstream in(fileName, ios::in | ios::binary);
    if (!in)
    {
        cerr << "failed to open " << fileName << endl;
        return false;
    }

    // Declared in dependency order; they are destroyed in reverse order
    // (coded stream, raw stream, file) on every exit path.
    // NOTE(review): CodedInputStream enforces a total-bytes limit; for very
    // large files confirm whether the limit must be raised or the coded
    // stream recreated per message.
    ::google::protobuf::io::IstreamInputStream raw_in(&in);
    ::google::protobuf::io::CodedInputStream coded_in(&raw_in);

    uint32_t n = 0;
    if (!coded_in.ReadVarint32(&n))
    {
        cerr << "failed to read the message count from " << fileName << endl;
        return false;
    }
    cout << "#" << n << endl;

    std::string s; // reused for every record to avoid reallocating
    for (uint32_t i = 0; i < n; ++i)
    {
        uint32_t msgSize = 0;
        if (!coded_in.ReadVarint32(&msgSize))
        {
            cerr << "failed to read the size of message #" << (i + 1) << endl;
            return false;
        }
        if (msgSize == 0)
        {
            // An empty record carries no Person data; skip it silently.
            continue;
        }
        if (!coded_in.ReadString(&s, static_cast<int>(msgSize)))
        {
            cerr << "failed to read message #" << (i + 1) << endl;
            return false;
        }

        Person p;
        if (!p.ParseFromString(s))
        {
            cerr << "failed to parse message #" << (i + 1) << endl;
            return false;
        }

        cout << "ID: " << p.id() << endl;
        cout << "name: " << p.name() << endl;
        // GendreType values start at 1, the label table at 0.
        cout << "gendre: " << gendres[p.gendre() - 1] << endl;
        if (p.has_email())
        {
            cout << "e-mail: " << p.email() << endl;
        }
        cout << endl;
    }
    return true;
}
3. I hope this is the aspect you asked for.
Florin