Reading multiple protobuf messages from a binary file

瀏覽次數:3,323 次
跳到第一則未讀訊息

samarules

未讀,
2010年1月14日 晚上11:05:062010/1/14
收件者:Protocol Buffers
Hi All,

I have what I believe is a simple task: write multiple protocol buffer
messages into a single binary file and then read them back
sequentially. When reading, I should not have to load the entire file
in memory but read each protobuf object one at a time (with some
buffering). An example application would be to store a large number of
documents in a file, where each document is a single protobuf message.

Is this functionality available in C++?


Zia

lucas_ro

未讀,
2010年1月15日 上午9:59:412010/1/15
收件者:Protocol Buffers
Yes, it is possible to serialize multiple protocol buffer messages
into a binary file and then load them back one by one.

Please see below my sample:

1. The file that describes the message I want to serialize:

// One person record. The C++ sample below serializes these either inside a
// Catalog or individually as length-prefixed records.
message Person {
// Numeric identifier of the person.
required int32 id = 1;
// Display name.
required string name = 2;
// E-mail address; optional, so readers test has_email() before using it.
optional string email = 3;
// Gender of the person. The spelling "gendre" is part of the generated
// accessor names (set_gendre / gendre) used by the C++ sample.
required GendreType gendre = 4;

// Values start at 1; the C++ sample maps them to labels via (value - 1).
enum GendreType {
M = 1;
F = 2;
}
}

// Container message holding any number of Person records, so that a whole
// collection can be serialized and parsed as one message.
message Catalog {
// The person records, in insertion order.
repeated Person persons = 1;
}

2. The code that loads the messages one by one (please see the
functions saveMultipleMessages and loadMultipleMessages) or all at once:

#include <cassert>
#include <iostream>
#include <fstream>
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/io/coded_stream.h>
#include "person.pb.h"

// NOTE(review): file-scope using-directive; tolerable in a short sample
// translation unit, but should not be copied into a header.
using namespace std;

// Display labels for Person::GendreType, indexed by (enum value - 1)
// because the enum values in the .proto start at 1.
static const char *gendres[] = {"M", "F"};

// Demo helpers defined after main(): write several Person messages into one
// file and read them back one at a time.
bool saveMultipleMessages(const char *fileName);
bool loadMultipleMessages(const char *fileName);
//

int main()
{
GOOGLE_PROTOBUF_VERIFY_VERSION;

fstream out("person.pb", ios::out | ios::binary | ios::trunc);

// the container of person objects
Catalog catalog;

// add person #1
Person *p = catalog.add_persons();

p->set_id(23);
p->set_name("Bob");
p->set_email("b...@mail.com");
p->set_gendre(Person::M);

// add person #2
p = catalog.add_persons();

p->set_id(24);
p->set_name("Foo");
p->set_gendre(Person::F);

// serialize the container of the person objects
catalog.SerializeToOstream(&out);
out.close();

// deserialize the container of person objects
Catalog cat1;

fstream in("person.pb", ios::in | ios::binary);
if (!cat1.ParseFromIstream(&in))
{
cerr << "Failed to parse person.pb" << endl;
exit(1);
}

for (int i = 0; i < cat1.persons_size(); ++i)
{
Person p = cat1.persons(i);

cout << "ID: " << p.id() << endl;
cout << "name: " << p.name() << endl;
cout << "gendre: " << gendres[p.gendre()-1] << endl;
if (p.has_email())
{
cout << "e-mail: " << p.email() << endl;
}

cout << endl;
}
in.close();

//
if (saveMultipleMessages("person1.pb"))
{
loadMultipleMessages("person1.pb");
}

google::protobuf::ShutdownProtobufLibrary();

return 0;
}

//
bool saveMultipleMessages(const char *fileName)
{
fstream out(fileName, ios::out | ios::trunc | ios::binary);
if (!out)
{
cerr << "failed to open " << fileName << endl;
return false;
}

::google::protobuf::io::ZeroCopyOutputStream *raw_out =
new ::google::protobuf::io::OstreamOutputStream(&out);
::google::protobuf::io::CodedOutputStream *coded_out =
new ::google::protobuf::io::CodedOutputStream(raw_out);

// save the number of the messages to be serialized into the
output file
coded_out->WriteVarint32(2);

std::string s;

// message #1
Person p;
p.set_id(1);
p.set_name("Bob");
p.set_gendre(Person::M);

p.SerializeToString(&s);

coded_out->WriteVarint32(s.size());
coded_out->WriteRaw(s.data(), s.size()); // ->WriteString(s)
//

// message #2
p.set_id(2);
p.set_name("Jane");
p.set_email("ja...@kinky.com");
p.set_gendre(Person::F);

p.SerializeToString(&s);

coded_out->WriteVarint32(s.size());
coded_out->WriteString(s);
//

delete coded_out;
delete raw_out;
out.close();

return true;
}

//
// Reads back a file written by saveMultipleMessages and prints every Person
// to cout. Expected layout:
//
//   varint32 message count
//   repeated { varint32 size; <size> bytes of a serialized Person }
//
// Only one message is held in memory at a time. Returns true if the count
// and every record were read and parsed, false (after printing a diagnostic
// to cerr) if the file cannot be opened, is truncated, or holds a record
// that does not parse as a Person.
//
// NOTE(review): CodedInputStream enforces a total-bytes limit over its
// lifetime; for very large files consider constructing a fresh
// CodedInputStream per record -- confirm against the protobuf version in use.
bool loadMultipleMessages(const char *fileName)
{
    fstream in(fileName, ios::in | ios::binary);
    if (!in)
    {
        cerr << "failed to open " << fileName << endl;
        return false;
    }

    // Automatic objects instead of new/delete: destroyed in reverse order
    // (coded stream, raw stream, then the fstream) on every return path.
    ::google::protobuf::io::IstreamInputStream raw_in(&in);
    ::google::protobuf::io::CodedInputStream coded_in(&raw_in);

    // Initialised and checked: a failed read must not leave the loop bound
    // indeterminate.
    uint32_t n = 0;
    if (!coded_in.ReadVarint32(&n))
    {
        cerr << "failed to read the message count from " << fileName << endl;
        return false;
    }
    cout << "#" << n << endl;

    std::string s;

    for (uint32_t i = 0; i < n; ++i)
    {
        uint32_t msgSize = 0;
        if (!coded_in.ReadVarint32(&msgSize))
        {
            cerr << "failed to read the size of message " << i
                 << " from " << fileName << endl;
            return false;
        }

        // Zero-length records are skipped, as before.
        if (msgSize == 0)
        {
            continue;
        }

        if (!coded_in.ReadString(&s, static_cast<int>(msgSize)))
        {
            cerr << "failed to read message " << i
                 << " from " << fileName << endl;
            return false;
        }

        Person p;
        if (!p.ParseFromString(s))
        {
            cerr << "failed to parse message " << i
                 << " from " << fileName << endl;
            return false;
        }

        cout << "ID: " << p.id() << endl;
        cout << "name: " << p.name() << endl;
        // GendreType values start at 1, the label table at index 0.
        cout << "gendre: " << gendres[p.gendre()-1] << endl;
        if (p.has_email())
        {
            cout << "e-mail: " << p.email() << endl;
        }

        cout << endl;
    }

    return true;
}

3. I hope this covers what you asked for.

Florin

samarules

未讀,
2010年1月17日 清晨6:02:282010/1/17
收件者:Protocol Buffers
Thank you very much, Florin. It works great!

>     p.set_email("j...@kinky.com");

回覆所有人
回覆作者
轉寄
0 則新訊息