多个文件合并的C++实现

35 views
Skip to first unread message

Hongfei Yan

unread,
Mar 25, 2013, 3:56:13 AM (3/25/13)
to cs41...@googlegroups.com
这是打开多个文件并进行合并的C++实现，上课时问的是这个问题吧？
我在JIANGHan程序中找了一段代码，请看是否有帮助。

void IndexWriter::mergePSTBlk(int numtmps) {
  string prefix = path+"/"+POSTINGS_FILE;
  ofstream merge_trm, merge_doc, merge_pos, merge_tmap;
  set<pair<string, int> > termheap;
  ifstream tmp_files[numtmps*3];

  merge_trm.open((prefix+".trm").c_str());
  merge_doc.open((prefix+".doc").c_str(), ios::binary);
  merge_pos.open((prefix+".pos").c_str(), ios::binary);
  merge_tmap.open((path+"/"+TERM_MAP_FILE).c_str());

  for (int i = 0; i < numtmps; i++) {
    tmp_files[i*3].open((prefix+".trm."+itoa(i)).c_str());
    tmp_files[i*3+1].open((prefix+".doc."+itoa(i)).c_str(), ios::binary);
    tmp_files[i*3+2].open((prefix+".pos."+itoa(i)).c_str(), ios::binary);
  }
  for (int i = 0; i < numtmps; i++) {
    ifstream& ftrm = tmp_files[i*3];
    string term;
    if (ftrm >> term) {
      termheap.insert(make_pair(term, i));
    }
  }
  int num_term = 0;
  while (!termheap.empty()) {
    set<pair<string, int> >::iterator it = termheap.begin();
    pair<string, int> head = *it;
    set<int> hit;
    int num_docs = 0;

    merge_tmap << num_term << " " << head.first << endl;
    merge_trm  << num_term << " " << ftellp(merge_doc) << endl;
    num_term++;

    while (!it->first.compare(head.first)) {
      unsigned ndoc, i = it->second;
      string term;
      ifstream& ftrm = tmp_files[i*3];
      ifstream& fdoc = tmp_files[i*3+1];

      hit.insert(i);
      if (ftrm >> term) {
        termheap.insert(make_pair(term, i));
      }
      fpeek(fdoc, &ndoc, sizeof(ndoc));
      num_docs += ndoc;
      termheap.erase(it);
      if (!termheap.empty()) {
..............
Reply all
Reply to author
Forward
0 new messages