ComputeWordError function explanation

12 views
Skip to first unread message

Meet Yogi

unread,
Mar 24, 2021, 7:29:27 AM3/24/21
to tesseract-ocr

// Computes the word recall error rate of ocr_str measured against truth_str.
//
// Both strings are cut into words at every ' ' character; empty pieces
// produced by leading, trailing or repeated spaces are ignored. The two word
// lists are then compared as bags of words: word order does not matter, only
// how many times each distinct word occurs.
//
// Every occurrence of a truth word that has no matching occurrence in the OCR
// text counts as one error. Words that appear only in the OCR text (or more
// often there than in the truth) are not counted as errors.
//
// Parameters:
//   truth_str  the reference text; must not be null, is not modified.
//   ocr_str    the recognised text; must not be null, is not modified.
// Returns:
//   (number of unmatched truth words) / (number of truth words), or 0.0 when
//   truth_str contains no words at all.
//
// NOTE(review): assumes `string` names std::string in the enclosing scope.
double ComputeWordError(string* truth_str, string* ocr_str) {
  using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;

  // word -> (occurrences in truth) - (occurrences in OCR output).
  StrMap word_counts;

  // Adds `delta` to the counter of every space-separated word in `text` and
  // returns how many words were found.
  const auto tally_words = [&word_counts](const std::string& text,
                                          int delta) -> int {
    int num_words = 0;
    std::string::size_type start = 0;
    while (start < text.size()) {
      std::string::size_type end = text.find(' ', start);
      if (end == std::string::npos) end = text.size();
      if (end > start) {  // Skip the empty piece between adjacent spaces.
        // operator[] creates a missing entry with count 0 before adjusting.
        word_counts[text.substr(start, end - start)] += delta;
        ++num_words;
      }
      start = end + 1;
    }
    return num_words;
  };

  const int num_truth_words = tally_words(*truth_str, +1);
  // Without any truth words there is nothing to miss (and nothing to divide
  // by), so report zero error.
  if (num_truth_words == 0) return 0.0;
  tally_words(*ocr_str, -1);

  // A positive balance means that many occurrences of the truth word were
  // never produced by the OCR. Zero or negative balances (matched words and
  // surplus OCR words) are deliberately ignored.
  int word_recall_errs = 0;
  for (const auto& entry : word_counts) {
    if (entry.second > 0) word_recall_errs += entry.second;
  }
  return static_cast<double>(word_recall_errs) / num_truth_words;
}

I would like to understand the above code, as I don't have much knowledge of the C++ language.

Thank You
Meet Yogi
Reply all
Reply to author
Forward
0 new messages