How to recognize kinds of letters and digits?

79 views
Skip to first unread message

Borneq

unread,
Dec 18, 2016, 4:06:49 PM12/18/16
to tesseract-ocr
void TextWindow::recognize(const char *imagepath)
{
    Pix* pixs = pixRead(imagepath);
    if (!pixs)
    {
        fprintf(stderr, "Cannot open input file: %s\n", imagepath);
        exit(2);
    }
    tesseract::TessBaseAPI api;
    const char* lang = "pol";
    const char* datapath = "/usr/share/tesseract-ocr";
    tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
    tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT;
    int init_failed = api.Init(datapath, lang, enginemode);
    if (init_failed)
    {
        fprintf(stderr, "Could not initialize tesseract.\n");
        exit(1);
    }
    api.SetImage(pixs);
    tesseract::Orientation orientation;
    tesseract::WritingDirection direction;
    tesseract::TextlineOrder order;
    float deskew_angle;

    tesseract::PageIterator* it = api.AnalyseLayout();
    if (it) {
        it->Orientation(&orientation, &direction, &order, &deskew_angle);
        printf(
                    "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
                    "Deskew angle: %.4f\n",
                    orientation, direction, order, deskew_angle);
    } else {
        //ret_val = 1;
    }

    delete it;

    pixDestroy(&pixs);
    ////
    tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
    PreloadRenderers(&api, &renderers, pagesegmode, "wynik");
    if (!renderers.empty()) {
        bool succeed = api.ProcessPages(imagepath, NULL, 0, renderers[0]);
        if (!succeed) {
            fprintf(stderr, "Error during processing.\n");
            exit(1);
        }
    }
}
The above procedure recognizes text from an image and saves it to the file "wynik.txt". But my images are letter addresses: first comes the first and last name, WITHOUT digits; next comes the address and the postal code, e.g. 12-345 - ONLY digits. How can I use this information to recognize the text more accurately?

Reply all
Reply to author
Forward
0 new messages