// Create an OCR engine instance and initialize it with the "eng" language.
tesseract::TessBaseAPI *tess = new tesseract::TessBaseAPI();
if (tess->Init(NULL, "eng") != 0)
{
    // A non-zero result from Init is treated as a fatal initialization failure.
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
}
// Set tessedit_char_whitelist to ASCII letters, digits and selected punctuation.
tess->SetVariable("tessedit_char_whitelist", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789&`.,-%/():*'@#");

// Start of the next snippet: a fresh TessBaseAPI instance.
tesseract::TessBaseAPI *tess = new tesseract::TessBaseAPI();
// Initialize the engine with the "fra" language; a non-zero result is
// treated as a fatal initialization failure.
if (tess->Init(NULL, "fra") != 0)
{
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
}
tess->SetVariable("tessedit_char_whitelist", "abécédéeeffegéacheijikaelleemmeenneopéquerreessetéuvé double véixeigreczède ");
I would like to use Tesseract to extract French text, and I hope this is possible with the existing Tesseract release and the available French dictionary.
--- Changes I made:
tesseract::TessBaseAPI *tess = new tesseract::TessBaseAPI();
if (tess->Init(NULL, "fra"))
{
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
tess->SetVariable("tessedit_char_whitelist", "abécédéeeffegéacheijikaelleemmeenneopéquerreessetéuvé double véixeigreczède ");
if (tess->Init(NULL, "fra"))
{
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
string sOut;
tess->SetImage((uchar*)TessBinaryMat.data, TessBinaryMat.size().width, TessBinaryMat.size().height, TessBinaryMat.channels(), TessBinaryMat.step1());
sOut = tess->GetUTF8Text();
What is the preferred page segmentation mode for French text?